@@ -456,35 +456,6 @@ def test_sample(self):
456
456
with tm .assertRaises (ValueError ):
457
457
o .sample (n = 3 , weights = nan_weights )
458
458
459
- # A few dataframe test with degenerate weights.
460
- easy_weight_list = [0 ] * 10
461
- easy_weight_list [5 ] = 1
462
-
463
- df = pd .DataFrame ({'col1' : range (10 , 20 ),
464
- 'col2' : range (20 , 30 ),
465
- 'colString' : ['a' ] * 10 ,
466
- 'easyweights' : easy_weight_list })
467
- sample1 = df .sample (n = 1 , weights = 'easyweights' )
468
- assert_frame_equal (sample1 , df .iloc [5 :6 ])
469
-
470
- # Ensure proper error if string given as weight for Series, panel, or
471
- # DataFrame with axis = 1.
472
- s = Series (range (10 ))
473
- with tm .assertRaises (ValueError ):
474
- s .sample (n = 3 , weights = 'weight_column' )
475
-
476
- panel = pd .Panel (items = [0 , 1 , 2 ], major_axis = [2 , 3 , 4 ],
477
- minor_axis = [3 , 4 , 5 ])
478
- with tm .assertRaises (ValueError ):
479
- panel .sample (n = 1 , weights = 'weight_column' )
480
-
481
- with tm .assertRaises (ValueError ):
482
- df .sample (n = 1 , weights = 'weight_column' , axis = 1 )
483
-
484
- # Check weighting key error
485
- with tm .assertRaises (KeyError ):
486
- df .sample (n = 3 , weights = 'not_a_real_column_name' )
487
-
488
459
# Check np.nan are replaced by zeros.
489
460
weights_with_nan = [np .nan ] * 10
490
461
weights_with_nan [5 ] = 0.5
@@ -497,90 +468,6 @@ def test_sample(self):
497
468
self ._compare (
498
469
o .sample (n = 1 , axis = 0 , weights = weights_with_None ), o .iloc [5 :6 ])
499
470
500
- # Check that re-normalizes weights that don't sum to one.
501
- weights_less_than_1 = [0 ] * 10
502
- weights_less_than_1 [0 ] = 0.5
503
- tm .assert_frame_equal (
504
- df .sample (n = 1 , weights = weights_less_than_1 ), df .iloc [:1 ])
505
-
506
- ###
507
- # Test axis argument
508
- ###
509
-
510
- # Test axis argument
511
- df = pd .DataFrame ({'col1' : range (10 ), 'col2' : ['a' ] * 10 })
512
- second_column_weight = [0 , 1 ]
513
- assert_frame_equal (
514
- df .sample (n = 1 , axis = 1 , weights = second_column_weight ), df [['col2' ]])
515
-
516
- # Different axis arg types
517
- assert_frame_equal (df .sample (n = 1 , axis = 'columns' ,
518
- weights = second_column_weight ),
519
- df [['col2' ]])
520
-
521
- weight = [0 ] * 10
522
- weight [5 ] = 0.5
523
- assert_frame_equal (df .sample (n = 1 , axis = 'rows' , weights = weight ),
524
- df .iloc [5 :6 ])
525
- assert_frame_equal (df .sample (n = 1 , axis = 'index' , weights = weight ),
526
- df .iloc [5 :6 ])
527
-
528
- # Check out of range axis values
529
- with tm .assertRaises (ValueError ):
530
- df .sample (n = 1 , axis = 2 )
531
-
532
- with tm .assertRaises (ValueError ):
533
- df .sample (n = 1 , axis = 'not_a_name' )
534
-
535
- with tm .assertRaises (ValueError ):
536
- s = pd .Series (range (10 ))
537
- s .sample (n = 1 , axis = 1 )
538
-
539
- # Test weight length compared to correct axis
540
- with tm .assertRaises (ValueError ):
541
- df .sample (n = 1 , axis = 1 , weights = [0.5 ] * 10 )
542
-
543
- # Check weights with axis = 1
544
- easy_weight_list = [0 ] * 3
545
- easy_weight_list [2 ] = 1
546
-
547
- df = pd .DataFrame ({'col1' : range (10 , 20 ),
548
- 'col2' : range (20 , 30 ),
549
- 'colString' : ['a' ] * 10 })
550
- sample1 = df .sample (n = 1 , axis = 1 , weights = easy_weight_list )
551
- assert_frame_equal (sample1 , df [['colString' ]])
552
-
553
- # Test default axes
554
- p = pd .Panel (items = ['a' , 'b' , 'c' ], major_axis = [2 , 4 , 6 ],
555
- minor_axis = [1 , 3 , 5 ])
556
- assert_panel_equal (
557
- p .sample (n = 3 , random_state = 42 ), p .sample (n = 3 , axis = 1 ,
558
- random_state = 42 ))
559
- assert_frame_equal (
560
- df .sample (n = 3 , random_state = 42 ), df .sample (n = 3 , axis = 0 ,
561
- random_state = 42 ))
562
-
563
- # Test that function aligns weights with frame
564
- df = DataFrame (
565
- {'col1' : [5 , 6 , 7 ],
566
- 'col2' : ['a' , 'b' , 'c' ], }, index = [9 , 5 , 3 ])
567
- s = Series ([1 , 0 , 0 ], index = [3 , 5 , 9 ])
568
- assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s ))
569
-
570
- # Weights have index values to be dropped because not in
571
- # sampled DataFrame
572
- s2 = Series ([0.001 , 0 , 10000 ], index = [3 , 5 , 10 ])
573
- assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s2 ))
574
-
575
- # Weights have empty values to be filed with zeros
576
- s3 = Series ([0.01 , 0 ], index = [3 , 5 ])
577
- assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s3 ))
578
-
579
- # No overlap in weight and sampled DataFrame indices
580
- s4 = Series ([1 , 0 ], index = [1 , 2 ])
581
- with tm .assertRaises (ValueError ):
582
- df .sample (1 , weights = s4 )
583
-
584
471
def test_size_compat (self ):
585
472
# GH8846
586
473
# size property should be defined
@@ -1963,6 +1850,9 @@ class TestPanel4D(tm.TestCase, Generic):
1963
1850
_typ = Panel4D
1964
1851
_comparator = lambda self , x , y : assert_panel4d_equal (x , y )
1965
1852
1853
+ def test_sample (self ):
1854
+ raise nose .SkipTest ("sample on Panel4D" ) # NOTE(review): presumably overrides the test_sample inherited via Generic so it is skipped for Panel4D -- confirm
1855
+
1966
1856
def test_to_xarray (self ):
1967
1857
1968
1858
tm ._skip_if_no_xarray ()
@@ -1984,6 +1874,123 @@ def test_to_xarray(self):
1984
1874
class TestNDFrame (tm .TestCase ):
1985
1875
# tests that don't fit elsewhere
1986
1876
1877
+ def test_sample (self ):
1878
+ # Fixes issue: 2419
1879
+ # Object-specific sample() tests: weights, axis handling, index alignment.
1880
+
1881
+ # A few DataFrame tests with degenerate weights.
1882
+ easy_weight_list = [0 ] * 10
1883
+ easy_weight_list [5 ] = 1
1884
+
1885
+ df = pd .DataFrame ({'col1' : range (10 , 20 ),
1886
+ 'col2' : range (20 , 30 ),
1887
+ 'colString' : ['a' ] * 10 ,
1888
+ 'easyweights' : easy_weight_list })
1889
+ sample1 = df .sample (n = 1 , weights = 'easyweights' )
1890
+ assert_frame_equal (sample1 , df .iloc [5 :6 ])
1891
+
1892
+ # Ensure proper error if string given as weight for Series, Panel, or
1893
+ # DataFrame with axis = 1.
1894
+ s = Series (range (10 ))
1895
+ with tm .assertRaises (ValueError ):
1896
+ s .sample (n = 3 , weights = 'weight_column' )
1897
+
1898
+ panel = pd .Panel (items = [0 , 1 , 2 ], major_axis = [2 , 3 , 4 ],
1899
+ minor_axis = [3 , 4 , 5 ])
1900
+ with tm .assertRaises (ValueError ):
1901
+ panel .sample (n = 1 , weights = 'weight_column' )
1902
+
1903
+ with tm .assertRaises (ValueError ):
1904
+ df .sample (n = 1 , weights = 'weight_column' , axis = 1 )
1905
+
1906
+ # Check weighting key error
1907
+ with tm .assertRaises (KeyError ):
1908
+ df .sample (n = 3 , weights = 'not_a_real_column_name' )
1909
+
1910
+ # Check that sample re-normalizes weights that don't sum to one.
1911
+ weights_less_than_1 = [0 ] * 10
1912
+ weights_less_than_1 [0 ] = 0.5
1913
+ assert_frame_equal (
1914
+ df .sample (n = 1 , weights = weights_less_than_1 ), df .iloc [:1 ])
1915
+
1916
+ ###
1917
+ # Test axis argument
1918
+ ###
1919
+
1920
+ # Integer axis value
1921
+ df = pd .DataFrame ({'col1' : range (10 ), 'col2' : ['a' ] * 10 })
1922
+ second_column_weight = [0 , 1 ]
1923
+ assert_frame_equal (
1924
+ df .sample (n = 1 , axis = 1 , weights = second_column_weight ), df [['col2' ]])
1925
+
1926
+ # Different axis arg types
1927
+ assert_frame_equal (df .sample (n = 1 , axis = 'columns' ,
1928
+ weights = second_column_weight ),
1929
+ df [['col2' ]])
1930
+
1931
+ weight = [0 ] * 10
1932
+ weight [5 ] = 0.5
1933
+ assert_frame_equal (df .sample (n = 1 , axis = 'rows' , weights = weight ),
1934
+ df .iloc [5 :6 ])
1935
+ assert_frame_equal (df .sample (n = 1 , axis = 'index' , weights = weight ),
1936
+ df .iloc [5 :6 ])
1937
+
1938
+ # Check out of range axis values
1939
+ with tm .assertRaises (ValueError ):
1940
+ df .sample (n = 1 , axis = 2 )
1941
+
1942
+ with tm .assertRaises (ValueError ):
1943
+ df .sample (n = 1 , axis = 'not_a_name' )
1944
+
1945
+ s = pd .Series (range (10 ))
1946
+ with tm .assertRaises (ValueError ):
1947
+ s .sample (n = 1 , axis = 1 )
1948
+
1949
+ # Test weight length compared to correct axis
1950
+ with tm .assertRaises (ValueError ):
1951
+ df .sample (n = 1 , axis = 1 , weights = [0.5 ] * 10 )
1952
+
1953
+ # Check weights with axis = 1
1954
+ easy_weight_list = [0 ] * 3
1955
+ easy_weight_list [2 ] = 1
1956
+
1957
+ df = pd .DataFrame ({'col1' : range (10 , 20 ),
1958
+ 'col2' : range (20 , 30 ),
1959
+ 'colString' : ['a' ] * 10 })
1960
+ sample1 = df .sample (n = 1 , axis = 1 , weights = easy_weight_list )
1961
+ assert_frame_equal (sample1 , df [['colString' ]])
1962
+
1963
+ # Test default axes
1964
+ p = pd .Panel (items = ['a' , 'b' , 'c' ], major_axis = [2 , 4 , 6 ],
1965
+ minor_axis = [1 , 3 , 5 ])
1966
+ assert_panel_equal (
1967
+ p .sample (n = 3 , random_state = 42 ), p .sample (n = 3 , axis = 1 ,
1968
+ random_state = 42 ))
1969
+ assert_frame_equal (
1970
+ df .sample (n = 3 , random_state = 42 ), df .sample (n = 3 , axis = 0 ,
1971
+ random_state = 42 ))
1972
+
1973
+ # Test that function aligns weights with frame
1974
+ df = DataFrame (
1975
+ {'col1' : [5 , 6 , 7 ],
1976
+ 'col2' : ['a' , 'b' , 'c' ], }, index = [9 , 5 , 3 ])
1977
+ s = Series ([1 , 0 , 0 ], index = [3 , 5 , 9 ])
1978
+ assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s ))
1979
+
1980
+ # Weights have index values to be dropped because not in
1981
+ # sampled DataFrame
1982
+ s2 = Series ([0.001 , 0 , 10000 ], index = [3 , 5 , 10 ])
1983
+ assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s2 ))
1984
+
1985
+ # Weights have empty values to be filled with zeros
1986
+ s3 = Series ([0.01 , 0 ], index = [3 , 5 ])
1987
+ assert_frame_equal (df .loc [[3 ]], df .sample (1 , weights = s3 ))
1988
+
1989
+ # No overlap in weight and sampled DataFrame indices
1990
+ s4 = Series ([1 , 0 ], index = [1 , 2 ])
1991
+ with tm .assertRaises (ValueError ):
1992
+ df .sample (1 , weights = s4 )
1993
+
1987
1994
def test_squeeze (self ):
1988
1995
# noop
1989
1996
for s in [tm .makeFloatSeries (), tm .makeStringSeries (),
0 commit comments