Skip to content

Commit 7d05b50

Browse files
committed
fix docs as per review
.unique of Categorical/Series now returns Categorical
1 parent 220a86b commit 7d05b50

File tree

5 files changed

+128
-48
lines changed

5 files changed

+128
-48
lines changed

doc/source/whatsnew/v0.20.0.txt

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -608,36 +608,36 @@ datatypes would yield different return types. These are now made consistent. (:i
608608
.. code-block:: ipython
609609

610610
# Series
611-
In [5]: Series(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
612-
pd.Timestamp('20160101', tz='US/Eastern')])).unique()
611+
In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
612+
pd.Timestamp('20160101', tz='US/Eastern')]).unique()
613613
Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object)
614614

615-
In [6]: pd.unique(Series(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
616-
pd.Timestamp('20160101', tz='US/Eastern')])))
615+
In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
616+
pd.Timestamp('20160101', tz='US/Eastern')]))
617617
Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]')
618618

619619
# Index
620620
In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
621621
pd.Timestamp('20160101', tz='US/Eastern')]).unique()
622622
Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None)
623623

624-
In [8]: pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
625-
pd.Timestamp('20160101', tz='US/Eastern')]))
624+
In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'),
625+
pd.Timestamp('20160101', tz='US/Eastern')])
626626
Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]')
627627

628628
New Behavior:
629629

630630
.. ipython:: python
631631

632632
# Series, returns an array of tz-aware Timestamps
633-
Series(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
634-
pd.Timestamp('20160101', tz='US/Eastern')])).unique()
635-
pd.unique(Series(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
636-
pd.Timestamp('20160101', tz='US/Eastern')])))
633+
pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
634+
pd.Timestamp('20160101', tz='US/Eastern')]).unique()
635+
pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
636+
pd.Timestamp('20160101', tz='US/Eastern')]))
637637

638638
# Index, returns a DatetimeIndex
639639
pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
640-
pd.Timestamp('20160101', tz='US/Eastern')])
640+
pd.Timestamp('20160101', tz='US/Eastern')]).unique()
641641
pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
642642
pd.Timestamp('20160101', tz='US/Eastern')]))
643643

@@ -647,21 +647,21 @@ datatypes would yield different return types. These are now made consistent. (:i
647647

648648
.. code-block:: ipython
649649

650-
In [1]: pd.Series(pd.Categorical(list('aabc'))).unique()
650+
In [1]: pd.Series(pd.Categorical(list('baabc'))).unique()
651651
Out[1]:
652-
[a, b, c]
653-
Categories (3, object): [a, b, c]
652+
[b, a, c]
653+
Categories (3, object): [b, a, c]
654654

655-
In [2]: pd.unique(pd.Series(pd.Categorical(list('aabc'))).unique())
656-
Out[2]: array(['a', 'b', 'c'], dtype=object)
655+
In [2]: pd.unique(pd.Series(pd.Categorical(list('baabc'))))
656+
Out[2]: array(['b', 'a', 'c'], dtype=object)
657657

658658
New Behavior:
659659

660660
.. ipython:: python
661661

662662
# returns a Categorical
663-
pd.Series(pd.Categorical(list('aabc'))).unique()
664-
pd.unique(pd.Series(pd.Categorical(list('aabc'))).unique())
663+
pd.Series(pd.Categorical(list('baabc'))).unique()
664+
pd.unique(pd.Series(pd.Categorical(list('baabc'))))
665665

666666
.. _whatsnew_0200.api_breaking.s3:
667667

pandas/core/algorithms.py

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -269,19 +269,21 @@ def match(to_match, values, na_sentinel=-1):
269269

270270
def unique(values):
271271
"""
272-
Hash table-based unique. uniques are returned in order
272+
Hash table-based unique. Uniques are returned in order
273273
of appearance. This does NOT sort.
274274
275+
Significantly faster than numpy.unique. Includes NA values.
276+
275277
Parameters
276278
----------
277279
values : 1d array-like
278280
279281
Returns
280282
-------
281283
unique values.
282-
- If the input is a Categorical dtype, the return is a Categorical
283-
- If the input is an Index, the return is an Index
284-
- If the input is a Series/ndarray, the return will be an ndarray
284+
- If the input is an Index, the return is an Index
285+
- If the input is a Categorical dtype, the return is a Categorical
286+
- If the input is a Series/ndarray, the return will be an ndarray
285287
286288
Examples
287289
--------
@@ -305,26 +307,43 @@ def unique(values):
305307
DatetimeIndex(['2016-01-01 00:00:00-05:00'],
306308
... dtype='datetime64[ns, US/Eastern]', freq=None)
307309
308-
>>> pd.unique(list('aabc'))
309-
array(['a', 'b', 'c'], dtype=object)
310+
>>> pd.unique(list('baabc'))
311+
array(['b', 'a', 'c'], dtype=object)
312+
313+
An unordered Categorical will return categories in the
314+
order of appearance.
315+
316+
>>> pd.unique(Series(pd.Categorical(list('baabc'))))
317+
[b, a, c]
318+
Categories (3, object): [b, a, c]
319+
320+
>>> pd.unique(Series(pd.Categorical(list('baabc'),
321+
... categories=list('abc'))))
322+
[b, a, c]
323+
Categories (3, object): [b, a, c]
310324
311-
>>> pd.unique(Series(pd.Categorical(list('aabc'))))
312-
0 a
313-
1 b
314-
2 c
315-
dtype: category
316-
Categories (3, object): [a, b, c]
325+
An ordered Categorical preserves the category ordering.
326+
327+
>>> pd.unique(Series(pd.Categorical(list('baabc'),
328+
... categories=list('abc'),
329+
... ordered=True)))
330+
[b, a, c]
331+
Categories (3, object): [a < b < c]
332+
333+
See Also
334+
--------
335+
pd.Index.unique
336+
pd.Series.unique
317337
318338
"""
319339

320340
values = _ensure_arraylike(values)
321341

322342
# categorical is a fast-path
343+
# this will coerce Categorical, CategoricalIndex,
344+
# and category dtypes Series to same return of Category
323345
if is_categorical_dtype(values):
324-
325-
if isinstance(values, ABCSeries):
326-
from pandas import Series
327-
return Series(values.values.unique(), name=values.name)
346+
values = getattr(values, '.values', values)
328347
return values.unique()
329348

330349
original = values

pandas/core/base.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -855,13 +855,24 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
855855

856856
_shared_docs['unique'] = (
857857
"""
858-
Return %(unique)s of unique values in the object.
859-
Significantly faster than numpy.unique. Includes NA values.
860-
The order of the original is preserved.
858+
Hash table-based unique. Uniques are returned in order
859+
of appearance. This does NOT sort.
860+
861+
Parameters
862+
----------
863+
values : 1d array-like
861864
862865
Returns
863866
-------
864-
uniques : %(unique)s
867+
unique values.
868+
- If the input is an Index, the return is an Index
869+
- If the input is a Categorical dtype, the return is a Categorical
870+
- If the input is a Series/ndarray, the return will be an ndarray
871+
872+
See Also
873+
--------
874+
pd.unique
875+
pd.Categorical.unique
865876
""")
866877

867878
@Appender(_shared_docs['unique'] % _indexops_doc_kwargs)

pandas/core/categorical.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1895,6 +1895,33 @@ def unique(self):
18951895
Returns
18961896
-------
18971897
unique values : ``Categorical``
1898+
1899+
Examples
1900+
--------
1901+
An unordered Categorical will return categories in the
1902+
order of appearance.
1903+
1904+
>>> pd.Categorical(list('baabc')).unique()
1905+
[b, a, c]
1906+
Categories (3, object): [b, a, c]
1907+
1908+
>>> pd.Categorical(list('baabc'), categories=list('abc')).unique()
1909+
[b, a, c]
1910+
Categories (3, object): [b, a, c]
1911+
1912+
An ordered Categorical preserves the category ordering.
1913+
1914+
>>> pd.Categorical(list('baabc'),
1915+
... categories=list('abc'),
1916+
... ordered=True).unique()
1917+
[b, a, c]
1918+
Categories (3, object): [a < b < c]
1919+
1920+
See Also
1921+
--------
1922+
pd.unique
1923+
pd.CategoricalIndex.unique
1924+
18981925
"""
18991926

19001927
# unlike np.unique, unique1d does not sort

pandas/tests/test_algos.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -386,23 +386,46 @@ def test_uint64_overflow(self):
386386
tm.assert_numpy_array_equal(algos.unique(s), exp)
387387

388388
def test_categorical(self):
389+
390+
# we are expecting to return in the order
391+
# of appearance
392+
expected = pd.Categorical(list('bac'),
393+
categories=list('bac'))
394+
395+
# we are expecting to return in the order
396+
# of the categories
397+
expected_o = pd.Categorical(list('bac'),
398+
categories=list('abc'),
399+
ordered=True)
400+
389401
# GH 15939
390-
c = pd.Categorical(list('aabc'))
402+
c = pd.Categorical(list('baabc'))
391403
result = c.unique()
392-
expected = pd.Categorical(list('abc'))
393404
tm.assert_categorical_equal(result, expected)
394405

395406
result = algos.unique(c)
396407
tm.assert_categorical_equal(result, expected)
397408

398-
result = algos.unique(Series(c, name='foo'))
399-
expected = Series(expected, name='foo')
400-
tm.assert_series_equal(result, expected)
409+
c = pd.Categorical(list('baabc'), ordered=True)
410+
result = c.unique()
411+
tm.assert_categorical_equal(result, expected_o)
412+
413+
result = algos.unique(c)
414+
tm.assert_categorical_equal(result, expected_o)
401415

402-
# CI
403-
ci = pd.CategoricalIndex(pd.Categorical(list('aabc')))
416+
# Series of categorical dtype
417+
s = Series(pd.Categorical(list('baabc')), name='foo')
418+
result = s.unique()
419+
tm.assert_categorical_equal(result, expected)
420+
421+
result = pd.unique(s)
422+
tm.assert_categorical_equal(result, expected)
423+
424+
# CI -> return CI
425+
ci = pd.CategoricalIndex(pd.Categorical(list('baabc'),
426+
categories=list('bac')))
427+
expected = pd.CategoricalIndex(expected)
404428
result = ci.unique()
405-
expected = pd.CategoricalIndex(pd.Categorical(list('abc')))
406429
tm.assert_index_equal(result, expected)
407430

408431
result = pd.unique(ci)
@@ -468,8 +491,8 @@ def test_order_of_appearance(self):
468491
tm.assert_numpy_array_equal(result, expected)
469492

470493
result = pd.unique(Series(pd.Categorical(list('aabc'))))
471-
expected = Series(pd.Categorical(list('abc')))
472-
tm.assert_series_equal(result, expected)
494+
expected = pd.Categorical(list('abc'))
495+
tm.assert_categorical_equal(result, expected)
473496

474497

475498
class TestIsin(tm.TestCase):

0 commit comments

Comments
 (0)