Skip to content

Commit d772b47

Browse files
vnlitvinov authored and rgommers committed
Change API a bit, align formatting with pandas
Signed-off-by: Vasily Litvinov <[email protected]>
1 parent 6b49f22 commit d772b47

File tree

1 file changed

+44
-5
lines changed

1 file changed

+44
-5
lines changed

protocol/dataframe_protocol.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
1-
from typing import Tuple, Optional, Dict, Any, Iterable, Sequence, TypedDict
1+
from abc import (
2+
ABC,
3+
abstractmethod,
4+
)
25
import enum
3-
from abc import ABC, abstractmethod
6+
from typing import (
7+
Any,
8+
Dict,
9+
Iterable,
10+
Optional,
11+
Sequence,
12+
Tuple,
13+
TypedDict,
14+
)
415

516

617
class DlpackDeviceType(enum.IntEnum):
@@ -89,6 +100,16 @@ class ColumnBuffers(TypedDict):
89100
offsets: Optional[Tuple["Buffer", Any]]
90101

91102

103+
class CategoricalDescription(TypedDict):
    """
    Description of how to interpret the data buffer of a categorical column.
    """

    # whether the ordering of dictionary indices is semantically meaningful
    is_ordered: bool
    # whether a dictionary-style mapping of categorical values to other objects exists
    is_dictionary: bool
    # Python-level only (e.g. ``{int: str}``).
    # None if not a dictionary-style categorical.
    # The name is quoted because ``Column`` is defined further down in this
    # module; a bare reference would raise NameError when this class body is
    # evaluated.
    categories: Optional["Column"]
111+
112+
92113
class Buffer(ABC):
93114
"""
94115
Data in the buffer is guaranteed to be contiguous in memory.
@@ -191,7 +212,7 @@ class Column(ABC):
191212

192213
@property
193214
@abstractmethod
194-
def size(self) -> Optional[int]:
215+
def size(self) -> int:
195216
"""
196217
Size of the column, in elements.
197218
@@ -246,15 +267,15 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:
246267

247268
@property
248269
@abstractmethod
249-
def describe_categorical(self) -> dict[bool, bool, Optional[Column]]:
270+
def describe_categorical(self) -> CategoricalDescription:
250271
"""
251272
If the dtype is categorical, there are two options:
252273
- There are only values in the data buffer.
253274
- There is a separate non-categorical Column encoding categorical values.
254275
255276
Raises TypeError if the dtype is not categorical
256277
257-
Returns the description on how to interpret the data buffer:
278+
Returns the dictionary with description on how to interpret the data buffer:
258279
- "is_ordered" : bool, whether the ordering of dictionary indices is
259280
semantically meaningful.
260281
- "is_dictionary" : bool, whether a mapping of
@@ -364,6 +385,24 @@ class DataFrame(ABC):
364385

365386
version = 0 # version of the protocol
366387

388+
@abstractmethod
def __dataframe__(
    self,
    nan_as_null: bool = False,
    allow_copy: bool = True,
) -> "DataFrame":
    """
    Build a new exchange object, possibly with different parameters.

    Parameters
    ----------
    nan_as_null : bool, default False
        Keyword with which the consumer asks the producer to overwrite
        null values in the data with ``NaN`` (or ``NaT``). It targets
        consumers that do not support the bit mask or byte mask that is
        the producer's native representation.
    allow_copy : bool, default True
        Whether the library is allowed to make a copy of the data. A copy
        would be needed, for example, when a library supports strided
        buffers, since this protocol specifies contiguous buffers.
    """
405+
367406
@property
368407
@abstractmethod
369408
def metadata(self) -> Dict[str, Any]:

0 commit comments

Comments
 (0)