|
1 |
| -from typing import Tuple, Optional, Dict, Any, Iterable, Sequence, TypedDict |
| 1 | +from abc import ( |
| 2 | + ABC, |
| 3 | + abstractmethod, |
| 4 | +) |
2 | 5 | import enum
|
3 |
| -from abc import ABC, abstractmethod |
| 6 | +from typing import ( |
| 7 | + Any, |
| 8 | + Dict, |
| 9 | + Iterable, |
| 10 | + Optional, |
| 11 | + Sequence, |
| 12 | + Tuple, |
| 13 | + TypedDict, |
| 14 | +) |
4 | 15 |
|
5 | 16 |
|
6 | 17 | class DlpackDeviceType(enum.IntEnum):
|
@@ -89,6 +100,16 @@ class ColumnBuffers(TypedDict):
|
89 | 100 | offsets: Optional[Tuple["Buffer", Any]]
|
90 | 101 |
|
91 | 102 |
|
class CategoricalDescription(TypedDict):
    """
    Dictionary returned by ``Column.describe_categorical``.

    It describes how the data buffer of a categorical column is to be
    interpreted.
    """

    # whether the ordering of dictionary indices is semantically meaningful
    is_ordered: bool
    # whether a dictionary-style mapping of categorical values to other objects exists
    is_dictionary: bool
    # Python-level only (e.g. ``{int: str}``).
    # None if not a dictionary-style categorical.
    # NOTE: ``Column`` is defined further down in this module, so it has to be
    # referenced as a string; an unquoted name would be evaluated while this
    # class body executes and raise NameError at import time.
    categories: Optional["Column"]
| 111 | + |
| 112 | + |
92 | 113 | class Buffer(ABC):
|
93 | 114 | """
|
94 | 115 | Data in the buffer is guaranteed to be contiguous in memory.
|
@@ -191,7 +212,7 @@ class Column(ABC):
|
191 | 212 |
|
192 | 213 | @property
|
193 | 214 | @abstractmethod
|
194 |
| - def size(self) -> Optional[int]: |
| 215 | + def size(self) -> int: |
195 | 216 | """
|
196 | 217 | Size of the column, in elements.
|
197 | 218 |
|
@@ -246,15 +267,15 @@ def dtype(self) -> Tuple[DtypeKind, int, str, str]:
|
246 | 267 |
|
247 | 268 | @property
|
248 | 269 | @abstractmethod
|
249 |
| - def describe_categorical(self) -> dict[bool, bool, Optional[Column]]: |
| 270 | + def describe_categorical(self) -> CategoricalDescription: |
250 | 271 | """
|
251 | 272 | If the dtype is categorical, there are two options:
|
252 | 273 | - There are only values in the data buffer.
|
253 | 274 | - There is a separate non-categorical Column encoding categorical values.
|
254 | 275 |
|
255 | 276 | Raises TypeError if the dtype is not categorical
|
256 | 277 |
|
257 |
| - Returns the description on how to interpret the data buffer: |
| 278 | + Returns a dictionary describing how to interpret the data buffer: |
258 | 279 | - "is_ordered" : bool, whether the ordering of dictionary indices is
|
259 | 280 | semantically meaningful.
|
260 | 281 | - "is_dictionary" : bool, whether a mapping of
|
@@ -364,6 +385,24 @@ class DataFrame(ABC):
|
364 | 385 |
|
365 | 386 | version = 0 # version of the protocol
|
366 | 387 |
|
@abstractmethod
def __dataframe__(
    self, nan_as_null: bool = False, allow_copy: bool = True
) -> "DataFrame":
    """
    Build a new exchange object, optionally with different parameters.

    Parameters
    ----------
    nan_as_null : bool, default False
        Lets the consumer ask the producer to overwrite null values in the
        data with ``NaN`` (or ``NaT``). Meant for consumers that do not
        support the bit mask or byte mask that is the producer's native
        representation.
    allow_copy : bool, default True
        Whether the library is allowed to make a copy of the data. A copy
        would be needed, for example, when a library supports strided
        buffers, since this protocol specifies contiguous buffers.

    Returns
    -------
    DataFrame
        The newly constructed exchange object.
    """
| 405 | + |
367 | 406 | @property
|
368 | 407 | @abstractmethod
|
369 | 408 | def metadata(self) -> Dict[str, Any]:
|
|
0 commit comments