Source code for ray.data.namespace_expressions.arr_namespace

"""Array namespace for expression operations on array-typed columns."""

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

import pyarrow

from ray.data.datatype import DataType
from ray.data.expressions import pyarrow_udf

if TYPE_CHECKING:
    from ray.data.expressions import Expr, UDFExpr


@dataclass
class _ArrayNamespace:
    """Namespace for array operations on expression columns.

    Wraps a single expression and exposes array-oriented helpers on it.

    Example:
        >>> from ray.data.expressions import col
        >>> # Convert fixed-size lists to variable-length lists
        >>> expr = col("features").arr.to_list()
    """

    # The expression whose values the namespace methods operate on.
    _expr: Expr

    def to_list(self) -> "UDFExpr":
        """Convert FixedSizeList columns into variable-length lists.

        The declared return dtype is derived from the wrapped expression's
        ``data_type``:

        * not a list type -> ``DataType(object)``;
        * fixed-size list -> a variable-length list of the same value type;
        * any other list type -> the expression's dtype, unchanged.

        Returns:
            The result of applying the ``pyarrow_udf``-decorated conversion
            function to the wrapped expression.

        Raises:
            pyarrow.lib.ArrowInvalid: Raised by the conversion function when
                it is called with an array whose type is not a list type.
                NOTE(review): when the function actually runs depends on
                ``pyarrow_udf``, which is not visible here.
        """
        # Fallback dtype, used when the expression is not known to be a list.
        result_dtype = DataType(object)

        source_dtype = self._expr.data_type
        if source_dtype.is_list_type():
            source_arrow_type = source_dtype.to_arrow_dtype()
            result_dtype = (
                DataType.from_arrow(pyarrow.list_(source_arrow_type.value_type))
                if pyarrow.types.is_fixed_size_list(source_arrow_type)
                else source_dtype
            )

        @pyarrow_udf(return_dtype=result_dtype)
        def _to_list(arr: pyarrow.Array) -> pyarrow.Array:
            column_type = arr.type
            if DataType.from_arrow(column_type).is_list_type():
                if isinstance(column_type, pyarrow.FixedSizeListType):
                    # Only fixed-size lists need a cast; the value type is kept.
                    variable_length_type = pyarrow.list_(column_type.value_type)
                    return arr.cast(variable_length_type)
                # Other list types are passed through untouched.
                return arr

            raise pyarrow.lib.ArrowInvalid(
                "to_list() can only be called on list-like columns, "
                f"but got {column_type}"
            )

        return _to_list(self._expr)