Skip to content

Commit bc9d370

Browse files
authored
feat: expose image attribute as expression (#4848)
1 parent 1e4d129 commit bc9d370

File tree

19 files changed

+352
-12
lines changed

19 files changed

+352
-12
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ repos:
8585
language: system
8686
types: [rust]
8787
pass_filenames: false
88-
args: [--workspace, --all-targets]
88+
args: [--workspace, --all-targets, -v]
8989

9090
- id: cargo-check-all-features
9191
name: cargo check (all features)

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

daft/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def refresh_logger() -> None:
7474
from_pylist,
7575
from_ray_dataset,
7676
)
77-
from daft.daft import ImageFormat, ImageMode, ResourceRequest
77+
from daft.daft import ImageFormat, ImageMode, ImageProperty, ResourceRequest
7878
from daft.dataframe import DataFrame
7979
from daft.schema import Schema
8080
from daft.datatype import DataType, TimeUnit
@@ -161,6 +161,7 @@ def refresh_logger() -> None:
161161
"Identifier",
162162
"ImageFormat",
163163
"ImageMode",
164+
"ImageProperty",
164165
"ResourceRequest",
165166
"Schema",
166167
"Series",

daft/daft/__init__.pyi

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,17 @@ class ImageMode(Enum):
7777
"""
7878
...
7979

80+
class ImageProperty(Enum):
81+
"""Supported image properties for Daft's image type."""
82+
83+
Height = 1
84+
Width = 2
85+
Channel = 3
86+
Mode = 4
87+
88+
@staticmethod
89+
def from_property_string(attr: str) -> ImageProperty: ...
90+
8091
class PyWindowBoundary:
8192
"""Represents a window frame boundary in window functions."""
8293

daft/expressions/expressions.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
CountMode,
2121
ImageFormat,
2222
ImageMode,
23+
ImageProperty,
2324
ResourceRequest,
2425
initialize_udfs,
2526
resolved_col,
@@ -5224,6 +5225,60 @@ def to_mode(self, mode: str | ImageMode) -> Expression:
52245225
f = native.get_function_from_registry("to_mode")
52255226
return Expression._from_pyexpr(f(self._expr, mode=image_mode))
52265227

5228+
def attribute(self, name: Literal["width", "height", "channel", "mode"] | ImageProperty) -> Expression:
5229+
"""Get a property of the image, such as 'width', 'height', 'channel', or 'mode'.
5230+
5231+
Args:
5232+
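name (str | ImageProperty): The property to retrieve, given either as one of the strings 'width', 'height', 'channel', or 'mode', or as an ImageProperty value.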
5233+
5234+
Returns:
5235+
Expression: An Expression representing the requested property.
5236+
"""
5237+
if isinstance(name, str):
5238+
name = ImageProperty.from_property_string(name)
5239+
f = native.get_function_from_registry("image_attribute")
5240+
return Expression._from_pyexpr(f(self._expr, lit(name)._expr))
5241+
5242+
def width(self) -> Expression:
5243+
"""Gets the width of an image in pixels.
5244+
5245+
Example:
5246+
>>> # Create a dataframe with an image column
5247+
>>> df = ... # doctest: +SKIP
5248+
>>> df = df.with_column("width", df["images"].image.width()) # doctest: +SKIP
5249+
"""
5250+
return self.attribute("width")
5251+
5252+
def height(self) -> Expression:
5253+
"""Gets the height of an image in pixels.
5254+
5255+
Example:
5256+
>>> # Create a dataframe with an image column
5257+
>>> df = ... # doctest: +SKIP
5258+
>>> df = df.with_column("height", df["images"].image.height()) # doctest: +SKIP
5259+
"""
5260+
return self.attribute("height")
5261+
5262+
def channel(self) -> Expression:
5263+
"""Gets the number of channels in an image.
5264+
5265+
Example:
5266+
>>> # Create a dataframe with an image column
5267+
>>> df = ... # doctest: +SKIP
5268+
>>> df = df.with_column("channel", df["images"].image.channel()) # doctest: +SKIP
5269+
"""
5270+
return self.attribute("channel")
5271+
5272+
def mode(self) -> Expression:
5273+
"""Gets the mode of an image as an unsigned 32-bit integer value.
5274+
5275+
Example:
5276+
>>> # Create a dataframe with an image column
5277+
>>> df = ... # doctest: +SKIP
5278+
>>> df = df.with_column("mode", df["images"].image.mode()) # doctest: +SKIP
5279+
"""
5280+
return self.attribute("mode")
5281+
52275282

52285283
class ExpressionPartitioningNamespace(ExpressionNamespace):
52295284
"""The following methods are available under the `expr.partition` attribute."""

src/daft-core/src/array/image_array.rs

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,23 +36,39 @@ impl ImageArray {
3636
}
3737

3838
pub fn channel_array(&self) -> &arrow2::array::UInt16Array {
39-
let array = self.physical.children.get(Self::IMAGE_CHANNEL_IDX).unwrap();
40-
array.u16().unwrap().as_arrow()
39+
self.channels().as_arrow()
4140
}
4241

4342
pub fn height_array(&self) -> &arrow2::array::UInt32Array {
44-
let array = self.physical.children.get(Self::IMAGE_HEIGHT_IDX).unwrap();
45-
array.u32().unwrap().as_arrow()
43+
self.heights().as_arrow()
4644
}
4745

4846
pub fn width_array(&self) -> &arrow2::array::UInt32Array {
49-
let array = self.physical.children.get(Self::IMAGE_WIDTH_IDX).unwrap();
50-
array.u32().unwrap().as_arrow()
47+
self.widths().as_arrow()
5148
}
5249

5350
pub fn mode_array(&self) -> &arrow2::array::UInt8Array {
51+
self.modes().as_arrow()
52+
}
53+
54+
pub fn channels(&self) -> &DataArray<UInt16Type> {
55+
let array = self.physical.children.get(Self::IMAGE_CHANNEL_IDX).unwrap();
56+
array.u16().unwrap()
57+
}
58+
59+
pub fn heights(&self) -> &DataArray<UInt32Type> {
60+
let array = self.physical.children.get(Self::IMAGE_HEIGHT_IDX).unwrap();
61+
array.u32().unwrap()
62+
}
63+
64+
pub fn widths(&self) -> &DataArray<UInt32Type> {
65+
let array = self.physical.children.get(Self::IMAGE_WIDTH_IDX).unwrap();
66+
array.u32().unwrap()
67+
}
68+
69+
pub fn modes(&self) -> &DataArray<UInt8Type> {
5470
let array = self.physical.children.get(Self::IMAGE_MODE_IDX).unwrap();
55-
array.u8().unwrap().as_arrow()
71+
array.u8().unwrap()
5672
}
5773

5874
pub fn from_list_array(

src/daft-core/src/lit/conversions.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use super::{FromLiteral, Literal, deserializer::LiteralDeserializer};
99
use crate::python::{PyDataType, PyTimeUnit};
1010
use crate::{
1111
datatypes::IntervalValue,
12-
prelude::{CountMode, DataType, ImageFormat, ImageMode, TimeUnit},
12+
prelude::{CountMode, DataType, ImageFormat, ImageMode, ImageProperty, TimeUnit},
1313
series::Series,
1414
};
1515

@@ -223,7 +223,8 @@ impl_float_fromliteral!(f32);
223223
impl_float_fromliteral!(f64);
224224
impl_pyobj_fromliteral!(IOConfig, common_io_config::python::IOConfig);
225225
impl_pyobj_fromliteral!(ImageMode, ImageMode);
226-
impl_pyobj_fromliteral!(ImageFormat, ImageFormat);
226+
impl_pyobj_fromliteral!(ImageProperty, ImageProperty);
227227
impl_pyobj_fromliteral!(CountMode, CountMode);
228228
impl_pyobj_fromliteral!(TimeUnit, PyTimeUnit);
229229
impl_pyobj_fromliteral!(DataType, PyDataType);
230+
impl_pyobj_fromliteral!(ImageFormat, ImageFormat);

src/daft-core/src/prelude.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
55
// Re-export arrow2 bitmap
66
pub use arrow2::bitmap;
7+
pub use daft_schema::image_property::ImageProperty;
78
// Re-export core series structures
89
pub use daft_schema::schema::{Schema, SchemaRef};
910

src/daft-image/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ common-error = {path = "../common/error", default-features = false}
55
common-image = {workspace = true}
66
daft-core = {path = "../daft-core", default-features = false}
77
daft-dsl = {path = "../daft-dsl", default-features = false}
8+
daft-schema = {path = "../daft-schema", default-features = false}
89
log = {workspace = true}
910
serde = {workspace = true}
1011
typetag = {workspace = true}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
use common_error::{DaftError, DaftResult};
2+
use daft_core::prelude::*;
3+
use daft_dsl::{
4+
ExprRef,
5+
functions::{FunctionArgs, ScalarUDF},
6+
};
7+
use daft_schema::image_property::ImageProperty;
8+
use serde::{Deserialize, Serialize};
9+
10+
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
11+
pub struct ImageAttribute;
12+
13+
#[derive(FunctionArgs)]
14+
struct ImageAttributeArgs<T> {
15+
input: T,
16+
attr: ImageProperty,
17+
}
18+
19+
#[typetag::serde]
20+
impl ScalarUDF for ImageAttribute {
21+
fn call(&self, inputs: FunctionArgs<Series>) -> DaftResult<Series> {
22+
let ImageAttributeArgs { input, attr } = inputs.try_into()?;
23+
crate::series::attribute(&input, attr)
24+
}
25+
26+
fn name(&self) -> &'static str {
27+
"image_attribute"
28+
}
29+
30+
fn get_return_field(
31+
&self,
32+
inputs: FunctionArgs<ExprRef>,
33+
schema: &Schema,
34+
) -> DaftResult<Field> {
35+
let ImageAttributeArgs { input, .. } = inputs.try_into()?;
36+
37+
let input_field = input.to_field(schema)?;
38+
match input_field.dtype {
39+
DataType::Image(_) | DataType::FixedShapeImage(..) => {
40+
Ok(Field::new(input_field.name, DataType::UInt32))
41+
}
42+
_ => Err(DaftError::TypeError(format!(
43+
"Image attribute can only be retrieved from ImageArrays, got {}",
44+
input_field.dtype
45+
))),
46+
}
47+
}
48+
49+
fn docstring(&self) -> &'static str {
50+
"Extracts metadata attributes from image series (height/width/channels/mode)"
51+
}
52+
}

0 commit comments

Comments
 (0)