Purge dask-expr (#1648)

hendrikmakait · web-flow · commit 1e9d69702cec · 2025-01-16T13:19:42.000+01:00
diff --git a/AB_environments/AB_baseline.conda.yaml b/AB_environments/AB_baseline.conda.yaml
@@ -14,7 +14,6 @@ dependencies:
   - pandas ==2.2.3
   - dask ==2024.11.2
   - distributed ==2024.11.2
-  - dask-expr ==1.1.19
   - dask-labextension ==7.0.0
   - dask-ml ==2024.4.4
   - fsspec ==2024.10.0
@@ -68,4 +67,3 @@ dependencies:
     # Read README.md for troubleshooting.
     # - git+https://github.com/dask/dask@191d39177009d2cce25b818878118e35329b6db3
     # - git+https://github.com/dask/distributed@0304fb6e665e36abf9e3086173cccd36e29ae84d
-    # - git+https://github.com/dask-contrib/dask-expr@9f765764da3f518ddd4c896c98b8a40a979a5553
diff --git a/AB_environments/AB_sample.conda.yaml b/AB_environments/AB_sample.conda.yaml
@@ -20,7 +20,6 @@ dependencies:
   - pandas ==2.2.3
   - dask ==2024.11.2
   - distributed ==2024.11.2
-  - dask-expr ==1.1.19
   - dask-labextension ==7.0.0
   - dask-ml ==2024.4.4
   - fsspec ==2024.10.0
@@ -74,4 +73,3 @@ dependencies:
     # Read README.md for troubleshooting.
     - git+https://github.com/dask/dask@191d39177009d2cce25b818878118e35329b6db3
     - git+https://github.com/dask/distributed@0304fb6e665e36abf9e3086173cccd36e29ae84d
-    - git+https://github.com/dask-contrib/dask-expr@9f765764da3f518ddd4c896c98b8a40a979a5553
diff --git a/AB_environments/README.md b/AB_environments/README.md
@@ -222,7 +222,6 @@ As a handy copy-paste to run from the root dir of this repository:
 ```bash
 pushd ../dask        && git fetch upstream --tags && git push origin --tags && popd
 pushd ../distributed && git fetch upstream --tags && git push origin --tags && popd
-pushd ../dask-expr   && git fetch upstream --tags && git push origin --tags && popd
 ```
 
 #### Problem:
diff --git a/ci/environment-dashboard.yml b/ci/environment-dashboard.yml
@@ -14,7 +14,6 @@ dependencies:
   - coiled
   - conda
   - dask
-  - dask-expr
   - dask-ml
   - distributed
   - filelock
diff --git a/ci/environment-git-tip.yml b/ci/environment-git-tip.yml
@@ -7,4 +7,3 @@ dependencies:
     - git+https://github.com/dask/dask
     - git+https://github.com/dask/distributed
     - git+https://github.com/dask/zict
-    - git+https://github.com/dask-contrib/dask-expr
diff --git a/ci/environment.yml b/ci/environment.yml
@@ -15,7 +15,6 @@ dependencies:
   - pandas ==2.2.3
   - dask ==2024.11.2
   - distributed ==2024.11.2
-  - dask-expr ==1.1.19
   - dask-labextension ==7.0.0
   - dask-ml ==2024.4.4
   - fsspec ==2024.10.0
diff --git a/tests/benchmarks/test_join.py b/tests/benchmarks/test_join.py
@@ -24,7 +24,7 @@ def test_join_big(small_client, memory_multiplier):
     df2_big = df2_big.astype({"predicate": "int"})
 
     join = df1_big.merge(df2_big, on="predicate", how="inner")
-    # dask-expr will drop all columns except the Index for size
+    # dask.dataframe will drop all columns except the Index for size
     # computations, which will optimize itself through merges, e.g.
     # shuffling a lot less data than what we want to test
     # map_partitions blocks those optimizations
@@ -55,7 +55,7 @@ def test_join_big_small(small_client, memory_multiplier, configure_shuffling):
     df_small_pd = df_small.astype({"predicate": "int"}).compute()
 
     join = df_big.merge(df_small_pd, on="predicate", how="inner")
-    # dask-expr will drop all columns except the Index for size
+    # dask.dataframe will drop all columns except the Index for size
     # computations, which will optimize itself through merges, e.g.
     # shuffling a lot less data than what we want to test
     # map_partitions blocks those optimizations
@@ -77,7 +77,7 @@ def test_set_index(small_client, persist, memory_multiplier):
     if persist:
         df_big = df_big.persist()
     df_indexed = df_big.set_index("0")
-    # dask-expr will drop all columns except the Index for size
+    # dask.dataframe will drop all columns except the Index for size
     # computations, which will optimize itself through set_index, e.g.
     # shuffling a lot less data than what we want to test
     # map_partitions blocks those optimizations
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -17,7 +17,6 @@
 import adlfs
 import dask
 import dask.array as da
-import dask_expr
 import distributed
 import filelock
 import gcsfs
@@ -174,7 +173,6 @@ def database_table_schema(request, testrun_uid):
         originalname=request.node.originalname,
         path=str(request.node.path.relative_to(TEST_DIR)),
         dask_version=dask.__version__,
-        dask_expr_version=dask_expr.__version__,
         distributed_version=distributed.__version__,
         coiled_runtime_version=os.environ.get("AB_VERSION", "upstream"),
         coiled_software_name=COILED_SOFTWARE_NAME,
diff --git a/tests/tpch/conftest.py b/tests/tpch/conftest.py
@@ -7,7 +7,6 @@
 
 import coiled
 import dask
-import dask_expr
 import distributed
 import filelock
 import pytest
@@ -142,7 +141,6 @@ def tpch_database_table_schema(request, testrun_uid, scale, query, local):
         originalname=request.node.originalname,
         path=str(request.node.path.relative_to(TEST_DIR)),
         dask_version=dask.__version__,
-        dask_expr_version=dask_expr.__version__,
         distributed_version=distributed.__version__,
         python_version=".".join(map(str, sys.version_info)),
         platform=sys.platform,
diff --git a/tests/tpch/dask_queries.py b/tests/tpch/dask_queries.py
@@ -949,7 +949,7 @@ def query_18(dataset_path, fs, scale):
     orders = dd.read_parquet(dataset_path + "orders", filesystem=fs)
     lineitem = dd.read_parquet(dataset_path + "lineitem", filesystem=fs)
 
-    # FIXME: https://github.com/dask-contrib/dask-expr/issues/867
+    # FIXME: https://github.com/dask/dask-expr/issues/867
     qnt_over_300 = (
         lineitem.groupby("l_orderkey").l_quantity.sum(split_out=True).reset_index()
     )
diff --git a/tests/tpch/test_dask.py b/tests/tpch/test_dask.py
@@ -15,7 +15,7 @@
 @pytest.fixture(scope="session")
 def dataset_path(local, scale):
     if local:
-        # FIXME: pyarrow local fs is a bit odd. dask-expr should deal with this
+        # FIXME: pyarrow local fs is a bit odd. dask.dataframe should deal with this
         return "file://" + os.path.abspath(get_dataset_path(local, scale)) + "/"
     else:
         return get_dataset_path(local, scale)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -949,7 +949,7 @@ def query_18(dataset_path, fs, scale):` |
| `949` | `949` | `orders = dd.read_parquet(dataset_path + "orders", filesystem=fs)` |
| `950` | `950` | `lineitem = dd.read_parquet(dataset_path + "lineitem", filesystem=fs)` |
| `951` | `951` |  |
| `952` |  | `- # FIXME: https://github.com/dask-contrib/dask-expr/issues/867` |
|  | `952` | `+ # FIXME: https://github.com/dask/dask-expr/issues/867` |
| `953` | `953` | `qnt_over_300 = (` |
| `954` | `954` | `lineitem.groupby("l_orderkey").l_quantity.sum(split_out=True).reset_index()` |
| `955` | `955` | `)` |