chore: python version to 3.11 (while supporting 3.10) #31503

Open · wants to merge 3 commits into base: master
8 changes: 5 additions & 3 deletions .github/actions/setup-backend/action.yml
@@ -26,11 +26,12 @@ runs:
shell: bash
run: |
if [ "${{ inputs.python-version }}" = "current" ]; then
echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV
elif [ "${{ inputs.python-version }}" = "next" ]; then
echo "PYTHON_VERSION=3.11" >> $GITHUB_ENV
elif [ "${{ inputs.python-version }}" = "next" ]; then
# currently disabled in GHA matrixes because of library compatibility issues
echo "PYTHON_VERSION=3.12" >> $GITHUB_ENV
elif [ "${{ inputs.python-version }}" = "previous" ]; then
echo "PYTHON_VERSION=3.9" >> $GITHUB_ENV
echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV
else
echo "PYTHON_VERSION=${{ inputs.python-version }}" >> $GITHUB_ENV
fi
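Note: with this change the version aliases resolve to previous → 3.10, current → 3.11, and next → 3.12; the workflow matrices below drop "next" because Python 3.12 is still blocked by library compatibility issues.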
@@ -43,6 +44,7 @@ runs:
run: |
if [ "${{ inputs.install-superset }}" = "true" ]; then
sudo apt-get update && sudo apt-get -y install libldap2-dev libsasl2-dev

pip install --upgrade pip setuptools wheel uv

if [ "${{ inputs.requirements-type }}" = "dev" ]; then
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
python-version: ["current", "next", "previous"]
python-version: ["current", "previous"]
steps:
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/superset-python-integrationtest.yml
@@ -77,7 +77,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
python-version: ["current", "next", "previous"]
python-version: ["current", "previous"]
env:
PYTHONPATH: ${{ github.workspace }}
SUPERSET_CONFIG: tests.integration_tests.superset_test_config
2 changes: 1 addition & 1 deletion .github/workflows/superset-python-unittest.yml
@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
python-version: ["current", "next"]
python-version: ["previous", "current"]
env:
PYTHONPATH: ${{ github.workspace }}
steps:
2 changes: 1 addition & 1 deletion Dockerfile
@@ -18,7 +18,7 @@
######################################################################
# Node stage to deal with static asset construction
######################################################################
ARG PY_VER=3.10-slim-bookworm
ARG PY_VER=3.11-slim-bookworm

# If BUILDPLATFORM is null, set it to 'amd64' (or leave as is otherwise).
ARG BUILDPLATFORM=${BUILDPLATFORM:-amd64}
9 changes: 4 additions & 5 deletions pyproject.toml
@@ -24,13 +24,12 @@ name = "apache-superset"
description = "A modern, enterprise-ready business intelligence web application"
readme = "README.md"
dynamic = ["version", "scripts", "entry-points"]
requires-python = ">=3.9"
requires-python = ">=3.10"
license = { file="LICENSE.txt" }
authors = [
{ name = "Apache Software Foundation", email = "[email protected]" },
]
classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
]
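Raising requires-python to ">=3.10" (and dropping the 3.9 trove classifier) means installers will refuse the package on Python 3.9. A minimal sketch, not part of this diff, of checking the new floor with the packaging library already listed in the dependencies:

```python
from packaging.specifiers import SpecifierSet

# Mirrors the requires-python value above.
python_floor = SpecifierSet(">=3.10")

print(python_floor.contains("3.9.21"))   # False: 3.9 interpreters are rejected
print(python_floor.contains("3.10.14"))  # True
print(python_floor.contains("3.11.9"))   # True
```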
@@ -67,7 +66,7 @@ dependencies = [
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
"nh3>=0.2.11, <0.3",
"numpy==1.23.5",
"numpy>1.23.5, <2",
"packaging",
# --------------------------
# pandas and related (wanting pandas[performance] without numba as it's 100+MB and not needed)
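Relaxing numpy from the hard ==1.23.5 pin to a >1.23.5, <2 range lets pip-compile resolve a newer 1.x release; the compiled requirements below land on 1.26.4, presumably to pick up wheels for newer interpreters while staying clear of the NumPy 2.0 API changes.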
@@ -275,8 +274,8 @@ exclude = [
line-length = 88
indent-width = 4

# Assume Python 3.9
target-version = "py39"
# Assume Python 3.10
target-version = "py310"

[tool.ruff.lint]
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
5 changes: 0 additions & 5 deletions requirements/base.in
@@ -23,8 +23,3 @@ numexpr>=2.9.0
# 5.0.0 has a sensitive deprecation used in other libs
# -> https://github.com/aio-libs/async-timeout/blob/master/CHANGES.rst#500-2024-10-31
async_timeout>=4.0.0,<5.0.0

# playwright requires greenlet==3.0.3
# submitted a PR to relax deps in 11/2024
# https://github.com/microsoft/playwright-python/pull/2669
greenlet==3.0.3
3 changes: 1 addition & 2 deletions requirements/base.txt
@@ -146,7 +146,6 @@ google-auth==2.36.0
# via shillelagh
greenlet==3.0.3
# via
# -r requirements/base.in
# apache-superset (pyproject.toml)
# shillelagh
gunicorn==23.0.0
@@ -215,7 +214,7 @@ nh3==0.2.19
# via apache-superset (pyproject.toml)
numexpr==2.10.2
# via -r requirements/base.in
numpy==1.23.5
numpy==1.26.4
# via
# apache-superset (pyproject.toml)
# bottleneck
2 changes: 1 addition & 1 deletion requirements/development.txt
@@ -448,7 +448,7 @@ nh3==0.2.19
# apache-superset
nodeenv==1.8.0
# via pre-commit
numpy==1.23.5
numpy==1.26.4
# via
# -c requirements/base.txt
# apache-superset
2 changes: 1 addition & 1 deletion superset/commands/dashboard/export.py
@@ -83,7 +83,7 @@ def append_charts(position: dict[str, Any], charts: set[Slice]) -> dict[str, Any
"parents": ["ROOT_ID", "GRID_ID"],
}

for chart_hash, chart in zip(chart_hashes, charts):
for chart_hash, chart in zip(chart_hashes, charts, strict=False):
position[chart_hash] = {
"children": [],
"id": chart_hash,
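This and the remaining Python changes in the PR follow a single pattern: zip() calls gain an explicit strict=False. The strict keyword exists since Python 3.10 (PEP 618), and linters such as ruff's zip-without-explicit-strict (B905) flag bare zip() calls once the lint target is 3.10 or later, so these edits are presumably lint-driven rather than behavioral. A minimal sketch of what the flag controls:

```python
# strict=False keeps zip()'s historical behavior: iteration stops silently
# at the shortest input. strict=True (Python 3.10+) raises instead, which is
# safer when the inputs are expected to have equal length.
pairs = list(zip([1, 2, 3], ["a", "b"], strict=False))
assert pairs == [(1, "a"), (2, "b")]

try:
    list(zip([1, 2, 3], ["a", "b"], strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is shorter than argument 1
```

Using strict=False rather than strict=True keeps runtime behavior identical to the code being replaced.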
1 change: 1 addition & 0 deletions superset/connectors/sqla/models.py
@@ -1903,6 +1903,7 @@ def query_datasources_by_permissions( # pylint: disable=invalid-name
for method, perms in zip(
(SqlaTable.perm, SqlaTable.schema_perm, SqlaTable.catalog_perm),
(permissions, schema_perms, catalog_perms),
strict=False,
)
if perms
]
2 changes: 1 addition & 1 deletion superset/db_engine_specs/hive.py
@@ -440,7 +440,7 @@ def where_latest_partition(
# table is not partitioned
return None
if values is not None and columns is not None:
for col_name, value in zip(col_names, values):
for col_name, value in zip(col_names, values, strict=False):
for clm in columns:
if clm.get("name") == col_name:
query = query.where(Column(col_name) == value)
4 changes: 3 additions & 1 deletion superset/db_engine_specs/ocient.py
@@ -348,7 +348,9 @@ def identity(x: Any) -> Any:
rows = [
tuple(
sanitize_func(val)
for sanitize_func, val in zip(sanitization_functions, row)
for sanitize_func, val in zip(
sanitization_functions, row, strict=False
)
)
for row in rows
]
6 changes: 3 additions & 3 deletions superset/db_engine_specs/presto.py
@@ -545,7 +545,7 @@ def where_latest_partition(
column.get("column_name"): column.get("type") for column in columns or []
}

for col_name, value in zip(col_names, values):
for col_name, value in zip(col_names, values, strict=False):
col_type = column_type_by_name.get(col_name)

if isinstance(col_type, str):
@@ -1240,7 +1240,7 @@ def expand_data( # pylint: disable=too-many-locals # noqa: C901
if isinstance(values, str):
values = cast(Optional[list[Any]], destringify(values))
row[name] = values
for value, col in zip(values or [], expanded):
for value, col in zip(values or [], expanded, strict=False):
row[col["column_name"]] = value

data = [
@@ -1271,7 +1271,7 @@ def get_extra_table_metadata(

metadata["partitions"] = {
"cols": sorted(indexes[0].get("column_names", [])),
"latest": dict(zip(col_names, latest_parts)),
"latest": dict(zip(col_names, latest_parts, strict=False)),
"partitionQuery": cls._partition_query(
table=table,
indexes=indexes,
2 changes: 1 addition & 1 deletion superset/db_engine_specs/redshift.py
@@ -131,7 +131,7 @@ def df_to_sql(
# uses the max size for redshift nvarchar(65335)
# the default object and string types create a varchar(256)
col_name: NVARCHAR(length=65535)
for col_name, type in zip(df.columns, df.dtypes)
for col_name, type in zip(df.columns, df.dtypes, strict=False)
if isinstance(type, pd.StringDtype)
}

2 changes: 1 addition & 1 deletion superset/db_engine_specs/trino.py
@@ -111,7 +111,7 @@ def get_extra_table_metadata(
}
)
),
"latest": dict(zip(col_names, latest_parts)),
"latest": dict(zip(col_names, latest_parts, strict=False)),
"partitionQuery": cls._partition_query(
table=table,
indexes=indexes,
2 changes: 1 addition & 1 deletion superset/extensions/metadb.py
@@ -412,7 +412,7 @@ def get_data(
connection = engine.connect()
rows = connection.execute(query)
for i, row in enumerate(rows):
data = dict(zip(self.columns, row))
data = dict(zip(self.columns, row, strict=False))
data["rowid"] = data[self._rowid] if self._rowid else i
yield data

2 changes: 1 addition & 1 deletion superset/models/helpers.py
@@ -1971,7 +1971,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma

self.make_orderby_compatible(select_exprs, orderby_exprs)

for col, (orig_col, ascending) in zip(orderby_exprs, orderby): # noqa: B007
for col, (orig_col, ascending) in zip(orderby_exprs, orderby, strict=False): # noqa: B007
if not db_engine_spec.allows_alias_in_orderby and isinstance(col, Label):
# if engine does not allow using SELECT alias in ORDER BY
# revert to the underlying column
4 changes: 3 additions & 1 deletion superset/result_set.py
@@ -123,7 +123,9 @@ def __init__( # pylint: disable=too-many-locals # noqa: C901
# fix cursor descriptor with the deduped names
deduped_cursor_desc = [
tuple([column_name, *list(description)[1:]]) # noqa: C409
for column_name, description in zip(column_names, cursor_description)
for column_name, description in zip(
column_names, cursor_description, strict=False
)
]

# generate numpy structured array dtype
2 changes: 1 addition & 1 deletion superset/utils/excel.py
@@ -56,7 +56,7 @@ def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
def apply_column_types(
df: pd.DataFrame, column_types: list[GenericDataType]
) -> pd.DataFrame:
for column, column_type in zip(df.columns, column_types):
for column, column_type in zip(df.columns, column_types, strict=False):
if column_type == GenericDataType.NUMERIC:
try:
df[column] = pd.to_numeric(df[column])
7 changes: 5 additions & 2 deletions superset/utils/mock_data.py
@@ -221,8 +221,11 @@ def get_column_objects(columns: list[ColumnInfo]) -> list[Column]:
def generate_data(columns: list[ColumnInfo], num_rows: int) -> list[dict[str, Any]]:
keys = [column["name"] for column in columns]
return [
dict(zip(keys, row))
for row in zip(*[generate_column_data(column, num_rows) for column in columns])
dict(zip(keys, row, strict=False))
for row in zip(
*[generate_column_data(column, num_rows) for column in columns],
strict=False,
)
]


2 changes: 1 addition & 1 deletion superset/utils/pandas_postprocessing/compare.py
@@ -59,7 +59,7 @@ def compare( # pylint: disable=too-many-arguments
if len(source_columns) == 0:
return df

for s_col, c_col in zip(source_columns, compare_columns):
for s_col, c_col in zip(source_columns, compare_columns, strict=False):
s_df = df.loc[:, [s_col]]
s_df.rename(columns={s_col: "__intermediate"}, inplace=True)
c_df = df.loc[:, [c_col]]
4 changes: 2 additions & 2 deletions superset/utils/pandas_postprocessing/geography.py
@@ -40,7 +40,7 @@ def geohash_decode(
try:
lonlat_df = DataFrame()
lonlat_df["latitude"], lonlat_df["longitude"] = zip(
*df[geohash].apply(geohash_lib.decode)
*df[geohash].apply(geohash_lib.decode), strict=False
)
return _append_columns(
df, lonlat_df, {"latitude": latitude, "longitude": longitude}
@@ -109,7 +109,7 @@ def _parse_location(location: str) -> tuple[float, float, float]:
geodetic_df["latitude"],
geodetic_df["longitude"],
geodetic_df["altitude"],
) = zip(*df[geodetic].apply(_parse_location))
) = zip(*df[geodetic].apply(_parse_location), strict=False)
columns = {"latitude": latitude, "longitude": longitude}
if altitude:
columns["altitude"] = altitude
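The geography changes also pass strict=False through the zip(*rows) transpose idiom, where a column of (lat, lon) tuples is unpacked into separate latitude and longitude columns. A small illustrative sketch with made-up values, not Superset code:

```python
import pandas as pd

# A Series of (lat, lon) tuples, e.g. the output of a geohash decoder.
decoded = pd.Series([(37.77, -122.42), (40.71, -74.01)])

# zip(*rows) transposes the tuples; strict=False mirrors the call sites above.
latitudes, longitudes = zip(*decoded, strict=False)
print(latitudes)   # (37.77, 40.71)
print(longitudes)  # (-122.42, -74.01)
```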
2 changes: 1 addition & 1 deletion superset/utils/pandas_postprocessing/histogram.py
@@ -71,7 +71,7 @@ def hist_values(series: Series) -> np.ndarray:

if len(groupby) == 0:
# without grouping
hist_dict = dict(zip(bin_edges_str, hist_values(df[column])))
hist_dict = dict(zip(bin_edges_str, hist_values(df[column]), strict=False))
histogram_df = DataFrame(hist_dict, index=[0])
else:
# with grouping
2 changes: 2 additions & 0 deletions superset/viz.py
@@ -1779,6 +1779,7 @@ def get_data(self, df: pd.DataFrame) -> VizData:
df[self.form_data.get("all_columns_y")],
metric_col,
point_radius_col,
strict=False,
)
],
}
@@ -1902,6 +1903,7 @@ def process_spatial_data_obj(self, key: str, df: pd.DataFrame) -> pd.DataFrame:
zip(
pd.to_numeric(df[spatial.get("lonCol")], errors="coerce"),
pd.to_numeric(df[spatial.get("latCol")], errors="coerce"),
strict=False,
)
)
elif spatial.get("type") == "delimited":
4 changes: 3 additions & 1 deletion tests/integration_tests/core_tests.py
@@ -673,7 +673,9 @@ def test_explore_json_dist_bar_order(self):
count_ds = series["values"]
if series["key"] == "COUNT(name)":
count_name = series["values"]
for expected, actual_ds, actual_name in zip(resp["data"], count_ds, count_name):
for expected, actual_ds, actual_name in zip(
resp["data"], count_ds, count_name, strict=False
):
assert expected["count_name"] == actual_name["y"]
assert expected["count_ds"] == actual_ds["y"]

6 changes: 4 additions & 2 deletions tests/integration_tests/db_engine_specs/presto_tests.py
@@ -87,7 +87,7 @@ def verify_presto_column(self, column, expected_results):
inspector.bind.execute.return_value.fetchall = mock.Mock(return_value=[row])
results = PrestoEngineSpec.get_columns(inspector, Table("", ""))
assert len(expected_results) == len(results)
for expected_result, result in zip(expected_results, results):
for expected_result, result in zip(expected_results, results, strict=False):
assert expected_result[0] == result["column_name"]
assert expected_result[1] == str(result["type"])

@@ -191,7 +191,9 @@ def test_presto_get_fields(self):
"label": 'column."quoted.nested obj"',
},
]
for actual_result, expected_result in zip(actual_results, expected_results):
for actual_result, expected_result in zip(
actual_results, expected_results, strict=False
):
assert actual_result.element.name == expected_result["column_name"]
assert actual_result.name == expected_result["label"]

5 changes: 3 additions & 2 deletions tests/integration_tests/dict_import_export_tests.py
@@ -80,15 +80,16 @@ def create_table(
"id": id,
"params": json.dumps(params),
"columns": [
{"column_name": c, "uuid": u} for c, u in zip(cols_names, cols_uuids)
{"column_name": c, "uuid": u}
for c, u in zip(cols_names, cols_uuids, strict=False)
],
"metrics": [{"metric_name": c, "expression": ""} for c in metric_names],
}

table = SqlaTable(
id=id, schema=schema, table_name=name, params=json.dumps(params)
)
for col_name, uuid in zip(cols_names, cols_uuids):
for col_name, uuid in zip(cols_names, cols_uuids, strict=False):
table.columns.append(TableColumn(column_name=col_name, uuid=uuid))
for metric_name in metric_names:
table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
4 changes: 2 additions & 2 deletions tests/integration_tests/import_export_tests.py
@@ -153,7 +153,7 @@ def assert_dash_equals(
assert len(expected_dash.slices) == len(actual_dash.slices)
expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "")
actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "")
for e_slc, a_slc in zip(expected_slices, actual_slices):
for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False):
self.assert_slice_equals(e_slc, a_slc)
if check_position:
assert expected_dash.position_json == actual_dash.position_json
@@ -212,7 +212,7 @@ def assert_only_exported_slc_fields(self, expected_dash, actual_dash):
"""
expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "")
actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "")
for e_slc, a_slc in zip(expected_slices, actual_slices):
for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False):
params = a_slc.params_dict
assert e_slc.datasource.name == params["datasource_name"]
assert e_slc.datasource.schema == params["schema"]