From 02593b370525c30577883f26be792f1d6ff31c88 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 07:05:49 -0400 Subject: [PATCH 1/5] Raise jsonpickle upper bound. --- sdks/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 961f890e1edf..383feab23a8e 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -426,7 +426,7 @@ def get_portability_package_data(): 'fasteners>=0.3,<1.0', 'grpcio>=1.33.1,<2,!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,!=1.66.*,!=1.67.*,!=1.68.*,!=1.69.*,!=1.70.*', # pylint: disable=line-too-long 'httplib2>=0.8,<0.32.0', - 'jsonpickle>=3.0.0,<4.0.0', + 'jsonpickle>=3.0.0,<5.0.0', # numpy can have breaking changes in minor versions. # Use a strict upper bound. 'numpy>=1.14.3,<2.5.0', # Update pyproject.toml as well. From 8157a8c36ca77276f66c48bd59280549edec417a Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 08:29:33 -0400 Subject: [PATCH 2/5] add compat guard --- sdks/python/apache_beam/ml/transforms/base.py | 14 ++++++- .../apache_beam/ml/transforms/base_test.py | 42 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/ml/transforms/base.py b/sdks/python/apache_beam/ml/transforms/base.py index f52e9df40566..6ed53021a94e 100644 --- a/sdks/python/apache_beam/ml/transforms/base.py +++ b/sdks/python/apache_beam/ml/transforms/base.py @@ -42,6 +42,7 @@ from apache_beam.ml.inference.base import ModelT from apache_beam.ml.inference.base import RunInferenceDLQ from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.options.pipeline_options_context import get_pipeline_options _LOGGER = logging.getLogger(__name__) _ATTRIBUTE_FILE_NAME = 'attributes.json' @@ -591,7 +592,18 @@ def save_attributes( def load_attributes(artifact_location): with FileSystems.open(os.path.join(artifact_location, _ATTRIBUTE_FILE_NAME), 'rb') as f: - return jsonpickle.decode(f.read()) + # load_attributes runs eagerly during MLTransform.expand() at pipeline + # construction time, so the pipeline's options are available via the + # construction-time context. + pipeline_options = get_pipeline_options() + safe = True + if (pipeline_options is not None and + pipeline_options.is_compat_version_prior_to("2.75.0")): + # Keep the pre-2.75.0 jsonpickle behavior (safe=False permits + # eval-based decoding) for backwards compatibility with already-staged + # artifacts. + safe = False + return jsonpickle.decode(f.read(), safe=safe) _transform_attribute_manager = _JsonPickleTransformAttributeManager diff --git a/sdks/python/apache_beam/ml/transforms/base_test.py b/sdks/python/apache_beam/ml/transforms/base_test.py index 64d0afe955d0..3082f9f2c18c 100644 --- a/sdks/python/apache_beam/ml/transforms/base_test.py +++ b/sdks/python/apache_beam/ml/transforms/base_test.py @@ -26,6 +26,7 @@ from dataclasses import dataclass from typing import Any from typing import Optional +from unittest import mock import numpy as np from parameterized import param @@ -36,6 +37,7 @@ from apache_beam.ml.inference.base import ModelHandler from apache_beam.ml.inference.base import RunInference from apache_beam.ml.transforms import base +from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to @@ -841,6 +843,46 @@ def test_save_and_load_run_inference(self): self.assertListEqual( get_keys(model_handler), get_keys(loaded_model_handler)) + @parameterized.expand([ + # Pipelines pinned to a version older than 2.75.0 keep the pre-2.75.0 + # jsonpickle behavior (safe=False, which permits eval-based decoding). + param(update_compatibility_version='2.74.0', expected_safe=False), + # The breaking-change version itself and newer decode securely. + param(update_compatibility_version='2.75.0', expected_safe=True), + # Pipelines that do not set the option (the common case) decode securely. + param(update_compatibility_version=None, expected_safe=True), + ]) + def test_load_attributes_safe_flag_follows_compat_version( + self, update_compatibility_version, expected_safe): + data = [{'x': 'Hello world'}, {'x': 'Apache Beam'}] + with beam.Pipeline() as p: + _ = ( + p + | beam.Create(data) + | base.MLTransform( + write_artifact_location=self.artifact_location).with_transform( + FakeEmbeddingsManager(columns=['x']))) + + # FakeEmbeddingsManager reverses the values of the embedded columns. + expected_data = [{'x': d['x'][::-1]} for d in data] + + options = PipelineOptions( + update_compatibility_version=update_compatibility_version) + with mock.patch.object(base.jsonpickle, + 'decode', + wraps=base.jsonpickle.decode) as mock_decode: + with beam.Pipeline(options=options) as p: + result = ( + p + | beam.Create(data) + | base.MLTransform(read_artifact_location=self.artifact_location)) + assert_that(result, equal_to(expected_data)) + + safe_flags = [ + call.kwargs.get('safe') for call in mock_decode.call_args_list + ] + self.assertEqual(safe_flags, [expected_safe]) + def test_mltransform_to_ptransform_wrapper(self): transforms = [ FakeEmbeddingsManager(columns=['x']), From 61f84ee606cb9901694f0d314035a99576ddf516 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 2 Jun 2026 08:50:11 -0400 Subject: [PATCH 3/5] Increase lower bound --- sdks/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 383feab23a8e..189aaea56e31 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -426,7 +426,7 @@ def get_portability_package_data(): 'fasteners>=0.3,<1.0', 'grpcio>=1.33.1,<2,!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,!=1.66.*,!=1.67.*,!=1.68.*,!=1.69.*,!=1.70.*', # pylint: disable=line-too-long 'httplib2>=0.8,<0.32.0', - 'jsonpickle>=3.0.0,<5.0.0', + 'jsonpickle>=3.0.4,<5.0.0', # numpy can have breaking changes in minor versions. # Use a strict upper bound. 'numpy>=1.14.3,<2.5.0', # Update pyproject.toml as well. From fdbd354ec685d73c4a80e5fd0ae85c8c2f1cabd3 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 11:21:49 -0400 Subject: [PATCH 4/5] remove compat check --- sdks/python/apache_beam/ml/transforms/base.py | 13 +----- .../apache_beam/ml/transforms/base_test.py | 42 ------------------- 2 files changed, 1 insertion(+), 54 deletions(-) diff --git a/sdks/python/apache_beam/ml/transforms/base.py b/sdks/python/apache_beam/ml/transforms/base.py index 6ed53021a94e..a6a90ee76405 100644 --- a/sdks/python/apache_beam/ml/transforms/base.py +++ b/sdks/python/apache_beam/ml/transforms/base.py @@ -592,18 +592,7 @@ def save_attributes( def load_attributes(artifact_location): with FileSystems.open(os.path.join(artifact_location, _ATTRIBUTE_FILE_NAME), 'rb') as f: - # load_attributes runs eagerly during MLTransform.expand() at pipeline - # construction time, so the pipeline's options are available via the - # construction-time context. - pipeline_options = get_pipeline_options() - safe = True - if (pipeline_options is not None and - pipeline_options.is_compat_version_prior_to("2.75.0")): - # Keep the pre-2.75.0 jsonpickle behavior (safe=False permits - # eval-based decoding) for backwards compatibility with already-staged - # artifacts. - safe = False - return jsonpickle.decode(f.read(), safe=safe) + return jsonpickle.decode(f.read()) _transform_attribute_manager = _JsonPickleTransformAttributeManager diff --git a/sdks/python/apache_beam/ml/transforms/base_test.py b/sdks/python/apache_beam/ml/transforms/base_test.py index 3082f9f2c18c..64d0afe955d0 100644 --- a/sdks/python/apache_beam/ml/transforms/base_test.py +++ b/sdks/python/apache_beam/ml/transforms/base_test.py @@ -26,7 +26,6 @@ from dataclasses import dataclass from typing import Any from typing import Optional -from unittest import mock import numpy as np from parameterized import param @@ -37,7 +36,6 @@ from apache_beam.ml.inference.base import ModelHandler from apache_beam.ml.inference.base import RunInference from apache_beam.ml.transforms import base -from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to @@ -843,46 +841,6 @@ def test_save_and_load_run_inference(self): self.assertListEqual( get_keys(model_handler), get_keys(loaded_model_handler)) - @parameterized.expand([ - # Pipelines pinned to a version older than 2.75.0 keep the pre-2.75.0 - # jsonpickle behavior (safe=False, which permits eval-based decoding). - param(update_compatibility_version='2.74.0', expected_safe=False), - # The breaking-change version itself and newer decode securely. - param(update_compatibility_version='2.75.0', expected_safe=True), - # Pipelines that do not set the option (the common case) decode securely. - param(update_compatibility_version=None, expected_safe=True), - ]) - def test_load_attributes_safe_flag_follows_compat_version( - self, update_compatibility_version, expected_safe): - data = [{'x': 'Hello world'}, {'x': 'Apache Beam'}] - with beam.Pipeline() as p: - _ = ( - p - | beam.Create(data) - | base.MLTransform( - write_artifact_location=self.artifact_location).with_transform( - FakeEmbeddingsManager(columns=['x']))) - - # FakeEmbeddingsManager reverses the values of the embedded columns. - expected_data = [{'x': d['x'][::-1]} for d in data] - - options = PipelineOptions( - update_compatibility_version=update_compatibility_version) - with mock.patch.object(base.jsonpickle, - 'decode', - wraps=base.jsonpickle.decode) as mock_decode: - with beam.Pipeline(options=options) as p: - result = ( - p - | beam.Create(data) - | base.MLTransform(read_artifact_location=self.artifact_location)) - assert_that(result, equal_to(expected_data)) - - safe_flags = [ - call.kwargs.get('safe') for call in mock_decode.call_args_list - ] - self.assertEqual(safe_flags, [expected_safe]) - def test_mltransform_to_ptransform_wrapper(self): transforms = [ FakeEmbeddingsManager(columns=['x']), From 20dadc51070940aa21ea4a6284a2a93c25648099 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 11:22:57 -0400 Subject: [PATCH 5/5] remove import --- sdks/python/apache_beam/ml/transforms/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sdks/python/apache_beam/ml/transforms/base.py b/sdks/python/apache_beam/ml/transforms/base.py index a6a90ee76405..f52e9df40566 100644 --- a/sdks/python/apache_beam/ml/transforms/base.py +++ b/sdks/python/apache_beam/ml/transforms/base.py @@ -42,7 +42,6 @@ from apache_beam.ml.inference.base import ModelT from apache_beam.ml.inference.base import RunInferenceDLQ from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.options.pipeline_options_context import get_pipeline_options _LOGGER = logging.getLogger(__name__) _ATTRIBUTE_FILE_NAME = 'attributes.json'