Skip to content
This repository was archived by the owner on Jun 30, 2022. It is now read-only.

Commit 5505162

Browse files
robertwbgildea
authored andcommitted
Eliminate the fallback to dill when pickling fails for data
If desired, a DillPickler can be explicitly used instead. ----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=117143518
1 parent 4ffb9f5 commit 5505162

2 files changed

Lines changed: 46 additions & 18 deletions

File tree

google/cloud/dataflow/coders/coders.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def is_deterministic(self):
236236
return True
237237

238238

239-
def dumps(o):
239+
def maybe_dill_dumps(o):
240240
"""Pickle using cPickle or the Dill pickler as a fallback."""
241241
# We need to use the dill pickler for objects of certain custom classes,
242242
# including, for example, ones that contain lambdas.
@@ -246,19 +246,16 @@ def dumps(o):
246246
return dill.dumps(o)
247247

248248

249-
def loads(o):
249+
def maybe_dill_loads(o):
250250
"""Unpickle using cPickle or the Dill pickler as a fallback."""
251251
try:
252252
return pickle.loads(o)
253253
except Exception: # pylint: disable=broad-except
254254
return dill.loads(o)
255255

256256

257-
class PickleCoder(FastCoder):
258-
"""Coder using Python's pickle functionality."""
259-
260-
def _create_impl(self):
261-
return coder_impl.CallbackCoderImpl(dumps, loads)
257+
class _PickleCoderBase(FastCoder):
258+
"""Base class for pickling coders."""
262259

263260
def is_deterministic(self):
264261
# Note that the default coder, the PickleCoder, is not deterministic (for
@@ -268,7 +265,7 @@ def is_deterministic(self):
268265
return False
269266

270267
def as_cloud_object(self, is_pair_like=True):
271-
value = super(PickleCoder, self).as_cloud_object()
268+
value = super(_PickleCoderBase, self).as_cloud_object()
272269
# We currently use this coder in places where we cannot infer the coder to
273270
# use for the value type in a more granular way. In places where the
274271
# service expects a pair, it checks for the "is_pair_like" key, in which
@@ -295,6 +292,20 @@ def value_coder(self):
295292
return self
296293

297294

295+
class PickleCoder(_PickleCoderBase):
296+
"""Coder using Python's pickle functionality."""
297+
298+
def _create_impl(self):
299+
return coder_impl.CallbackCoderImpl(pickle.dumps, pickle.loads)
300+
301+
302+
class DillCoder(_PickleCoderBase):
303+
"""Coder using dill's pickle functionality."""
304+
305+
def _create_impl(self):
306+
return coder_impl.CallbackCoderImpl(maybe_dill_dumps, maybe_dill_loads)
307+
308+
298309
class DeterministicPickleCoder(FastCoder):
299310
"""Throws runtime errors when pickling non-deterministic values."""
300311

google/cloud/dataflow/coders/coders_test_common.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,21 @@
1919
import sys
2020
import unittest
2121

22+
import dill
23+
2224
import coders
2325

2426

27+
# Defined out of line for picklability.
28+
class CustomCoder(coders.Coder):
29+
30+
def encode(self, x):
31+
return str(x+1)
32+
33+
def decode(self, encoded):
34+
return int(encoded) - 1
35+
36+
2537
class CodersTest(unittest.TestCase):
2638

2739
# These class methods ensure that we test each defined coder in both
@@ -34,9 +46,10 @@ def setUpClass(cls):
3446

3547
@classmethod
3648
def tearDownClass(cls):
37-
standard = set(
38-
c for c in coders.__dict__.values()
39-
if isinstance(c, type) and issubclass(c, coders.Coder))
49+
standard = set(c
50+
for c in coders.__dict__.values()
51+
if isinstance(c, type) and issubclass(c, coders.Coder) and
52+
'Base' not in c.__name__)
4053
standard -= set([coders.Coder, coders.ToStringCoder, coders.FloatCoder,
4154
coders.Base64PickleCoder, coders.FastCoder,
4255
coders.WindowCoder, coders.WindowedValueCoder])
@@ -59,15 +72,12 @@ def check_coder(self, coder, *values):
5972
self._observe(coder)
6073
for v in values:
6174
self.assertEqual(v, coder.decode(coder.encode(v)))
75+
copy1 = dill.loads(dill.dumps(coder))
76+
copy2 = dill.loads(dill.dumps(coder))
77+
for v in values:
78+
self.assertEqual(v, copy1.decode(copy2.encode(v)))
6279

6380
def test_custom_coder(self):
64-
class CustomCoder(coders.Coder):
65-
66-
def encode(self, x):
67-
return str(x+1)
68-
69-
def decode(self, encoded):
70-
return int(encoded) - 1
7181

7282
self.check_coder(CustomCoder(), 1, -10, 5)
7383
self.check_coder(coders.TupleCoder((CustomCoder(), coders.BytesCoder())),
@@ -87,6 +97,13 @@ def test_deterministic_pickle_coder(self):
8797
self.check_coder(coders.TupleCoder((coder, coders.PickleCoder())),
8898
(1, dict()), ('a', [dict()]))
8999

100+
def test_dill_coder(self):
101+
cell_value = (lambda x: lambda: x)(0).func_closure[0]
102+
self.check_coder(coders.DillCoder(), 'a', 1, cell_value)
103+
self.check_coder(
104+
coders.TupleCoder((coders.VarIntCoder(), coders.DillCoder())),
105+
(1, cell_value))
106+
90107
def test_bytes_coder(self):
91108
self.check_coder(coders.BytesCoder(), 'a', '\0', 'z' * 1000)
92109

0 commit comments

Comments
 (0)