@@ -97,7 +97,7 @@ def worker_object_to_string(worker_object):
9797WorkerGroupingShuffleRead = build_worker_instruction (
9898 'WorkerGroupingShuffleRead' ,
9999 ['start_shuffle_position' , 'end_shuffle_position' ,
100- 'shuffle_reader_config' , 'coders ' ])
100+ 'shuffle_reader_config' , 'coder ' ])
101101"""Worker details needed to read from a grouping shuffle source.
102102
103103Attributes:
@@ -108,14 +108,14 @@ def worker_object_to_string(worker_object):
108108 shuffle_reader_config: An opaque string used to initialize the shuffle
109109 reader. Contains things like connection endpoints for the shuffle
110110 server appliance and various options.
111- coders: A 2-tuple of coders (key, value) to decode shuffle entries.
111+ coder: The KV coder used to decode shuffle entries.
112112"""
113113
114114
115115WorkerUngroupedShuffleRead = build_worker_instruction (
116116 'WorkerUngroupedShuffleRead' ,
117117 ['start_shuffle_position' , 'end_shuffle_position' ,
118- 'shuffle_reader_config' , 'coders ' ])
118+ 'shuffle_reader_config' , 'coder ' ])
119119"""Worker details needed to read from an ungrouped shuffle source.
120120
121121Attributes:
@@ -126,7 +126,7 @@ def worker_object_to_string(worker_object):
126126 shuffle_reader_config: An opaque string used to initialize the shuffle
127127 reader. Contains things like connection endpoints for the shuffle
128128 server appliance and various options.
129- coders: A 2-tuple of coders (key, value) to decode shuffle entries.
129+ coder: The value coder used to decode shuffle entries.
130130"""
131131
132132
@@ -160,7 +160,7 @@ def worker_object_to_string(worker_object):
160160
161161WorkerShuffleWrite = build_worker_instruction (
162162 'WorkerShuffleWrite' ,
163- ['shuffle_kind' , 'shuffle_writer_config' , 'input' , 'coders ' ])
163+ ['shuffle_kind' , 'shuffle_writer_config' , 'input' , 'coder ' ])
164164"""Worker details needed to write to a shuffle sink.
165165
166166Attributes:
@@ -173,7 +173,8 @@ def worker_object_to_string(worker_object):
173173 input: A (producer index, output index) tuple representing the
174174 ParallelInstruction operation whose output feeds into this operation.
175175 The output index is 0 except for multi-output operations (like ParDo).
176- coders: A 2-tuple of coders (key, value) to encode shuffle entries.
176+ coder: The coder for input elements. If the shuffle_kind is grouping, this is
177+ expected to be a KV coder.
177178"""
178179
179180
@@ -370,20 +371,23 @@ def get_read_work_item(work, env, context):
370371 if source :
371372 return WorkerRead (source , tag = None )
372373
373- # TODO(mairbek) create Shuffler Source/Reader
374- kv_coders = get_coder_from_spec (codec_specs , kv_pair = True )
374+ coder = get_coder_from_spec (codec_specs )
375+ # TODO(ccy): Reconcile WindowedValueCoder wrappings for sources with custom
376+ # coders so this special case won't be necessary.
377+ if isinstance (coder , coders .WindowedValueCoder ):
378+ coder = coder .wrapped_value_coder
375379 if specs ['@type' ] == 'GroupingShuffleSource' :
376380 return WorkerGroupingShuffleRead (
377381 start_shuffle_position = specs ['start_shuffle_position' ]['value' ],
378382 end_shuffle_position = specs ['end_shuffle_position' ]['value' ],
379383 shuffle_reader_config = specs ['shuffle_reader_config' ]['value' ],
380- coders = kv_coders )
384+ coder = coder )
381385 elif specs ['@type' ] == 'UngroupedShuffleSource' :
382386 return WorkerUngroupedShuffleRead (
383387 start_shuffle_position = specs ['start_shuffle_position' ]['value' ],
384388 end_shuffle_position = specs ['end_shuffle_position' ]['value' ],
385389 shuffle_reader_config = specs ['shuffle_reader_config' ]['value' ],
386- coders = kv_coders )
390+ coder = coder )
387391 else :
388392 raise NotImplementedError ('Unknown source type: %r' % specs )
389393
@@ -452,14 +456,17 @@ def get_write_work_item(work, env, context):
452456 sink = env .parse_sink (specs , codec_specs , context )
453457 if sink :
454458 return WorkerWrite (sink , input = get_input_spec (work .write .input ))
455- # TODO(mairbek) create Shuffler Sink/Writer
456459 if specs ['@type' ] == 'ShuffleSink' :
457- kv_coders = get_coder_from_spec (codec_specs , kv_pair = True )
460+ coder = get_coder_from_spec (codec_specs )
 461+ # TODO(ccy): Reconcile WindowedValueCoder wrappings for sources and sinks with
 462+ # custom coders so this special case won't be necessary.
463+ if isinstance (coder , coders .WindowedValueCoder ):
464+ coder = coder .wrapped_value_coder
458465 return WorkerShuffleWrite (
459466 shuffle_kind = specs ['shuffle_kind' ]['value' ],
460467 shuffle_writer_config = specs ['shuffle_writer_config' ]['value' ],
461468 input = get_input_spec (work .write .input ),
462- coders = kv_coders )
469+ coder = coder )
463470 else :
464471 raise NotImplementedError ('Unknown sink type: %r' % specs )
465472
0 commit comments