|
30 | 30 | class PValue(object): |
31 | 31 | """Base class for PCollection. |
32 | 32 |
|
| 33 | + Dataflow users should not construct PValue objects directly in their |
| 34 | + pipelines. |
| 35 | +
|
33 | 36 | A PValue has the following main characteristics: |
34 | 37 | (1) Belongs to a pipeline. Added during object initialization. |
35 | 38 | (2) Has a transform that can compute the value if executed. |
36 | 39 | (3) Has a value which is meaningful if the transform was executed. |
37 | 40 | """ |
38 | 41 |
|
39 | | - def __init__(self, **kwargs): |
| 42 | + def __init__(self, pipeline, tag=None, element_type=None): |
40 | 43 | """Initializes a PValue. Do not call directly. |
41 | 44 |
|
42 | 45 | Args: |
43 | | - **kwargs: keyword arguments. |
44 | | -
|
45 | | - Raises: |
46 | | - ValueError: if the expected keyword arguments (pipeline, transform, |
47 | | - and optionally tag) are not present. |
48 | | -
|
49 | | - The method expects a pipeline and a transform keyword argument. However in |
50 | | - order to give a signal to users that they should not create these PValues |
51 | | - directly we obfuscate the arguments. |
| 46 | + pipeline: Pipeline object for this PValue. |
| 47 | + tag: Tag of this PValue. |
| 48 | + element_type: The type of this PValue. |
52 | 49 | """ |
53 | | - if 'pipeline' not in kwargs or 'transform' not in kwargs: |
54 | | - raise ValueError( |
55 | | - 'Missing required arguments (pipeline and transform): %s' |
56 | | - % kwargs.keys) |
57 | | - self.pipeline = kwargs.pop('pipeline') |
58 | | - # TODO(silviuc): Remove usage of the transform argument from all call sites. |
59 | | - # It is not used anymore and has been replaced with the producer attribute. |
60 | | - kwargs.pop('transform') |
61 | | - self.tag = kwargs.pop('tag', None) |
62 | | - self.element_type = kwargs.pop('element_type', None) |
63 | | - if kwargs: |
64 | | - raise ValueError('Unexpected keyword arguments: %s' % kwargs.keys()) |
| 50 | + self.pipeline = pipeline |
| 51 | + self.tag = tag |
| 52 | + self.element_type = element_type |
65 | 53 | self.pipeline._add_pvalue(self) |
66 | 54 | # The AppliedPTransform instance for the application of the PTransform |
67 | 55 | # generating this PValue. The field gets initialized when a transform |
@@ -103,11 +91,15 @@ def __or__(self, ptransform): |
103 | 91 |
|
104 | 92 |
|
105 | 93 | class PCollection(PValue): |
106 | | - """A multiple values (potentially huge) container.""" |
| 94 | + """A (potentially huge) container of multiple values. |
| 95 | +
|
| 96 | + Dataflow users should not construct PCollection objects directly in their |
| 97 | + pipelines. |
| 98 | + """ |
107 | 99 |
|
108 | | - def __init__(self, **kwargs): |
| 100 | + def __init__(self, pipeline, **kwargs): |
109 | 101 | """Initializes a PCollection. Do not call directly.""" |
110 | | - super(PCollection, self).__init__(**kwargs) |
| 102 | + super(PCollection, self).__init__(pipeline, **kwargs) |
111 | 103 |
|
112 | 104 | @property |
113 | 105 | def windowing(self): |
@@ -216,10 +208,7 @@ def __getitem__(self, tag): |
216 | 208 | return self._pcolls[tag] |
217 | 209 | if tag is not None: |
218 | 210 | self._transform.side_output_tags.add(tag) |
219 | | - pcoll = PCollection( |
220 | | - pipeline=self._pipeline, |
221 | | - transform=self._transform, |
222 | | - tag=tag) |
| 211 | + pcoll = PCollection(self._pipeline, tag=tag) |
223 | 212 | # Transfer the producer from the DoOutputsTuple to the resulting |
224 | 213 | # PCollection. |
225 | 214 | pcoll.producer = self.producer |
|
0 commit comments