Skip to content
This repository was archived by the owner on Jun 30, 2022. It is now read-only.

Commit 2f9e11c

Browse files
robertwbaaltay
authored and committed
Add autoscaling pipeline options
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=124417190
1 parent 9782343 commit 2f9e11c

2 files changed

Lines changed: 23 additions & 3 deletions

File tree

google/cloud/dataflow/internal/apiclient.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,9 +224,19 @@ def __init__(self, packages, options, environment_version):
224224
parallelWorkerSettings=dataflow.WorkerSettings(
225225
baseUrl='https://dataflow.googleapis.com',
226226
servicePath=self.google_cloud_options.dataflow_endpoint)))
227+
pool.autoscalingSettings = dataflow.AutoscalingSettings()
227228
# Set worker pool options received through command line.
228229
if self.worker_options.num_workers:
229230
pool.numWorkers = self.worker_options.num_workers
231+
if self.worker_options.max_num_workers:
232+
pool.autoscalingSettings.maxNumWorkers = (
233+
self.worker_options.max_num_workers)
234+
if self.worker_options.autoscaling_algorithm:
235+
values_enum = dataflow.AutoscalingSettings.AlgorithmValueValuesEnum
236+
pool.autoscalingSettings.algorithm = {
237+
'NONE': values_enum.AUTOSCALING_ALGORITHM_NONE,
238+
'THROUGHPUT_BASED': values_enum.AUTOSCALING_ALGORITHM_BASIC,
239+
}.get(self.worker_options.autoscaling_algorithm)
230240
if self.worker_options.machine_type:
231241
pool.machineType = self.worker_options.machine_type
232242
if self.worker_options.disk_size_gb:

google/cloud/dataflow/utils/options.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,19 @@ def _add_argparse_args(cls, parser):
269269
help=
270270
('Number of workers to use when executing the Dataflow job. If not '
271271
'set, the Dataflow service will use a reasonable default.'))
272+
parser.add_argument(
273+
'--max_num_workers',
274+
type=int,
275+
default=None,
276+
help=
277+
('Maximum number of workers to use when executing the Dataflow job.'))
278+
parser.add_argument(
279+
'--autoscaling_algorithm',
280+
type=str,
281+
choices=['NONE', 'THROUGHPUT_BASED'],
282+
default=None, # Meaning unset, distinct from 'NONE' meaning don't scale
283+
help=
284+
('If and how to autoscale the worker pool.'))
272285
# TODO(silviuc): Remove --machine_type variant of the flag.
273286
parser.add_argument(
274287
'--worker_machine_type', '--machine_type',
@@ -428,9 +441,6 @@ def _add_argparse_args(cls, parser):
428441
'workers will install them in same order they were specified on the '
429442
'command line.'))
430443

431-
# TODO(silviuc): Add autoscaling related options:
432-
# --autoscaling_algorithm, --max_num_workers.
433-
434444
# TODO(silviuc): Add --files_to_stage option.
435445
# This could potentially replace the --requirements_file and --setup_file.
436446

0 commit comments

Comments
 (0)