Skip to content
This repository was archived by the owner on Jun 30, 2022. It is now read-only.

Commit 9782343

Browse files
robertwbaaltay
authored and committed
Augment file utils with recursive copy
----Release Notes---- [] ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=124379580
1 parent 15bef52 commit 9782343

3 files changed

Lines changed: 58 additions & 0 deletions

File tree

google/cloud/dataflow/io/fileio.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,23 @@ def rename(src, dst):
256256
except OSError as err:
257257
raise IOError(err)
258258

259+
@staticmethod
def copytree(src, dst):
  """Recursively copies the directory tree at src to dst.

  Paths starting with 'gs://' are delegated to gcsio.GcsIO().copytree; any
  other path is treated as a local directory and copied with shutil.

  For local paths, an existing dst is removed first, so the result is a
  replacement of dst rather than a merge into it.

  Args:
    src: Source directory. For GCS this must start with 'gs://' and end
      with '/'.
    dst: Destination directory. When src is a GCS path this must also start
      with 'gs://' and end with '/'.

  Raises:
    AssertionError: if src is a GCS path and src/dst do not have the
      required 'gs://' prefix or trailing '/'.
    IOError: if the local removal or copy fails.
  """
  if src.startswith('gs://'):
    assert dst.startswith('gs://'), dst
    assert src.endswith('/'), src
    assert dst.endswith('/'), dst
    # pylint: disable=g-import-not-at-top
    from google.cloud.dataflow.io import gcsio
    gcsio.GcsIO().copytree(src, dst)
  else:
    try:
      if os.path.exists(dst):
        shutil.rmtree(dst)
      shutil.copytree(src, dst)
    except (OSError, shutil.Error) as err:
      # shutil.copytree collects per-file failures into shutil.Error, which
      # is not an OSError subclass on Python 2; catch it explicitly so every
      # local failure surfaces as IOError.
      raise IOError(err)
275+
259276
@staticmethod
260277
def exists(path):
261278
if path.startswith('gs://'):

google/cloud/dataflow/io/gcsio.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,22 @@ def copy(self, src, dest):
171171
destinationObject=dest_path)
172172
self.client.objects.Copy(request)
173173

174+
# We intentionally do not decorate this method with a retry, since the
# underlying copy operations are already idempotent operations protected by
# retry decorators.
def copytree(self, src, dest):
  """Copies the given GCS "directory" recursively from src to dest.

  Every object matched by the glob pattern src + '*' is copied to dest under
  the same path relative to src. Only copy is called; no delete is issued
  here.

  Args:
    src: GCS file path pattern in the form gs://<bucket>/<name>/.
    dest: GCS file path pattern in the form gs://<bucket>/<name>/.

  Raises:
    AssertionError: if src or dest does not end with '/'.
  """
  # Include the offending path in the failure message.
  assert src.endswith('/'), src
  assert dest.endswith('/'), dest
  for entry in self.glob(src + '*'):
    # Strip the source prefix so nested names keep their relative layout.
    rel_path = entry[len(src):]
    self.copy(entry, dest + rel_path)
189+
174190
# We intentionally do not decorate this method with a retry, since the
175191
# underlying copy and delete operations are already idempotent operations
176192
# protected by retry decorators.

google/cloud/dataflow/io/gcsio_test.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,31 @@ def test_copy(self):
219219
self.assertTrue(gcsio.parse_gcs_path(dest_file_name) in
220220
self.client.objects.files)
221221

222+
def test_copytree(self):
  """Checks that copytree copies nested objects and keeps the sources."""
  src_dir_name = 'gs://gcsio-test/source/'
  dest_dir_name = 'gs://gcsio-test/dest/'
  file_size = 1024
  # Pair each source object name with the destination name it should get.
  name_pairs = [(src_dir_name + rel, dest_dir_name + rel)
                for rel in ('a', 'b/c', 'b/d')]

  # Seed the bucket: each source is present, its destination is absent.
  for src_file_name, dest_file_name in name_pairs:
    self._insert_random_file(self.client, src_file_name, file_size)
    self.assertTrue(
        gcsio.parse_gcs_path(src_file_name) in self.client.objects.files)
    self.assertFalse(
        gcsio.parse_gcs_path(dest_file_name) in self.client.objects.files)

  self.gcs.copytree(src_dir_name, dest_dir_name)

  # A copy, not a move: source and destination must both exist afterwards.
  for src_file_name, dest_file_name in name_pairs:
    for file_name in (src_file_name, dest_file_name):
      self.assertTrue(
          gcsio.parse_gcs_path(file_name) in self.client.objects.files)
246+
222247
def test_rename(self):
223248
src_file_name = 'gs://gcsio-test/source'
224249
dest_file_name = 'gs://gcsio-test/dest'

0 commit comments

Comments
 (0)