Skip to content
This repository was archived by the owner on Jun 30, 2022. It is now read-only.

Commit 4e38084

Browse files
charlesccychen authored and aaltay committed
Raise an IOError when source file in GcsIO.copy does not exist
----Release Notes----
[]
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=124753270
1 parent c752b7f commit 4e38084

2 files changed

Lines changed: 21 additions & 2 deletions

File tree

google/cloud/dataflow/io/gcsio.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,11 @@ def parse_gcs_path(gcs_path):
5555
return match.group(1), match.group(2)
5656

5757

58+
class GcsIOError(IOError, retry.PermanentException):
59+
"""GCS IO error that should not be retried."""
60+
pass
61+
62+
5863
class GcsIO(object):
5964
"""Google Cloud Storage I/O client."""
6065

@@ -168,7 +173,15 @@ def copy(self, src, dest):
168173
sourceObject=src_path,
169174
destinationBucket=dest_bucket,
170175
destinationObject=dest_path)
171-
self.client.objects.Copy(request)
176+
try:
177+
self.client.objects.Copy(request)
178+
except HttpError as http_error:
179+
if http_error.status_code == 404:
180+
# This is a permanent error that should not be retried. Note that
181+
# FileSink.finalize_write expects an IOError when the source file does
182+
# not exist.
183+
raise GcsIOError(errno.ENOENT, 'Source file not found: %s' % src)
184+
raise
172185

173186
# We intentionally do not decorate this method with a retry, since the
174187
# underlying copy and delete operations are already idempotent operations

google/cloud/dataflow/io/gcsio_test.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,9 @@ def Insert(self, insert_request, upload=None): # pylint: disable=invalid-name
109109
def Copy(self, copy_request): # pylint: disable=invalid-name
110110
src_file = self.get_file(copy_request.sourceBucket,
111111
copy_request.sourceObject)
112-
assert src_file is not None
112+
if not src_file:
113+
raise HttpError(httplib2.Response({'status': '404'}), '404 Not Found',
114+
'https://fake/url')
113115
generation = self.get_last_generation(copy_request.destinationBucket,
114116
copy_request.destinationObject) + 1
115117
dest_file = FakeFile(copy_request.destinationBucket,
@@ -219,6 +221,10 @@ def test_copy(self):
219221
self.assertTrue(gcsio.parse_gcs_path(dest_file_name) in
220222
self.client.objects.files)
221223

224+
self.assertRaises(IOError, self.gcs.copy,
225+
'gs://gcsio-test/non-existent',
226+
'gs://gcsio-test/non-existent-destination')
227+
222228
def test_copytree(self):
223229
src_dir_name = 'gs://gcsio-test/source/'
224230
dest_dir_name = 'gs://gcsio-test/dest/'

0 commit comments

Comments
 (0)