Skip to content

Commit 584b7e1

Browse files
authored
Add 'last modified' to S3 object (#778)
* last_modified to S3Object * run black
1 parent 0f09a1d commit 584b7e1

File tree

2 files changed

+26 -3 lines changed

2 files changed

+26 -3 lines changed

metaflow/datatools/s3.py

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@ def __init__(
9797
content_type=None,
9898
metadata=None,
9999
range_info=None,
100+
last_modified=None,
100101
):
101102

102103
# all fields of S3Object should return a unicode object
@@ -107,6 +108,7 @@ def __init__(
107108
self._path = path
108109
self._key = None
109110
self._content_type = content_type
111+
self._last_modified = last_modified
110112

111113
self._metadata = None
112114
if metadata is not None and "metaflow-user-attributes" in metadata:
@@ -237,6 +239,14 @@ def range_info(self):
237239
"""
238240
return self._range_info
239241

242+
@property
243+
def last_modified(self):
244+
"""
245+
Returns the last modified unix timestamp of the object, or None
246+
if not fetched.
247+
"""
248+
return self._last_modified
249+
240250
def __str__(self):
241251
if self._path:
242252
return "<S3Object %s (%d bytes, local)>" % (self._url, self._size)
@@ -486,6 +496,7 @@ def _info(s3, tmp):
486496
"content_type": resp["ContentType"],
487497
"metadata": resp["Metadata"],
488498
"size": resp["ContentLength"],
499+
"last_modified": resp["LastModified"].timestamp(),
489500
}
490501

491502
info_results = None
@@ -504,6 +515,7 @@ def _info(s3, tmp):
504515
size=info_results["size"],
505516
content_type=info_results["content_type"],
506517
metadata=info_results["metadata"],
518+
last_modified=info_results["last_modified"],
507519
)
508520
return S3Object(self._s3root, url, None)
509521

@@ -547,7 +559,7 @@ def _head():
547559
else:
548560
yield self._s3root, s3url, None, info["size"], info[
549561
"content_type"
550-
], info["metadata"]
562+
], info["metadata"], None, info["last_modified"]
551563
else:
552564
# This should not happen; we should always get a response
553565
# even if it contains an error inside it
@@ -593,6 +605,7 @@ def _download(s3, tmp):
593605
return {
594606
"content_type": resp["ContentType"],
595607
"metadata": resp["Metadata"],
608+
"last_modified": resp["LastModified"].timestamp(),
596609
}
597610
return None
598611

@@ -611,6 +624,7 @@ def _download(s3, tmp):
611624
path,
612625
content_type=addl_info["content_type"],
613626
metadata=addl_info["metadata"],
627+
last_modified=addl_info["last_modified"],
614628
)
615629
return S3Object(self._s3root, url, path)
616630

@@ -652,7 +666,9 @@ def _get():
652666
info = json.load(f)
653667
yield self._s3root, s3url, os.path.join(
654668
self._tmpdir, fname
655-
), None, info["content_type"], info["metadata"]
669+
), None, info["content_type"], info["metadata"], None, info[
670+
"last_modified"
671+
]
656672
else:
657673
yield self._s3root, s3prefix, None
658674
else:
@@ -694,7 +710,9 @@ def _get():
694710
info = json.load(f)
695711
yield self._s3root, s3url, os.path.join(
696712
self._tmpdir, fname
697-
), None, info["content_type"], info["metadata"]
713+
), None, info["content_type"], info["metadata"], None, info[
714+
"last_modified"
715+
]
698716
else:
699717
yield s3prefix, s3url, os.path.join(self._tmpdir, fname)
700718

@@ -1023,6 +1041,7 @@ def _s3op_with_retries(self, mode, **options):
10231041
raise MetaflowS3NotFound(err_out)
10241042
elif ex.returncode == s3op.ERROR_URL_ACCESS_DENIED:
10251043
raise MetaflowS3AccessDenied(err_out)
1044+
print("Error with S3 operation:", err_out)
10261045
time.sleep(2 ** i + random.randint(0, 10))
10271046

10281047
return None, err_out

metaflow/datatools/s3op.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def op_info(url):
120120
"size": head["ContentLength"],
121121
"content_type": head["ContentType"],
122122
"metadata": head["Metadata"],
123+
"last_modified": head["LastModified"].timestamp(),
123124
}
124125
except client_error as err:
125126
error_code = normalize_client_error(err)
@@ -183,12 +184,15 @@ def op_info(url):
183184
# TODO specific error message for out of disk space
184185
# If we need the metadata, get it and write it out
185186
if pre_op_info:
187+
186188
with open("%s_meta" % url.local, mode="w") as f:
187189
args = {"size": resp["ContentLength"]}
188190
if resp["ContentType"]:
189191
args["content_type"] = resp["ContentType"]
190192
if resp["Metadata"] is not None:
191193
args["metadata"] = resp["Metadata"]
194+
if resp["LastModified"]:
195+
args["last_modified"] = resp["LastModified"].timestamp()
192196
json.dump(args, f)
193197
# Finally, we push out the size to the result_pipe since
194198
# the size is used for verification and other purposes and

0 commit comments

Comments
 (0)