42
42
from nautilus_trader .core .correctness import PyCondition
43
43
from nautilus_trader .core .data import Data
44
44
from nautilus_trader .core .datetime import dt_to_unix_nanos
45
+ from nautilus_trader .core .datetime import maybe_dt_to_unix_nanos
45
46
from nautilus_trader .core .datetime import time_object_to_dt
47
+ from nautilus_trader .core .datetime import unix_nanos_to_iso8601
46
48
from nautilus_trader .core .inspect import is_nautilus_class
47
49
from nautilus_trader .core .message import Event
48
50
from nautilus_trader .core .nautilus_pyo3 import DataBackendSession
@@ -313,7 +315,8 @@ def _write_chunk(
313
315
314
316
start = start if start else data [0 ].ts_init
315
317
end = end if end else data [- 1 ].ts_init
316
- parquet_file = f"{ directory } /{ start } -{ end } .parquet"
318
+ filename = _timestamps_to_filename (start , end )
319
+ parquet_file = f"{ directory } /{ filename } "
317
320
pq .write_table (
318
321
table ,
319
322
where = parquet_file ,
@@ -390,13 +393,19 @@ def extend_file_name(
390
393
391
394
for interval in intervals :
392
395
if interval [0 ] == end + 1 :
393
- old_path = os .path .join (directory , f"{ interval [0 ]} -{ interval [1 ]} .parquet" )
394
- new_path = os .path .join (directory , f"{ start } -{ interval [1 ]} .parquet" )
396
+ old_path = os .path .join (
397
+ directory ,
398
+ _timestamps_to_filename (interval [0 ], interval [1 ]),
399
+ )
400
+ new_path = os .path .join (directory , _timestamps_to_filename (start , interval [1 ]))
395
401
self .fs .rename (old_path , new_path )
396
402
break
397
403
elif interval [1 ] == start - 1 :
398
- old_path = os .path .join (directory , f"{ interval [0 ]} -{ interval [1 ]} .parquet" )
399
- new_path = os .path .join (directory , f"{ interval [0 ]} -{ end } .parquet" )
404
+ old_path = os .path .join (
405
+ directory ,
406
+ _timestamps_to_filename (interval [0 ], interval [1 ]),
407
+ )
408
+ new_path = os .path .join (directory , _timestamps_to_filename (interval [0 ], end ))
400
409
self .fs .rename (old_path , new_path )
401
410
break
402
411
@@ -482,7 +491,7 @@ def _reset_file_names(self, directory: str) -> None:
482
491
if first_ts == - 1 :
483
492
continue
484
493
485
- new_filename = f" { first_ts } - { last_ts } .parquet"
494
+ new_filename = _timestamps_to_filename ( first_ts , last_ts )
486
495
new_path = os .path .join (os .path .dirname (file ), new_filename )
487
496
self .fs .rename (file , new_path )
488
497
@@ -609,7 +618,10 @@ def _consolidate_directory(
609
618
if ensure_contiguous_files :
610
619
assert _are_intervals_contiguous (intervals )
611
620
612
- new_file_name = os .path .join (directory , f"{ intervals [0 ][0 ]} -{ intervals [- 1 ][1 ]} .parquet" )
621
+ new_file_name = os .path .join (
622
+ directory ,
623
+ _timestamps_to_filename (intervals [0 ][0 ], intervals [- 1 ][1 ]),
624
+ )
613
625
files_to_consolidate .sort ()
614
626
self ._combine_parquet_files (files_to_consolidate , new_file_name )
615
627
@@ -1378,6 +1390,18 @@ def convert_stream_to_data(
1378
1390
used_catalog .write_data (all_data )
1379
1391
1380
1392
1393
def _timestamps_to_filename(timestamp_1: int, timestamp_2: int) -> str:
    """
    Build the parquet filename encoding two UNIX-nanosecond timestamps.

    Each timestamp is rendered as ISO 8601 and then made filesystem-safe,
    and the two are joined with an underscore.
    """
    parts = [
        _iso_timestamp_to_file_timestamp(unix_nanos_to_iso8601(ts))
        for ts in (timestamp_1, timestamp_2)
    ]

    return "_".join(parts) + ".parquet"
1398
+
1399
+
1400
+ def _iso_timestamp_to_file_timestamp (iso_timestamp : str ) -> str :
1401
+ # Assumes format YYYY-MM-DDTHH:MM:SS.nanosecondsZ, "2023-10-26T07:30:50.123456789Z" becomes "2023-10-26T07-30-50-123456789Z"
1402
+ return iso_timestamp .replace (":" , "-" ).replace ("." , "-" )
1403
+
1404
+
1381
1405
def _query_intersects_filename (
1382
1406
filename : str ,
1383
1407
start : pd .Timestamp | None ,
@@ -1395,17 +1419,31 @@ def _query_intersects_filename(
1395
1419
1396
1420
def _parse_filename_timestamps(filename: str) -> tuple[int, int] | None:
    """
    Extract the (start, end) UNIX-nanosecond timestamps encoded in a catalog
    parquet filename.

    Parameters
    ----------
    filename : str
        Path or name of the file; only the base name (extension stripped) is
        parsed. Expected format: "<file_timestamp>_<file_timestamp>.parquet"
        as produced by `_timestamps_to_filename`.

    Returns
    -------
    tuple[int, int] | None
        ``(first_ts, last_ts)`` in UNIX nanoseconds, or ``None`` when the
        filename does not encode a valid timestamp pair.
    """
    base_filename = os.path.splitext(os.path.basename(filename))[0]
    match = re.match(r"(.*?)_(.*)", base_filename)

    if not match:
        return None

    try:
        first_ts = maybe_dt_to_unix_nanos(_file_timestamp_to_iso_timestamp(match.group(1)))
        last_ts = maybe_dt_to_unix_nanos(_file_timestamp_to_iso_timestamp(match.group(2)))
    except ValueError:
        # The name contains an underscore but the parts are not file
        # timestamps (e.g. an unrelated file in the directory) — treat as
        # non-matching rather than crashing the directory scan.
        return None

    # Compare against None explicitly: a timestamp of 0 (the UNIX epoch) is a
    # valid value and must not be rejected by truthiness.
    if first_ts is None or last_ts is None:
        return None

    return (first_ts, last_ts)
1407
1434
1408
1435
1436
+ def _file_timestamp_to_iso_timestamp (file_timestamp : str ) -> str :
1437
+ # Assumes format YYYY-MM-DDTHH-MM-SS-nanosecondsZ, "2023-10-26T07-30-50-123456789Z" becomes "2023-10-26T07:30:50.123456789Z"
1438
+ date_part , time_part = file_timestamp .split ("T" )
1439
+ time_part = time_part [:- 1 ]
1440
+ last_hyphen_idx = time_part .rfind ("-" )
1441
+ time_with_dot_for_nanos = time_part [:last_hyphen_idx ] + "." + time_part [last_hyphen_idx + 1 :]
1442
+ final_time_part = time_with_dot_for_nanos .replace ("-" , ":" )
1443
+
1444
+ return f"{ date_part } T{ final_time_part } Z"
1445
+
1446
+
1409
1447
def _min_max_from_parquet_metadata (file_path : str , column_name : str ) -> tuple [int , int ]:
1410
1448
parquet_file = pq .ParquetFile (file_path )
1411
1449
metadata = parquet_file .metadata
0 commit comments