@@ -206,7 +206,7 @@ void produce_column_header_dta(void *csv_metadata, const char *column, readstat_
206
206
break ;
207
207
case EXTRACT_METADATA_FORMAT_TIME :
208
208
case EXTRACT_METADATA_FORMAT_DATE_TIME :
209
- var -> type = READSTAT_TYPE_INT32 ;
209
+ var -> type = READSTAT_TYPE_DOUBLE ;
210
210
snprintf (var -> format , sizeof (var -> format ), "%s" , "%tC" );
211
211
// %tC => is equivalent to coordinated universal time (UTC)
212
212
break ;
@@ -385,17 +385,115 @@ static readstat_value_t value_double_dta(const char *s, size_t len, struct csv_m
385
385
return value ;
386
386
}
387
387
388
+ static readstat_value_t value_double_date_time_dta (const char * s , size_t len , struct csv_metadata * c ) {
389
+ // Handle empty or NULL strings as missing values
390
+ if (s == NULL || len == 0 || * s == '\0' ) {
391
+ readstat_value_t value = {
392
+ .type = READSTAT_TYPE_DOUBLE ,
393
+ .is_system_missing = 1 ,
394
+ .v = { .double_value = NAN }
395
+ };
396
+ return value ;
397
+ }
398
+
399
+ // Truncate the date string to 23 characters to remove the timezone offset and
400
+ // microseconds, if present. STATA does not support timezones or microseconds.
401
+ char date_time [24 ];
402
+ snprintf (date_time , sizeof (date_time ), "%s" , s );
403
+
404
+ // Parse date-time components
405
+ int year , month , day , hour , minute , second , msecs = 0 ;
406
+ int matched = sscanf (
407
+ date_time ,
408
+ "%d-%d-%d %d:%d:%d.%d" ,
409
+ & year , & month , & day , & hour , & minute , & second , & msecs
410
+ );
411
+ if (matched < 6 || matched > 8 ) {
412
+ fprintf (stderr , "%s:%d not a valid date-time: %s (expected format: yyyy-mm-dd hh:MM:SS with optional milliseconds. Datetime string is truncated at 23 characters to ignore microseconds and timezone information.)\n" , __FILE__ , __LINE__ , date_time );
413
+ exit (EXIT_FAILURE );
414
+ }
415
+
416
+ // Get days since the epoch for the date
417
+ char days_since_epoch_string [11 ];
418
+ snprintf (days_since_epoch_string , sizeof (days_since_epoch_string ), "%04d-%02d-%02d" , year , month , day );
419
+ char * dest ;
420
+ int days_since_epoch = readstat_dta_num_days (days_since_epoch_string , & dest );
421
+
422
+ // Add the hours, minutes, and seconds to the days
423
+ double msecs_since_epoch = 86400000.0 * days_since_epoch + hour * 3600000.0 + minute * 60000.0 + second * 1000.0 + msecs * 1.0 ;
424
+
425
+ // Adjust for leap seconds; 27 have occurred as of writing this code
426
+ // https://en.m.wikipedia.org/wiki/Leap_second
427
+ typedef struct {
428
+ int year ;
429
+ int month ;
430
+ int day ;
431
+ } leap_second_date ;
432
+
433
+ leap_second_date leap_seconds [] = {
434
+ {1972 , 6 , 30 }, {1972 , 12 , 31 }, // +2 seconds in 1972
435
+ {1973 , 12 , 31 }, // +1 second in 1973
436
+ {1974 , 12 , 31 }, // +1 second in 1974
437
+ {1975 , 12 , 31 }, // +1 second in 1975
438
+ {1976 , 12 , 31 }, // +1 second in 1976
439
+ {1977 , 12 , 31 }, // +1 second in 1977
440
+ {1978 , 12 , 31 }, // +1 second in 1978
441
+ {1979 , 12 , 31 }, // +1 second in 1979
442
+ {1981 , 6 , 30 }, // +1 second in 1981
443
+ {1982 , 6 , 30 }, // +1 second in 1982
444
+ {1983 , 6 , 30 }, // +1 second in 1983
445
+ {1985 , 6 , 30 }, // +1 second in 1985
446
+ {1987 , 12 , 31 }, // +1 second in 1987
447
+ {1989 , 12 , 31 }, // +1 second in 1989
448
+ {1990 , 12 , 31 }, // +1 second in 1990
449
+ {1992 , 6 , 30 }, // +1 second in 1992
450
+ {1993 , 6 , 30 }, // +1 second in 1993
451
+ {1994 , 6 , 30 }, // +1 second in 1994
452
+ {1995 , 12 , 31 }, // +1 second in 1995
453
+ {1997 , 6 , 30 }, // +1 second in 1997
454
+ {1998 , 12 , 31 }, // +1 second in 1998
455
+ {2005 , 12 , 31 }, // +1 second in 2005
456
+ {2008 , 12 , 31 }, // +1 second in 2008
457
+ {2012 , 6 , 30 }, // +1 second in 2012
458
+ {2015 , 6 , 30 }, // +1 second in 2015
459
+ {2016 , 12 , 31 } // +1 second in 2016
460
+ };
461
+
462
+ int leap_second_count = sizeof (leap_seconds ) / sizeof (leap_seconds [0 ]);
463
+ int leap_seconds_to_add = 0 ;
464
+
465
+ for (int i = 0 ; i < leap_second_count ; i ++ ) {
466
+ // If the date is after this leap second, add one second
467
+ if (
468
+ (year > leap_seconds [i ].year ) ||
469
+ (year == leap_seconds [i ].year && month > leap_seconds [i ].month ) ||
470
+ (year == leap_seconds [i ].year && month == leap_seconds [i ].month && day > leap_seconds [i ].day )
471
+ ) { leap_seconds_to_add ++ ; }
472
+ }
473
+ msecs_since_epoch += leap_seconds_to_add * 1000.0 ;
474
+
475
+ readstat_value_t value = {
476
+ .type = READSTAT_TYPE_DOUBLE ,
477
+ .v = { .double_value = msecs_since_epoch }
478
+ };
479
+
480
+ return value ;
481
+ }
482
+
388
483
void produce_csv_value_dta (void * csv_metadata , const char * s , size_t len ) {
389
484
struct csv_metadata * c = (struct csv_metadata * )csv_metadata ;
390
485
readstat_variable_t * var = & c -> variables [c -> columns ];
391
486
int is_date = c -> is_date [c -> columns ];
487
+ int is_date_time = c -> is_date_time [c -> columns ];
392
488
int obs_index = c -> rows - 1 ; // TODO: ???
393
489
readstat_value_t value ;
394
490
395
491
if (len == 0 ) {
396
492
value = value_sysmiss (s , len , c );
397
493
} else if (is_date ) {
398
494
value = value_int32_date_dta (s , len , c );
495
+ } else if (is_date_time ) {
496
+ value = value_double_date_time_dta (s , len , c );
399
497
} else if (var -> type == READSTAT_TYPE_DOUBLE ) {
400
498
value = value_double_dta (s , len , c );
401
499
} else if (var -> type == READSTAT_TYPE_STRING ) {
0 commit comments