From f99b4c8d0b4ebeef3a46b5786f6013144129aff0 Mon Sep 17 00:00:00 2001 From: "E. Madison Bray" <676149+embray@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:27:39 +0200 Subject: [PATCH] fix: time format validation only applies to strings For values in decimal formats the time format is ambiguous and should not be guessed. Likewise, according to the actual schema, the string formats are only valid in the schema if they are guessable, basically (i.e. 'J2000.0', not just '2000.0', even if `format: jyear` is specified). Follow-up to #205 which intended to fix this as well. --- src/core/time.c | 43 ++++++++++++++++++++++++++-------------- tests/fixtures/time.asdf | 4 ++-- tests/test-time.c | 9 ++++++--- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/core/time.c b/src/core/time.c index 63496c80..a16f663a 100644 --- a/src/core/time.c +++ b/src/core/time.c @@ -705,23 +705,31 @@ static asdf_value_t *asdf_time_serialize( * determined. The return type is a signed ``int`` (not the enum) because * `asdf_time_format_t` may be an unsigned type, in which case a -1 enum value * would compare as a large positive number. + * + * The auto-detect/validation regexps mirror the schema's string-form patterns + * (e.g. byear/jyear require a ``B``/``J`` prefix, etc.), so they only apply + * when the original YAML value was a string. Numeric values are accepted + * as-is for the explicit format and cannot be guessed. */ static int validate_or_guess_time_format( - asdf_value_t *value, const char *time_s, const char *format_s) { + asdf_value_t *value, const char *time_s, asdf_value_type_t time_type, const char *format_s) { asdf_time_format_t format; compile_time_auto_regexes(); if (!format_s) { - /* No explicit format: auto-detect from the value string. 
*/ - for (size_t idx = 0; idx < TIME_AUTO_COUNT; idx++) { - if (UNLIKELY(time_auto_regexes[idx].error != CREG_OK)) - continue; - csview match[CREG_MAX_CAPTURES] = {0}; - if (cregex_match(&time_auto_regexes[idx], time_s, match) != CREG_OK) - continue; - validate_datetime_ranges(value->file, (int)idx, time_s, match); - return (int)time_auto_patterns[idx].type; + /* No explicit format: auto-detect from the value string. This is only + * possible for string values -- a bare number is ambiguous. */ + if (time_type == ASDF_VALUE_STRING) { + for (size_t idx = 0; idx < TIME_AUTO_COUNT; idx++) { + if (UNLIKELY(time_auto_regexes[idx].error != CREG_OK)) + continue; + csview match[CREG_MAX_CAPTURES] = {0}; + if (cregex_match(&time_auto_regexes[idx], time_s, match) != CREG_OK) + continue; + validate_datetime_ranges(value->file, (int)idx, time_s, match); + return (int)time_auto_patterns[idx].type; + } } ASDF_LOG( @@ -739,11 +747,13 @@ static int validate_or_guess_time_format( return -1; } - /* Validate the value string against the format's auto-detect pattern, - * if one exists. This is informational only -- a mismatch is a - * warning, not an error. */ + /* Validate a string value against the format's auto-detect pattern, if one + * exists. This is informational only -- a mismatch is a warning, not an + * error. Numeric values have no such pattern and are left to the format + * parser. 
*/ int pat_idx = find_auto_pattern_idx(format); - if (pat_idx >= 0 && time_auto_regexes[pat_idx].error == CREG_OK) { + if (time_type == ASDF_VALUE_STRING && pat_idx >= 0 && + time_auto_regexes[pat_idx].error == CREG_OK) { csview match[CREG_MAX_CAPTURES] = {0}; if (cregex_match(&time_auto_regexes[pat_idx], time_s, match) != CREG_OK) ASDF_LOG( @@ -796,6 +806,9 @@ static asdf_value_err_t asdf_time_deserialize( return ASDF_VALUE_ERR_OOM; } + /* Capture the inferred type of the value for use later */ + asdf_value_type_t value_type = asdf_value_get_type(prop); + /* Capture the original scalar text verbatim regardless of the inferred YAML * type; the raw representation is exactly what the time format parsers * expect, even if it parses as a decimal type. */ @@ -837,7 +850,7 @@ static asdf_value_err_t asdf_time_deserialize( } } - int detected = validate_or_guess_time_format(value, time->value, format_s); + int detected = validate_or_guess_time_format(value, time->value, value_type, format_s); if (detected < 0) { err = ASDF_VALUE_ERR_PARSE_FAILURE; diff --git a/tests/fixtures/time.asdf b/tests/fixtures/time.asdf index 338849ea..4230fcb8 100644 --- a/tests/fixtures/time.asdf +++ b/tests/fixtures/time.asdf @@ -24,8 +24,8 @@ t_iso_time_bare: !time/time-1.4.0 '2025-10-14T13:26:41.0000' t_jyear_bare: !time/time-1.4.0 'J2025.78707178' t_yday_bare: !time/time-1.4.0 '2025:287:13:26:41.0000' t_yday_map_no_format: !time/time-1.4.0 {value: '2025:287:13:26:41.0000'} -t_jyear: !time/time-1.4.0 {format: jyear, value: '2025.78707178'} +t_jyear: !time/time-1.4.0 {format: jyear, value: 'J2025.78707178'} t_jyear_num: !time/time-1.4.0 {format: jyear, value: 1948.78707178} -t_decimalyear: !time/time-1.4.0 {format: decimalyear, value: '2025.5'} +t_decimalyear: !time/time-1.4.0 {format: decimalyear, value: 2025.5} t_iso_time_tai: !time/time-1.4.0 {format: iso_time, scale: tai, value: '2025-10-14T13:26:41.0000'} ... 
diff --git a/tests/test-time.c b/tests/test-time.c index e5d2996e..03576ef0 100644 --- a/tests/test-time.c +++ b/tests/test-time.c @@ -258,9 +258,12 @@ MU_TEST(test_asdf_time_jyear_decimalyear) { const char *expected_value; int expected_year; } cases[] = { - {"t_jyear", ASDF_TIME_FORMAT_JYEAR, "2025.78707178", 2025}, - {"t_jyear_num", ASDF_TIME_FORMAT_JYEAR, "1948.78707178", 1948}, - {"t_decimalyear", ASDF_TIME_FORMAT_DECIMALYEAR, "2025.5", 2025}, + /* string form: explicit J prefix is required for a string jyear */ + {"t_jyear", ASDF_TIME_FORMAT_JYEAR, "J2025.78707178", 2025}, + /* numeric form: a bare number is valid for jyear/decimalyear and the + * verbatim scalar text is captured at full precision */ + {"t_jyear_num", ASDF_TIME_FORMAT_JYEAR, "1948.78707178", 1948}, + {"t_decimalyear", ASDF_TIME_FORMAT_DECIMALYEAR, "2025.5", 2025}, }; for (size_t idx = 0; idx < sizeof(cases) / sizeof(cases[0]); idx++) {