From 780dc227f6aefde1140808cd339844d38f7b7569 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Thu, 29 Jan 2026 09:52:25 +0200 Subject: [PATCH 1/2] GH-48866: [C++][Gandiva] Truncate subseconds beyond milliseconds in `castTIMESTAMP_utf8` and `castTIME_utf8` (#48867) ### Rationale for this change Fixes #48866. The Gandiva precompiled time functions `castTIMESTAMP_utf8` and `castTIME_utf8` currently reject timestamp and time string literals with more than 3 subsecond digits (beyond millisecond precision), throwing an "Invalid millis" error. This behavior is inconsistent with other implementations. ### What changes are included in this PR? - Fixed `castTIMESTAMP_utf8` and `castTIME_utf8` functions to truncate subseconds beyond 3 digits instead of throwing an error - Updated tests. Replaced error-expecting tests with truncation verification tests and added edge cases ### Are these changes tested? Yes ### Are there any user-facing changes? No * GitHub Issue: #48866 Authored-by: Arkadii Kravchuk Signed-off-by: Sutou Kouhei --- cpp/src/gandiva/precompiled/time.cc | 49 +++++++++++---------- cpp/src/gandiva/precompiled/time_test.cc | 54 +++++++++++++++++------- 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index 9b09228773b2..ecfff4fe72a0 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -564,6 +564,27 @@ bool is_valid_time(const int hours, const int minutes, const int seconds) { seconds < 60; } +// Normalize sub-seconds value to milliseconds precision (3 digits). +// Truncates if more than 3 digits are provided, pads with zeros if fewer than 3 digits +static inline int32_t normalize_subseconds_to_millis(int32_t subseconds, + int32_t num_digits) { + if (num_digits <= 0 || num_digits == 3) { + // No need to adjust + return subseconds; + } + // Calculate the power of 10 adjustment needed + int32_t digit_diff = num_digits - 3; + while (digit_diff > 0) { + subseconds /= 10; + digit_diff--; + } + while (digit_diff < 0) { + subseconds *= 10; + digit_diff++; + } + return subseconds; +} + // MONTHS_BETWEEN returns number of months between dates date1 and date2. // If date1 is later than date2, then the result is positive. // If date1 is earlier than date2, then the result is negative. @@ -744,17 +765,8 @@ gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 l } // adjust the milliseconds - if (sub_seconds_len > 0) { - if (sub_seconds_len > 3) { - const char* msg = "Invalid millis for timestamp value "; - set_error_for_date(length, input, msg, context); - return 0; - } - while (sub_seconds_len < 3) { - ts_fields[TimeFields::kSubSeconds] *= 10; - sub_seconds_len++; - } - } + ts_fields[TimeFields::kSubSeconds] = + normalize_subseconds_to_millis(ts_fields[TimeFields::kSubSeconds], sub_seconds_len); // handle timezone if (encountered_zone) { int err = 0; @@ -864,18 +876,9 @@ gdv_time32 castTIME_utf8(int64_t context, const char* input, int32_t length) { } // adjust the milliseconds - if (sub_seconds_len > 0) { - if (sub_seconds_len > 3) { - const char* msg = "Invalid millis for time value "; - set_error_for_date(length, input, msg, context); - return 0; - } - - while (sub_seconds_len < 3) { - time_fields[TimeFields::kSubSeconds - TimeFields::kHours] *= 10; - sub_seconds_len++; - } - } + time_fields[TimeFields::kSubSeconds - TimeFields::kHours] = + normalize_subseconds_to_millis( + time_fields[TimeFields::kSubSeconds - TimeFields::kHours], sub_seconds_len); int32_t input_hours = time_fields[TimeFields::kHours - TimeFields::kHours]; int32_t input_minutes = time_fields[TimeFields::kMinutes - TimeFields::kHours]; diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc index 316e5ecc2602..bdaf3dc2a5fa 100644 --- a/cpp/src/gandiva/precompiled/time_test.cc +++ b/cpp/src/gandiva/precompiled/time_test.cc @@ -121,15 +121,26 @@ TEST(TestTime, TestCastTimestamp) { "Not a valid time for timestamp value 2000-01-01 00:00:100"); context.Reset(); - EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), 0); - EXPECT_EQ(context.get_error(), - "Invalid millis for timestamp value 2000-01-01 00:00:00.0001"); - context.Reset(); - - EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), 0); - EXPECT_EQ(context.get_error(), - "Invalid millis for timestamp value 2000-01-01 00:00:00.1000"); - context.Reset(); + // Test truncation of subseconds to 3 digits (milliseconds) + // "2000-01-01 00:00:00.0001" should truncate to "2000-01-01 00:00:00.000" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.0001", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.000", 23)); + + // "2000-01-01 00:00:00.1000" should truncate to "2000-01-01 00:00:00.100" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1000", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.100", 23)); + + // "2000-01-01 00:00:00.123456789" should truncate to "2000-01-01 00:00:00.123" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123456789", 29), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.123", 23)); + + // "2000-01-01 00:00:00.1999" should truncate to "2000-01-01 00:00:00.199" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1999", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23)); + + // "2000-01-01 00:00:00.1994" should truncate to "2000-01-01 00:00:00.199" + EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.1994", 24), + castTIMESTAMP_utf8(context_ptr, "2000-01-01 00:00:00.199", 23)); } TEST(TestTime, TestCastTimeUtf8) { @@ -165,13 +176,26 @@ TEST(TestTime, TestCastTimeUtf8) { EXPECT_EQ(context.get_error(), "Not a valid time value 00:00:100"); context.Reset(); - EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), 0); - EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.0001"); - context.Reset(); + // Test truncation of subseconds to 3 digits (milliseconds) + // "00:00:00.0001" should truncate to "00:00:00.000" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.0001", 13), + castTIME_utf8(context_ptr, "00:00:00.000", 12)); - EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), 0); - EXPECT_EQ(context.get_error(), "Invalid millis for time value 00:00:00.1000"); - context.Reset(); + // "00:00:00.1000" should truncate to "00:00:00.100" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1000", 13), + castTIME_utf8(context_ptr, "00:00:00.100", 12)); + + // "9:45:30.123456789" should truncate to "9:45:30.123" + EXPECT_EQ(castTIME_utf8(context_ptr, "9:45:30.123456789", 17), + castTIME_utf8(context_ptr, "9:45:30.123", 11)); + + // "00:00:00.1999" should truncate to "00:00:00.199" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1999", 13), + castTIME_utf8(context_ptr, "00:00:00.199", 12)); + + // "00:00:00.1994" should truncate to "00:00:00.199" + EXPECT_EQ(castTIME_utf8(context_ptr, "00:00:00.1994", 13), + castTIME_utf8(context_ptr, "00:00:00.199", 12)); } #ifndef _WIN32 From 6dce7d12b345c769ced18586f4adbf536994ace3 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Wed, 3 Jun 2026 01:22:16 +0300 Subject: [PATCH 2/2] DX-120970 Fix CI: ubuntu-24.04-arm runner, docker-compose shim, remove brew pin - Replace unavailable buildjet-8vcpu-ubuntu-2204-arm with ubuntu-24.04-arm - Add docker-compose shim: ubuntu-24.04-arm ships only Docker Compose v2 as a plugin (docker compose); archery requires the standalone binary - Remove brew pin cmake/boost: newer Homebrew (runner >=20260525) resolves cmake via the API and hits the cmake cask when pinning; the pins are not needed in CI since no brew upgrade runs after the local tap install - Fix stale comment: remove "Add commentMore actions" GitHub UI artifact --- dev/tasks/java-jars/github.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index ff1834e63b91..50a57c22aab5 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -39,7 +39,7 @@ jobs: archery_arch: "amd64" archery_arch_alias: "x86_64" archery_arch_short: "amd64" - - runs_on: ["buildjet-8vcpu-ubuntu-2204-arm"] + - runs_on: ["ubuntu-24.04-arm"] arch: "aarch_64" archery_arch: "arm64v8" archery_arch_alias: "aarch64" @@ -65,6 +65,11 @@ jobs: env: {{ macros.github_set_sccache_envvars()|indent(8) }} run: | + # ubuntu-24.04-arm ships Docker Compose v2 as a plugin only; archery needs standalone docker-compose + if ! command -v docker-compose &>/dev/null; then + printf '#!/bin/sh\nexec docker compose "$@"\n' | sudo tee /usr/local/bin/docker-compose >/dev/null + sudo chmod +x /usr/local/bin/docker-compose + fi archery docker run \ -e ARROW_JAVA_BUILD=OFF \ -e ARROW_JAVA_TEST=OFF \ @@ -148,7 +153,7 @@ jobs: # used on test We uninstall Homebrew's Protobuf to ensure using # bundled Protobuf. brew uninstall protobuf - # fix cmake and boost versionsAdd commentMore actions + # fix cmake and boost versions brew uninstall -f boost || true brew uninstall -f cmake || true mkdir -p homebrew-custom/Formula @@ -158,8 +163,6 @@ jobs: cp ./homebrew-custom/Formula/*.rb "$(brew --repo local/homebrew-custom)/Formula/" brew install -v local/homebrew-custom/cmake brew install -v local/homebrew-custom/boost - brew pin cmake - brew pin boost #