From 8d00e4fff2d71771657412fbe3d201da4beee5ac Mon Sep 17 00:00:00 2001 From: Mikhail Melnik Date: Fri, 22 May 2026 12:52:41 +0200 Subject: [PATCH 1/2] Fix VariantUtil string decoding to use explicit UTF-8 charset --- .../org/apache/parquet/variant/VariantUtil.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java b/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java index 7ad867e0fd..7b8d477cab 100644 --- a/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java +++ b/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java @@ -20,6 +20,7 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import org.apache.parquet.Preconditions; @@ -657,12 +658,12 @@ static String getString(ByteBuffer value) { checkIndex(start + length - 1, value.limit()); if (value.hasArray()) { // If the buffer is backed by an array, we can use the array directly. - return new String(value.array(), value.arrayOffset() + start, length); + return new String(value.array(), value.arrayOffset() + start, length, StandardCharsets.UTF_8); } else { // If the buffer is not backed by an array, we need to copy the bytes into a new array. byte[] valueArray = new byte[length]; slice(value, start).get(valueArray); - return new String(valueArray); + return new String(valueArray, StandardCharsets.UTF_8); } } throw unexpectedType(Variant.Type.STRING, value); @@ -825,12 +826,13 @@ static String getMetadataKey(ByteBuffer metadata, int id) { } checkIndex(dataPos + nextOffset - 1, metadata.limit()); if (metadata.hasArray() && !metadata.isReadOnly()) { - return new String(metadata.array(), metadata.arrayOffset() + dataPos + offset, nextOffset - offset); + return new String( + metadata.array(), metadata.arrayOffset() + dataPos + offset, nextOffset - offset, StandardCharsets.UTF_8); } else { // ByteBuffer does not have an array, so we need to use the `get` method to read the bytes. byte[] metadataArray = new byte[nextOffset - offset]; slice(metadata, dataPos + offset).get(metadataArray); - return new String(metadataArray); + return new String(metadataArray, StandardCharsets.UTF_8); } } @@ -861,13 +863,14 @@ static HashMap getMetadataMap(ByteBuffer metadata) { new String( metadata.array(), metadata.arrayOffset() + pos + stringStart + offset, - nextOffset - offset), + nextOffset - offset, + StandardCharsets.UTF_8), id); } else { // ByteBuffer does not have an array, so we need to use the `get` method to read the bytes. byte[] metadataArray = new byte[nextOffset - offset]; slice(metadata, pos + stringStart + offset).get(metadataArray); - result.put(new String(metadataArray), id); + result.put(new String(metadataArray, StandardCharsets.UTF_8), id); } offset = nextOffset; } From dd1b3da83c13678688d8cd30cabb641aa722cba7 Mon Sep 17 00:00:00 2001 From: Mikhail Melnik Date: Sat, 23 May 2026 20:57:27 +0200 Subject: [PATCH 2/2] Apply Spotless formatting --- .../main/java/org/apache/parquet/variant/VariantUtil.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java b/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java index 7b8d477cab..f50a0f3162 100644 --- a/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java +++ b/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java @@ -827,7 +827,10 @@ static String getMetadataKey(ByteBuffer metadata, int id) { checkIndex(dataPos + nextOffset - 1, metadata.limit()); if (metadata.hasArray() && !metadata.isReadOnly()) { return new String( - metadata.array(), metadata.arrayOffset() + dataPos + offset, nextOffset - offset, StandardCharsets.UTF_8); + metadata.array(), + metadata.arrayOffset() + dataPos + offset, + nextOffset - offset, + StandardCharsets.UTF_8); } else { // ByteBuffer does not have an array, so we need to use the `get` method to read the bytes. byte[] metadataArray = new byte[nextOffset - offset];