diff --git a/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java b/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java index 7ad867e0fd..f50a0f3162 100644 --- a/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java +++ b/parquet-variant/src/main/java/org/apache/parquet/variant/VariantUtil.java @@ -20,6 +20,7 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import org.apache.parquet.Preconditions; @@ -657,12 +658,12 @@ static String getString(ByteBuffer value) { checkIndex(start + length - 1, value.limit()); if (value.hasArray()) { // If the buffer is backed by an array, we can use the array directly. - return new String(value.array(), value.arrayOffset() + start, length); + return new String(value.array(), value.arrayOffset() + start, length, StandardCharsets.UTF_8); } else { // If the buffer is not backed by an array, we need to copy the bytes into a new array. byte[] valueArray = new byte[length]; slice(value, start).get(valueArray); - return new String(valueArray); + return new String(valueArray, StandardCharsets.UTF_8); } } throw unexpectedType(Variant.Type.STRING, value); @@ -825,12 +826,16 @@ static String getMetadataKey(ByteBuffer metadata, int id) { } checkIndex(dataPos + nextOffset - 1, metadata.limit()); if (metadata.hasArray() && !metadata.isReadOnly()) { - return new String(metadata.array(), metadata.arrayOffset() + dataPos + offset, nextOffset - offset); + return new String( + metadata.array(), + metadata.arrayOffset() + dataPos + offset, + nextOffset - offset, + StandardCharsets.UTF_8); } else { // ByteBuffer does not have an array, so we need to use the `get` method to read the bytes. byte[] metadataArray = new byte[nextOffset - offset]; slice(metadata, dataPos + offset).get(metadataArray); - return new String(metadataArray); + return new String(metadataArray, StandardCharsets.UTF_8); } } @@ -861,13 +866,14 @@ static HashMap getMetadataMap(ByteBuffer metadata) { new String( metadata.array(), metadata.arrayOffset() + pos + stringStart + offset, - nextOffset - offset), + nextOffset - offset, + StandardCharsets.UTF_8), id); } else { // ByteBuffer does not have an array, so we need to use the `get` method to read the bytes. byte[] metadataArray = new byte[nextOffset - offset]; slice(metadata, pos + stringStart + offset).get(metadataArray); - result.put(new String(metadataArray), id); + result.put(new String(metadataArray, StandardCharsets.UTF_8), id); } offset = nextOffset; }