From 8872b6e1a282f7f5d21397a223842091b3cc17cb Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Fri, 20 Sep 2019 20:56:24 +0800 Subject: [PATCH 1/2] [ARROW-6184][Java] Provide hash table based dictionary encoder --- .../HashTableDictionaryEncoder.java | 151 ++++++++ .../TestHashTableDictionaryEncoder.java | 350 ++++++++++++++++++ 2 files changed, 501 insertions(+) create mode 100644 java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java create mode 100644 java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java new file mode 100644 index 000000000000..aec9d6bc343e --- /dev/null +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.arrow.algorithm.dictionary;
+
+import java.util.HashMap;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.ElementAddressableVector;
+
+/**
+ * Dictionary encoder based on hash table.
+ * @param <E> encoded vector type.
+ * @param <D> decoded vector type, which is also the dictionary type.
+ */
+public class HashTableDictionaryEncoder<E extends BaseIntVector, D extends ElementAddressableVector> {
+
+  /**
+   * The dictionary for encoding/decoding.
+   * It does not need to be sorted, because elements are located through a hash table.
+   */
+  private final D dictionary;
+
+  /**
+   * The hasher used to compute the hash code.
+   */
+  private final ArrowBufHasher hasher;
+
+  /**
+   * A flag indicating if null should be encoded.
+   */
+  private final boolean encodeNull;
+
+  /**
+   * The hash map for distinct dictionary entries.
+   * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+   */
+  private final HashMap<ArrowBufPointer, Integer> hashMap = new HashMap<>();
+
+  /**
+   * The pointer used to probe each element to encode.
+   * It is re-pointed for every input element, so it must never be stored in the hash map.
+   */
+  private final ArrowBufPointer reusablePointer;
+
+  /**
+   * Constructs a dictionary encoder that does not encode nulls
+   * and uses {@link SimpleHasher#INSTANCE} for hashing.
+   * @param dictionary the dictionary.
+   */
+  public HashTableDictionaryEncoder(D dictionary) {
+    this(dictionary, false);
+  }
+
+  /**
+   * Constructs a dictionary encoder that uses {@link SimpleHasher#INSTANCE} for hashing.
+   * @param dictionary the dictionary.
+   * @param encodeNull a flag indicating if null should be encoded.
+   *     It determines the behaviors for processing null values in the input during encoding/decoding.
+   *     <ul>
+   *     <li>
+   *       For encoding, when a null is encountered in the input,
+   *       1) If the flag is set to true, the encoder searches for the value in the dictionary,
+   *       and outputs the index in the dictionary.
+   *       2) If the flag is set to false, the encoder simply produces a null in the output.
+   *     </li>
+   *     <li>
+   *       For decoding, when a null is encountered in the input,
+   *       1) If the flag is set to true, the decoder should never expect a null in the input.
+   *       2) If set to false, the decoder simply produces a null in the output.
+   *     </li>
+   *     </ul>
+   */
+  public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
+    this(dictionary, encodeNull, SimpleHasher.INSTANCE);
+  }
+
+  /**
+   * Constructs a dictionary encoder.
+   * The hash table over the dictionary is built eagerly, inside this constructor.
+   * @param dictionary the dictionary.
+   * @param encodeNull a flag indicating if null should be encoded.
+   *     It determines the behaviors for processing null values in the input during encoding/decoding.
+   *     <ul>
+   *     <li>
+   *       For encoding, when a null is encountered in the input,
+   *       1) If the flag is set to true, the encoder searches for the value in the dictionary,
+   *       and outputs the index in the dictionary.
+   *       2) If the flag is set to false, the encoder simply produces a null in the output.
+   *     </li>
+   *     <li>
+   *       For decoding, when a null is encountered in the input,
+   *       1) If the flag is set to true, the decoder should never expect a null in the input.
+   *       2) If set to false, the decoder simply produces a null in the output.
+   *     </li>
+   *     </ul>
+   * @param hasher the hasher used to calculate the hash code.
+   */
+  public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) {
+    this.dictionary = dictionary;
+    this.hasher = hasher;
+    this.encodeNull = encodeNull;
+
+    reusablePointer = new ArrowBufPointer(hasher);
+
+    buildHashMap();
+  }
+
+  /**
+   * Populates the hash map with one entry per dictionary slot.
+   * Each entry gets its own pointer object, since the map keeps a reference to the key.
+   * If two slots hold equal elements, the later index replaces the earlier one.
+   */
+  private void buildHashMap() {
+    final int dictionaryLength = dictionary.getValueCount();
+    for (int i = 0; i < dictionaryLength; i++) {
+      ArrowBufPointer pointer = new ArrowBufPointer(hasher);
+      dictionary.getDataPointer(i, pointer);
+      hashMap.put(pointer, i);
+    }
+  }
+
+  /**
+   * Encodes an input vector by a hash table.
+   * So the algorithm takes O(n) time, where n is the length of the input vector.
+   *
+   * <p>When nulls are not encoded, the output slot of a null input element is not written at all.
+   * NOTE(review): that slot reads as null only if the output vector was not populated before;
+   * confirm that callers always pass a freshly allocated output vector.
+   *
+   * @param input the input vector.
+   * @param output the output vector.
+   * @throws IllegalArgumentException if an element to encode is not found in the dictionary.
+   **/
+  public void encode(D input, E output) {
+    final int valueCount = input.getValueCount();
+    for (int i = 0; i < valueCount; i++) {
+      if (!encodeNull && input.isNull(i)) {
+        // nulls are not encoded: skip the lookup and leave slot i of the output untouched.
+        continue;
+      }
+
+      input.getDataPointer(i, reusablePointer);
+      Integer index = hashMap.get(reusablePointer);
+
+      if (index == null) {
+        throw new IllegalArgumentException("The data element is not found in the dictionary");
+      }
+      output.setWithPossibleTruncate(i, index);
+    }
+    output.setValueCount(valueCount);
+  }
+}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java new file mode 100644 index 000000000000..bcc2ef077548 --- /dev/null +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.algorithm.dictionary; + +import static junit.framework.TestCase.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Random; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryEncoder; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test cases for {@link HashTableDictionaryEncoder}. 
+ */
+public class TestHashTableDictionaryEncoder {
+
+  /** Number of elements in the randomly generated vectors to encode. */
+  private static final int VECTOR_LENGTH = 50;
+
+  /** Number of entries in the generated dictionaries. */
+  private static final int DICTIONARY_LENGTH = 10;
+
+  private BufferAllocator allocator;
+
+  byte[] zero = "000".getBytes(StandardCharsets.UTF_8);
+  byte[] one = "111".getBytes(StandardCharsets.UTF_8);
+  byte[] two = "222".getBytes(StandardCharsets.UTF_8);
+
+  byte[][] data = new byte[][]{zero, one, two};
+
+  @Before
+  public void prepare() {
+    allocator = new RootAllocator(1024 * 1024);
+  }
+
+  @After
+  public void shutdown() {
+    allocator.close();
+  }
+
+  /**
+   * Encodes random values against a dictionary that maps the string "i" to index i,
+   * then decodes the result and checks both directions.
+   */
+  @Test
+  public void testEncodeAndDecode() {
+    Random random = new Random();
+    try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+         IntVector encodedVector = new IntVector("encoded vector", allocator);
+         VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+      // set up dictionary
+      dictionary.allocateNew();
+      for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+        // encode "i" as i
+        dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8));
+      }
+      dictionary.setValueCount(DICTIONARY_LENGTH);
+
+      // set up raw vector
+      rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+      for (int i = 0; i < VECTOR_LENGTH; i++) {
+        int val = (random.nextInt() & Integer.MAX_VALUE) % DICTIONARY_LENGTH;
+        rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8));
+      }
+      rawVector.setValueCount(VECTOR_LENGTH);
+
+      HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+          new HashTableDictionaryEncoder<>(dictionary, false);
+
+      // perform encoding
+      encodedVector.allocateNew();
+      encoder.encode(rawVector, encodedVector);
+
+      // verify encoding results
+      assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+      for (int i = 0; i < VECTOR_LENGTH; i++) {
+        assertArrayEquals(
+            rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+      }
+
+      // perform decoding
+      Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+      try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+
+        // verify decoding results
+        assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+        for (int i = 0; i < VECTOR_LENGTH; i++) {
+          assertArrayEquals(
+              String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i));
+        }
+      }
+    }
+  }
+
+  /**
+   * Same round trip as above, but the dictionary holds a null at index 0,
+   * every tenth input element is null, and the encoder is asked to encode nulls.
+   */
+  @Test
+  public void testEncodeAndDecodeWithNull() {
+    Random random = new Random();
+    try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+         IntVector encodedVector = new IntVector("encoded vector", allocator);
+         VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+      // set up dictionary
+      dictionary.allocateNew();
+      dictionary.setNull(0);
+      for (int i = 1; i < DICTIONARY_LENGTH; i++) {
+        // encode "i" as i
+        dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8));
+      }
+      dictionary.setValueCount(DICTIONARY_LENGTH);
+
+      // set up raw vector
+      rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH);
+      for (int i = 0; i < VECTOR_LENGTH; i++) {
+        if (i % 10 == 0) {
+          rawVector.setNull(i);
+        } else {
+          // values are drawn from [1, DICTIONARY_LENGTH), because index 0 is reserved for null
+          int val = (random.nextInt() & Integer.MAX_VALUE) % (DICTIONARY_LENGTH - 1) + 1;
+          rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8));
+        }
+      }
+      rawVector.setValueCount(VECTOR_LENGTH);
+
+      HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+          new HashTableDictionaryEncoder<>(dictionary, true);
+
+      // perform encoding
+      encodedVector.allocateNew();
+      encoder.encode(rawVector, encodedVector);
+
+      // verify encoding results
+      assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
+      for (int i = 0; i < VECTOR_LENGTH; i++) {
+        if (i % 10 == 0) {
+          assertEquals(0, encodedVector.get(i));
+        } else {
+          assertArrayEquals(
+              rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+        }
+      }
+
+      // perform decoding
+      Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
+      try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+        // verify decoding results
+        assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
+        for (int i = 0; i < VECTOR_LENGTH; i++) {
+          if (i % 10 == 0) {
+            assertTrue(decodedVector.isNull(i));
+          } else {
+            assertArrayEquals(
+                String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i));
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Encoding a null with {@code encodeNull == true} must fail when the dictionary has no null entry.
+   */
+  @Test
+  public void testEncodeNoNullInDictionary() {
+    try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
+         IntVector encodedVector = new IntVector("encoded vector", allocator);
+         VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+
+      // set up dictionary, with no null in it.
+      dictionary.allocateNew();
+      for (int i = 0; i < DICTIONARY_LENGTH; i++) {
+        // encode "i" as i
+        dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8));
+      }
+      dictionary.setValueCount(DICTIONARY_LENGTH);
+
+      // the vector to encode has a null inside.
+      rawVector.allocateNew(1);
+      rawVector.setNull(0);
+      rawVector.setValueCount(1);
+
+      encodedVector.allocateNew();
+
+      HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+          new HashTableDictionaryEncoder<>(dictionary, true);
+
+      // the encoder should encode null, but no null in the dictionary,
+      // so an exception should be thrown.
+      assertThrows(IllegalArgumentException.class, () -> {
+        encoder.encode(rawVector, encodedVector);
+      });
+    }
+  }
+
+  /**
+   * Encodes a small fixed string vector and checks the exact indices produced.
+   */
+  @Test
+  public void testEncodeStrings() {
+    // Create a new value vector
+    try (final VarCharVector vector = new VarCharVector("foo", allocator);
+         final IntVector encoded = new IntVector("encoded", allocator);
+         final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+
+      vector.allocateNew(512, 5);
+      encoded.allocateNew();
+
+      // set some values
+      vector.setSafe(0, zero, 0, zero.length);
+      vector.setSafe(1, one, 0, one.length);
+      vector.setSafe(2, one, 0, one.length);
+      vector.setSafe(3, two, 0, two.length);
+      vector.setSafe(4, zero, 0, zero.length);
+      vector.setValueCount(5);
+
+      // set some dictionary values; each entry is written with its own length
+      dictionaryVector.allocateNew(512, 3);
+      dictionaryVector.setSafe(0, zero, 0, zero.length);
+      dictionaryVector.setSafe(1, one, 0, one.length);
+      dictionaryVector.setSafe(2, two, 0, two.length);
+      dictionaryVector.setValueCount(3);
+
+      HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+          new HashTableDictionaryEncoder<>(dictionaryVector);
+      encoder.encode(vector, encoded);
+
+      // verify indices
+      assertEquals(5, encoded.getValueCount());
+      assertEquals(0, encoded.get(0));
+      assertEquals(1, encoded.get(1));
+      assertEquals(1, encoded.get(2));
+      assertEquals(2, encoded.get(3));
+      assertEquals(0, encoded.get(4));
+
+      // now run through the decoder and verify we get the original back
+      Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+      try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+
+        assertEquals(vector.getValueCount(), decoded.getValueCount());
+        for (int i = 0; i < 5; i++) {
+          assertEquals(vector.getObject(i), decoded.getObject(i));
+        }
+      }
+    }
+  }
+
+  /**
+   * Encodes a vector of 10000 elements cycling through three distinct values.
+   */
+  @Test
+  public void testEncodeLargeVector() {
+    // Create a new value vector
+    try (final VarCharVector vector = new VarCharVector("foo", allocator);
+         final IntVector encoded = new IntVector("encoded", allocator);
+         final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+      vector.allocateNew();
+      encoded.allocateNew();
+
+      int count = 10000;
+
+      for (int i = 0; i < count; ++i) {
+        vector.setSafe(i, data[i % 3], 0, data[i % 3].length);
+      }
+      vector.setValueCount(count);
+
+      dictionaryVector.allocateNew(512, 3);
+      dictionaryVector.setSafe(0, zero, 0, zero.length);
+      dictionaryVector.setSafe(1, one, 0, one.length);
+      dictionaryVector.setSafe(2, two, 0, two.length);
+      dictionaryVector.setValueCount(3);
+
+      HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
+          new HashTableDictionaryEncoder<>(dictionaryVector);
+      encoder.encode(vector, encoded);
+
+      assertEquals(count, encoded.getValueCount());
+      for (int i = 0; i < count; ++i) {
+        assertEquals(i % 3, encoded.get(i));
+      }
+
+      // now run through the decoder and verify we get the original back
+      Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+      try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
+        assertEquals(vector.getClass(), decoded.getClass());
+        assertEquals(vector.getValueCount(), decoded.getValueCount());
+        for (int i = 0; i < count; ++i) {
+          assertEquals(vector.getObject(i), decoded.getObject(i));
+        }
+      }
+    }
+  }
+
+  /**
+   * Encodes a small fixed binary vector and checks the exact indices produced.
+   */
+  @Test
+  public void testEncodeBinaryVector() {
+    // Create a new value vector
+    try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
+         final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+         final IntVector encoded = new IntVector("encoded", allocator)) {
+      vector.allocateNew(512, 5);
+      encoded.allocateNew();
+
+      // set some values
+      vector.setSafe(0, zero, 0, zero.length);
+      vector.setSafe(1, one, 0, one.length);
+      vector.setSafe(2, one, 0, one.length);
+      vector.setSafe(3, two, 0, two.length);
+      vector.setSafe(4, zero, 0, zero.length);
+      vector.setValueCount(5);
+
+      // set some dictionary values; each entry is written with its own length
+      dictionaryVector.allocateNew(512, 3);
+      dictionaryVector.setSafe(0, zero, 0, zero.length);
+      dictionaryVector.setSafe(1, one, 0, one.length);
+      dictionaryVector.setSafe(2, two, 0, two.length);
+      dictionaryVector.setValueCount(3);
+
+      HashTableDictionaryEncoder<IntVector, VarBinaryVector> encoder =
+          new HashTableDictionaryEncoder<>(dictionaryVector);
+      encoder.encode(vector, encoded);
+
+      assertEquals(5, encoded.getValueCount());
+      assertEquals(0, encoded.get(0));
+      assertEquals(1, encoded.get(1));
+      assertEquals(1, encoded.get(2));
+      assertEquals(2, encoded.get(3));
+      assertEquals(0, encoded.get(4));
+
+      // now run through the decoder and verify we get the original back
+      Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+      try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dict)) {
+
+        assertEquals(vector.getClass(), decoded.getClass());
+        assertEquals(vector.getValueCount(), decoded.getValueCount());
+        for (int i = 0; i < 5; i++) {
+          assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i)));
+        }
+      }
+    }
+  }
+}
From 7685b77ed8a54ea151103490d5324a4ea10cf39b Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 8 Oct 2019 11:03:42 +0800 Subject: [PATCH 2/2] [ARROW-6184][Java] Resolve comments --- .../dictionary/HashTableDictionaryEncoder.java | 17 +++++------------ .../dictionary/SearchDictionaryEncoder.java | 17 +++++------------ .../TestHashTableDictionaryEncoder.java | 2 +- .../dictionary/TestSearchDictionaryEncoder.java | 2 +- 4 files changed, 12 insertions(+), 26 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java index aec9d6bc343e..d0da655f314b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java +++
b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java @@ -93,18 +93,11 @@ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { * Constructs a dictionary encoder. * @param dictionary the dictionary. * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding/decoding. - *
  • - * For encoding, when a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. - *
  • - *
  • - * For decoding, when a null is encountered in the input, - * 1) If the flag is set to true, the decoder should never expect a null in the input. - * 2) If set to false, the decoder simply produces a null in the output. - *
  • + * It determines the behaviors for processing null values in the input during encoding. + * When a null is encountered in the input, + * 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. + * 2) If the flag is set to false, the encoder simply produces a null in the output. * @param hasher the hasher used to calculate the hash code. */ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java index 091a6f44dee6..9145cd9b1a69 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java @@ -59,18 +59,11 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding/decoding. - *
  • - * For encoding, when a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. - *
  • - *
  • - * For decoding, when a null is encountered in the input, - * 1) If the flag is set to true, the decoder should never expect a null in the input. - * 2) If set to false, the decoder simply produces a null in the output. - *
  • + * It determines the behaviors for processing null values in the input during encoding. + * When a null is encountered in the input, + * 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. + * 2) If the flag is set to false, the encoder simply produces a null in the output. */ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java index bcc2ef077548..698e958b372c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java @@ -176,7 +176,7 @@ public void testEncodeAndDecodeWithNull() { } @Test - public void testEncodeNoNullInDictionary() { + public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); IntVector encodedVector = new IntVector("encoded vector", allocator); VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java index 22aaf183d8ac..81f912d21e19 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java @@ -182,7 +182,7 @@ public void testEncodeAndDecodeWithNull() { } @Test - public void testEncodeNoNullInDictionary() { + public void 
testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); IntVector encodedVector = new IntVector("encoded vector", allocator); VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {