Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers
@Override
public List<ArrowBuf> getFieldBuffers() {
List<ArrowBuf> result = new ArrayList<>(2);

// Ensure offset buffer has at least one entry for offset[0].
// According to the Arrow specification, the offset buffer must have N+1 entries;
// even when N=0, it should still contain [0].
if (offsetBuffer.capacity() == 0) {
offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
}

setReaderAndWriterIndex();
result.add(validityBuffer);
result.add(offsetBuffer);
Expand Down Expand Up @@ -309,7 +317,8 @@ private void setReaderAndWriterIndex() {
offsetBuffer.readerIndex(0);
if (valueCount == 0) {
validityBuffer.writerIndex(0);
offsetBuffer.writerIndex(0);
// Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec
offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity()));
} else {
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,14 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers
@Override
public List<ArrowBuf> getFieldBuffers() {
List<ArrowBuf> result = new ArrayList<>(2);

// Ensure offset buffer has at least one entry for offset[0].
// According to the Arrow specification, the offset buffer must have N+1 entries;
// even when N=0, it should still contain [0].
if (offsetBuffer.capacity() == 0) {
offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
}

setReaderAndWriterIndex();
result.add(validityBuffer);
result.add(offsetBuffer);
Expand Down Expand Up @@ -267,7 +275,8 @@ private void setReaderAndWriterIndex() {
offsetBuffer.readerIndex(0);
if (valueCount == 0) {
validityBuffer.writerIndex(0);
offsetBuffer.writerIndex(0);
// Even when valueCount is 0, offset buffer should have offset[0] per Arrow spec
offsetBuffer.writerIndex(Math.min(OFFSET_WIDTH, offsetBuffer.capacity()));
} else {
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount));
offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,36 @@ public void testCopyValueSafeForExtensionType() throws Exception {
}
}

@Test
public void testNestedEmptyLargeListOffsetBuffer() {
  // A nested LargeListVector whose writers were never invoked must still expose an
  // offset buffer holding offset[0]: the Arrow spec requires N+1 offset entries,
  // which for N=0 means the single entry [0].
  try (LargeListVector parent = LargeListVector.empty("outer", allocator)) {
    // Build the LargeList<LargeList<Int>> hierarchy.
    parent.addOrGetVector(FieldType.nullable(MinorType.LARGELIST.getType()));
    LargeListVector child = (LargeListVector) parent.getDataVector();
    child.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));

    // Allocate only the parent, mimicking a child list that is never written to.
    parent.allocateNew();
    parent.setValueCount(0);

    // Field buffers are what IPC serialization consumes; index 1 is the offset buffer
    // (getFieldBuffers adds the validity buffer first, then the offset buffer).
    ArrowBuf childOffsets = child.getFieldBuffers().get(1);

    String tooSmallMessage =
        "Inner LargeList offset buffer should have at least "
            + LargeListVector.OFFSET_WIDTH
            + " bytes for offset[0]";
    assertTrue(childOffsets.readableBytes() >= LargeListVector.OFFSET_WIDTH, tooSmallMessage);

    // offset[0] itself must be zero.
    assertEquals(0L, child.getOffsetBuffer().getLong(0));
  }
}

private void writeIntValues(UnionLargeListWriter writer, int[] values) {
writer.startList();
for (int v : values) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1379,6 +1379,45 @@ public void testCopyValueSafeForExtensionType() throws Exception {
}
}

@Test
public void testNestedEmptyListOffsetBuffer() {
  // Every level of a 3-level nested ListVector must expose an offset buffer holding
  // offset[0], even when the nested writers were never invoked: the Arrow spec requires
  // N+1 offset entries, which for N=0 means the single entry [0].
  try (ListVector outermost = ListVector.empty("level0", allocator)) {
    // Build the List<List<List<Int>>> hierarchy.
    outermost.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
    ListVector middle = (ListVector) outermost.getDataVector();
    middle.addOrGetVector(FieldType.nullable(MinorType.LIST.getType()));
    ListVector innermost = (ListVector) middle.getDataVector();
    innermost.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));

    // Allocate only the outermost vector, mimicking nested levels that stay empty.
    outermost.allocateNew();
    outermost.setValueCount(0);

    // Fetch the field buffers of both nested levels before asserting anything;
    // index 1 is the offset buffer (validity buffer is added first).
    List<ArrowBuf> middleBuffers = middle.getFieldBuffers();
    List<ArrowBuf> innermostBuffers = innermost.getFieldBuffers();
    ArrowBuf middleOffsets = middleBuffers.get(1);
    ArrowBuf innermostOffsets = innermostBuffers.get(1);

    String middleMessage =
        "Level1 offset buffer should have at least "
            + BaseRepeatedValueVector.OFFSET_WIDTH
            + " bytes for offset[0]";
    assertTrue(
        middleOffsets.readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, middleMessage);

    String innermostMessage =
        "Level2 offset buffer should have at least "
            + BaseRepeatedValueVector.OFFSET_WIDTH
            + " bytes for offset[0]";
    assertTrue(
        innermostOffsets.readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH,
        innermostMessage);

    // offset[0] must be zero at every nested level.
    assertEquals(0, middle.getOffsetBuffer().getInt(0));
    assertEquals(0, innermost.getOffsetBuffer().getInt(0));
  }
}

private void writeIntValues(UnionListWriter writer, int[] values) {
writer.startList();
for (int v : values) {
Expand Down
Loading