Java源码示例:org.apache.parquet.column.page.PageReader
示例1
/**
 * Creates a reader for triplets.
 *
 * <p>Stores the arguments, reads the dictionary page from the page reader (decoding it and
 * handing it to the converter when the converter reports dictionary support), and records the
 * total value count reported by the page reader.
 *
 * @param path the descriptor for the corresponding column; must not be null
 * @param pageReader the underlying store to read from; must not be null
 * @param converter a converter that materializes the values in this column in the current
 *     record; must not be null
 * @param writerVersion writer version string from the Parquet file being read; stored as given
 *     without a null check
 * @throws NullPointerException if {@code path}, {@code pageReader} or {@code converter} is null
 * @throws ParquetDecodingException if the dictionary page cannot be decoded, or if the total
 *     value count reported by the page reader is not positive
 */
ColumnReaderBase(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) {
  this.path = Objects.requireNonNull(path, "path cannot be null");
  this.pageReader = Objects.requireNonNull(pageReader, "pageReader cannot be null");
  this.converter = Objects.requireNonNull(converter, "converter cannot be null");
  this.writerVersion = writerVersion;
  this.maxDefinitionLevel = path.getMaxDefinitionLevel();

  DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
  if (dictionaryPage == null) {
    // No dictionary page was returned for this column.
    this.dictionary = null;
  } else {
    try {
      this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
      // Only converters that declare dictionary support receive the decoded dictionary.
      if (converter.hasDictionarySupport()) {
        converter.setDictionary(dictionary);
      }
    } catch (IOException e) {
      throw new ParquetDecodingException("could not decode the dictionary for " + path, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (totalValueCount <= 0) {
    throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
  }
}
示例2
/**
 * Writes four pages of 209 values each to a {@code MemPageStore}, then reads pages back until
 * the number of values read reaches the total value count reported by the page reader. The
 * total and each page read are printed to standard output; nothing is asserted yet.
 */
@Test
public void test() throws IOException {
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnDescriptor col = new ColumnDescriptor(path , PrimitiveTypeName.INT64, 2, 2);
  LongStatistics stats = new LongStatistics();
  PageWriter pageWriter = memPageStore.getPageWriter(col);

  // Payload sizes of the four pages, in write order.
  int[] payloadSizes = {735, 743, 743, 735};
  for (int payloadSize : payloadSizes) {
    pageWriter.writePage(BytesInput.from(new byte[payloadSize]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  }

  PageReader pageReader = memPageStore.getPageReader(col);
  long expectedValues = pageReader.getTotalValueCount();
  System.out.println(expectedValues);

  int valuesRead = 0;
  do {
    DataPage current = pageReader.readPage();
    valuesRead += current.getValueCount();
    System.out.println(current);
    // TODO: assert
  } while (valuesRead < expectedValues);
}
示例3
/**
 * Forwards the dictionary-encoding flag to the vectorized page iterator, sets the page source
 * on the superclass, and returns the {@code dictionary} field.
 *
 * @param store page reader passed to {@code super.setPageSource}
 * @param allPagesDictEncoded flag forwarded to {@code vectorizedPageIterator.setAllPagesDictEncoded}
 * @return the value of {@code dictionary} after {@code super.setPageSource(store)} has run
 */
public Dictionary setRowGroupInfo(PageReader store, boolean allPagesDictEncoded) {
// Order matters: setPageSource can result in a data page read. If that happens, the page
// iterator needs to know in advance whether all the pages in the row group are dictionary
// encoded or not, so the flag is set first.
this.vectorizedPageIterator.setAllPagesDictEncoded(allPagesDictEncoded);
super.setPageSource(store);
return dictionary;
}
示例4
/**
 * Switches this reader to a new page source.
 *
 * <p>Stores the source, takes its total value count as {@code triplesCount}, zeroes the
 * read counters, resets the page iterator, reads the dictionary through
 * {@code ParquetUtil.readDictionary} and hands it to the page iterator, then calls
 * {@code advance()}.
 *
 * @param source page reader to read from; dereferenced immediately, so it must not be null
 */
public void setPageSource(PageReader source) {
this.pageSource = source;
this.triplesCount = source.getTotalValueCount();
// Restart progress tracking for the new source.
this.triplesRead = 0L;
this.advanceNextPageCount = 0L;
BasePageIterator pageIterator = pageIterator();
pageIterator.reset();
// The dictionary is kept in the field and also given to the page iterator.
dictionary = ParquetUtil.readDictionary(desc, pageSource);
pageIterator.setDictionary(dictionary);
advance();
}
示例5
/**
 * Reads the dictionary page from the given page source and decodes it.
 *
 * @param desc column descriptor passed to the decoder and included in the error message
 * @param pageSource page reader the dictionary page is read from
 * @return the decoded dictionary, or {@code null} when the page source returns no dictionary page
 * @throws ParquetDecodingException if decoding fails with an {@link IOException}
 */
public static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  DictionaryPage page = pageSource.readDictionaryPage();
  if (page == null) {
    return null;
  }
  try {
    return page.getEncoding().initDictionary(desc, page);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
}
示例6
/**
 * Switches this reader to a new page source.
 *
 * <p>Stores the source, takes its total value count as {@code triplesCount}, zeroes the
 * read counters, resets the page iterator, gives it the dictionary returned by
 * {@code readDictionary(desc, pageSource)}, then calls {@code advance()}.
 *
 * @param source page reader to read from; dereferenced immediately, so it must not be null
 */
public void setPageSource(PageReader source) {
this.pageSource = source;
this.triplesCount = source.getTotalValueCount();
// Restart progress tracking for the new source.
this.triplesRead = 0L;
this.advanceNextPageCount = 0L;
this.pageIterator.reset();
// readDictionary's result (possibly null) is passed straight to the page iterator.
this.pageIterator.setDictionary(readDictionary(desc, pageSource));
advance();
}
示例7
/**
 * Reads the dictionary page from the given page source and decodes it.
 *
 * @param desc column descriptor passed to the decoder and included in the error message
 * @param pageSource page reader the dictionary page is read from
 * @return the decoded dictionary, or {@code null} when the page source returns no dictionary page
 * @throws ParquetDecodingException if decoding fails with an {@link IOException}
 */
private static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  DictionaryPage dictPage = pageSource.readDictionaryPage();
  if (dictPage == null) {
    return null;
  }
  try {
    return dictPage.getEncoding().initDictionary(desc, dictPage);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
}
示例8
/**
 * Reads the next page of the given column as a {@code DataPageV1} and asserts its value count
 * and its bytes.
 *
 * @param schema schema used to resolve {@code path} to a column description
 * @param pages page store the column's page reader is obtained from
 * @param path column path looked up in {@code schema}
 * @param values expected value count of the page
 * @param bytes expected page content
 * @throws IOException declared because the byte conversions used here may throw it
 */
private void validateContains(MessageType schema, PageReadStore pages, String[] path, int values, BytesInput bytes)
    throws IOException {
  DataPageV1 actual =
      (DataPageV1) pages.getPageReader(schema.getColumnDescription(path)).readPage();
  assertEquals(values, actual.getValueCount());
  assertArrayEquals(bytes.toByteArray(), actual.getBytes().toByteArray());
}
示例9
/**
 * Creates a column reader over the given page reader.
 *
 * <p>Stores the arguments and the descriptor's maximum definition level, then reads the
 * dictionary page: when one is returned it is decoded and
 * {@code isCurrentPageDictionaryEncoded} is set to {@code true}; otherwise the dictionary is
 * {@code null} and the flag is {@code false}.
 *
 * @param descriptor column descriptor; dereferenced immediately, so it must not be null
 * @param pageReader page reader for the column; dereferenced immediately, so it must not be null
 * @throws IOException if the dictionary page cannot be decoded, or if the page reader reports
 *     a total value count of zero
 */
public AbstractColumnReader(ColumnDescriptor descriptor, PageReader pageReader) throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
  if (dictionaryPage == null) {
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }

  // The total number of values in this column (in this row group) must not be zero.
  if (pageReader.getTotalValueCount() == 0) {
    throw new IOException("totalValueCount == 0");
  }
}
示例10
/**
 * Creates a reader by delegating to the superclass constructor, calling {@code checkTypeName}
 * with {@code FIXED_LEN_BYTE_ARRAY}, and storing the given precision.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @param precision value stored in the {@code precision} field
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public FixedLenBytesColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader,
int precision) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY);
this.precision = precision;
}
示例11
/**
 * Creates a reader by delegating to the superclass constructor, storing the UTC flag, and
 * calling {@code checkTypeName} with {@code INT96}.
 *
 * @param utcTimestamp value stored in the {@code utcTimestamp} field
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public TimestampColumnReader(
boolean utcTimestamp,
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
this.utcTimestamp = utcTimestamp;
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.INT96);
}
示例12
/**
 * Builds a column reader for the given column.
 *
 * <p>When the page read store provides row indexes, a {@code SynchronizingColumnReader} is
 * created with them; otherwise a plain {@code ColumnReaderImpl} is created.
 *
 * @param path descriptor of the column to read
 * @return a new column reader for {@code path}
 */
@Override
public ColumnReader getColumnReader(ColumnDescriptor path) {
  PrimitiveConverter converter = getPrimitiveConverter(path);
  PageReader pageReader = pageReadStore.getPageReader(path);
  Optional<PrimitiveIterator.OfLong> rowIndexes = pageReadStore.getRowIndexes();
  if (!rowIndexes.isPresent()) {
    return new ColumnReaderImpl(path, pageReader, converter, writerVersion);
  }
  return new SynchronizingColumnReader(path, pageReader, converter, writerVersion, rowIndexes.get());
}
示例13
/**
 * Creates a column reader that is additionally given an iterator of row indexes.
 *
 * <p>After delegating to the superclass constructor it stores the iterator, sets
 * {@code targetRow} to {@link Long#MIN_VALUE}, and calls {@code consume()}.
 *
 * @param path column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @param converter converter passed to the superclass constructor
 * @param writerVersion writer version passed to the superclass constructor
 * @param rowIndexes iterator of row indexes stored in the {@code rowIndexes} field
 */
SynchronizingColumnReader(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter,
ParsedVersion writerVersion, PrimitiveIterator.OfLong rowIndexes) {
super(path, pageReader, converter, writerVersion);
this.rowIndexes = rowIndexes;
// NOTE(review): Long.MIN_VALUE presumably means "no target row selected yet" — confirm.
targetRow = Long.MIN_VALUE;
// Called last, after rowIndexes and targetRow have been initialised.
consume();
}
示例14
/**
 * Creates a page reader over the pages written so far for the given column.
 *
 * @param descriptor column whose page writer is looked up in {@code pageWriters}
 * @return a new {@code MemPageReader} built from the writer's total value count, an iterator
 *     over a copy of its page list, and its dictionary page
 * @throws UnknownColumnException if no page writer is registered for {@code descriptor}
 */
@Override
public PageReader getPageReader(ColumnDescriptor descriptor) {
  MemPageWriter writer = pageWriters.get(descriptor);
  if (writer == null) {
    throw new UnknownColumnException(descriptor);
  }
  // The reader iterates over a copy of the writer's page list, not the list itself.
  List<DataPage> pageCopy = new ArrayList<>(writer.getPages());
  LOG.debug("initialize page reader with {} values and {} pages", writer.getTotalValueCount(), pageCopy.size());
  return new MemPageReader(writer.getTotalValueCount(), pageCopy.iterator(), writer.getDictionaryPage());
}
示例15
/**
 * Returns the page reader registered for the given column.
 *
 * @param path column descriptor used as the lookup key in {@code readers}
 * @return the page reader stored for {@code path}
 * @throws IllegalArgumentException if {@code readers} has no entry for {@code path}; the
 *     message lists the known keys and {@code rowCount}
 */
@Override
public PageReader getPageReader(ColumnDescriptor path) {
  PageReader reader = readers.get(path);
  if (reader != null) {
    return reader;
  }
  throw new IllegalArgumentException(path + " is not in the store: " + readers.keySet() + " " + rowCount);
}
示例16
/**
 * Reads the next page of the given column as a {@code DataPageV2} and asserts its counts,
 * uncompressed size, and its repetition-level, definition-level and data bytes.
 *
 * @param schema schema used to resolve {@code path} to a column description
 * @param pages page store the column's page reader is obtained from
 * @param path column path looked up in {@code schema}
 * @param values expected value count
 * @param rows expected row count
 * @param nullCount expected null count
 * @param repetition expected repetition-level bytes
 * @param definition expected definition-level bytes
 * @param data expected data bytes
 * @param uncompressedSize expected uncompressed size
 * @throws IOException declared because the byte conversions used here may throw it
 */
private void validateV2Page(MessageType schema, PageReadStore pages, String[] path, int values, int rows, int nullCount,
    byte[] repetition, byte[] definition, byte[] data, int uncompressedSize) throws IOException {
  DataPageV2 actual =
      (DataPageV2) pages.getPageReader(schema.getColumnDescription(path)).readPage();

  // Scalar page metadata.
  assertEquals(values, actual.getValueCount());
  assertEquals(rows, actual.getRowCount());
  assertEquals(nullCount, actual.getNullCount());
  assertEquals(uncompressedSize, actual.getUncompressedSize());

  // Byte sections of the page.
  assertArrayEquals(repetition, actual.getRepetitionLevels().toByteArray());
  assertArrayEquals(definition, actual.getDefinitionLevels().toByteArray());
  assertArrayEquals(data, actual.getData().toByteArray());
}
示例17
/**
 * Reads every data page of a column and collects a copy of each.
 *
 * <p>Pages are read until the page reader returns {@code null}; each page is passed through
 * {@code reusableCopy} before being added to the result.
 *
 * @param pageReadStore store the column's page reader is obtained from
 * @param columnDescriptor column whose pages are read
 * @return the copied pages in read order; empty when the reader returns no pages
 */
private static List<DataPage> getPageGroupForColumn(PageReadStore pageReadStore, ColumnDescriptor columnDescriptor) {
  PageReader reader = pageReadStore.getPageReader(columnDescriptor);
  List<DataPage> copies = new ArrayList<DataPage>();
  for (DataPage next = reader.readPage(); next != null; next = reader.readPage()) {
    copies.add(reusableCopy(next));
  }
  return copies;
}
示例18
/**
 * Runs {@code validateStatsForPage} on every data page of every column in the schema.
 *
 * <p>For each column the dictionary page is read first, then data pages are read until the
 * page reader returns {@code null}.
 *
 * @param schema schema whose columns are iterated
 * @param store page store the page readers are obtained from
 */
public void validate(MessageType schema, PageReadStore store) {
  for (ColumnDescriptor column : schema.getColumns()) {
    PageReader columnPages = store.getPageReader(column);
    // Read the dictionary page before any data page, as the original did.
    DictionaryPage dictionary = columnPages.readDictionaryPage();
    for (DataPage next = columnPages.readPage(); next != null; next = columnPages.readPage()) {
      validateStatsForPage(next, dictionary, column);
    }
  }
}
示例19
/**
 * Looks up the page reader for the given column descriptor in {@code columns}.
 *
 * @param descriptor key used for the lookup
 * @return the result of {@code columns.get(descriptor)}. NOTE(review): if {@code columns} is a
 *     {@code java.util.Map}, this is {@code null} for an unknown descriptor — confirm
 */
@Override
public PageReader getPageReader(ColumnDescriptor descriptor) {
return columns.get(descriptor);
}
示例20
/**
 * Returns whatever {@code columns} holds for the given column descriptor.
 *
 * @param descriptor key used for the lookup
 * @return the result of {@code columns.get(descriptor)}. NOTE(review): presumably {@code null}
 *     when the descriptor is not registered — confirm against the type of {@code columns}
 */
@Override
public PageReader getPageReader(ColumnDescriptor descriptor) {
return columns.get(descriptor);
}
示例21
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code INT32}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public ByteColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.INT32);
}
示例22
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code BOOLEAN}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public BooleanColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.BOOLEAN);
}
示例23
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code INT64}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public LongColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.INT64);
}
示例24
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code INT32}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public ShortColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.INT32);
}
示例25
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code DOUBLE}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public DoubleColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.DOUBLE);
}
示例26
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code INT32}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public IntColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.INT32);
}
示例27
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code BINARY}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public BytesColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.BINARY);
}
示例28
/**
 * Creates a reader by delegating to the superclass constructor and calling
 * {@code checkTypeName} with {@code FLOAT}.
 *
 * @param descriptor column descriptor passed to the superclass constructor
 * @param pageReader page reader passed to the superclass constructor
 * @throws IOException declared by this constructor; presumably propagated from the superclass
 *     constructor — confirm
 */
public FloatColumnReader(
ColumnDescriptor descriptor,
PageReader pageReader) throws IOException {
super(descriptor, pageReader);
// NOTE(review): presumably verifies the column's physical type — confirm against checkTypeName.
checkTypeName(PrimitiveType.PrimitiveTypeName.FLOAT);
}
示例29
/**
 * Creates the column reader matching a field's logical type.
 *
 * <p>The reader class is chosen from {@code fieldType.getTypeRoot()}. For {@code DECIMAL} the
 * choice additionally depends on the primitive type name of the column descriptor: INT32,
 * INT64, BINARY and FIXED_LEN_BYTE_ARRAY are handled; any other primitive type is rejected.
 *
 * @param utcTimestamp flag passed to {@code TimestampColumnReader} for timestamp types
 * @param fieldType logical type of the field to read
 * @param descriptor column descriptor passed to the created reader
 * @param pageReader page reader passed to the created reader
 * @return a new column reader for the field
 * @throws IOException if the chosen reader's constructor throws it
 * @throws UnsupportedOperationException if the logical type, or the primitive type backing a
 *     {@code DECIMAL}, is not handled
 */
public static ColumnReader createColumnReader(
    boolean utcTimestamp,
    LogicalType fieldType,
    ColumnDescriptor descriptor,
    PageReader pageReader) throws IOException {
  switch (fieldType.getTypeRoot()) {
    case BOOLEAN:
      return new BooleanColumnReader(descriptor, pageReader);
    case TINYINT:
      return new ByteColumnReader(descriptor, pageReader);
    case DOUBLE:
      return new DoubleColumnReader(descriptor, pageReader);
    case FLOAT:
      return new FloatColumnReader(descriptor, pageReader);
    case INTEGER:
    case DATE:
    case TIME_WITHOUT_TIME_ZONE:
      return new IntColumnReader(descriptor, pageReader);
    case BIGINT:
      return new LongColumnReader(descriptor, pageReader);
    case SMALLINT:
      return new ShortColumnReader(descriptor, pageReader);
    case CHAR:
    case VARCHAR:
    case BINARY:
    case VARBINARY:
      return new BytesColumnReader(descriptor, pageReader);
    case TIMESTAMP_WITHOUT_TIME_ZONE:
    case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
      return new TimestampColumnReader(utcTimestamp, descriptor, pageReader);
    case DECIMAL:
      switch (descriptor.getPrimitiveType().getPrimitiveTypeName()) {
        case INT32:
          return new IntColumnReader(descriptor, pageReader);
        case INT64:
          return new LongColumnReader(descriptor, pageReader);
        case BINARY:
          return new BytesColumnReader(descriptor, pageReader);
        case FIXED_LEN_BYTE_ARRAY:
          return new FixedLenBytesColumnReader(
              descriptor, pageReader, ((DecimalType) fieldType).getPrecision());
        default:
          // Reject unsupported physical types explicitly instead of relying on an implicit
          // fall-through into the outer default; exception type and message are the same.
          throw new UnsupportedOperationException(fieldType + " is not supported now.");
      }
    default:
      throw new UnsupportedOperationException(fieldType + " is not supported now.");
  }
}
示例30
/**
 * Creates a {@code ColumnReaderImpl} for the given column and page reader, using the converter
 * returned by {@code getPrimitiveConverter(path)} and this object's {@code writerVersion}.
 *
 * @param path descriptor of the column to read
 * @param pageReader page reader passed to the new column reader
 * @return the newly constructed column reader
 */
private ColumnReaderImpl newMemColumnReader(ColumnDescriptor path, PageReader pageReader) {
PrimitiveConverter converter = getPrimitiveConverter(path);
return new ColumnReaderImpl(path, pageReader, converter, writerVersion);
}