Java source code examples: org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter
Example 1
public ExaParquetWriterImpl(final List<String> colNames,
                            final List<TypeInfo> colTypes,
                            final Configuration conf,
                            final Path path,
                            final String compressionType,
                            final ExaIterator exa,
                            final int firstColumnIndex,
                            final List<Integer> dynamicPartitionExaColNums) throws Exception {
    this(HiveSchemaConverter.convert(colNames, colTypes),
            colNames.size(),
            conf,
            path,
            compressionType,
            exa,
            firstColumnIndex,
            dynamicPartitionExaColNums);
}
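The constructor above delegates the schema translation to HiveSchemaConverter.convert. Below is a minimal sketch of that call in isolation; the class name SchemaConversionSketch, the column names, and the "int,string" type string are made up for illustration, while TypeInfoUtils.getTypeInfosFromTypeString is the standard Hive utility also used in the later examples.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.parquet.schema.MessageType;

public class SchemaConversionSketch {
    public static void main(String[] args) {
        // Hypothetical columns: an int "id" and a string "name".
        List<String> colNames = Arrays.asList("id", "name");
        List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString("int,string");

        // Translate the Hive columns into a Parquet MessageType and print it.
        MessageType schema = HiveSchemaConverter.convert(colNames, colTypes);
        System.out.println(schema);
    }
}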
Example 2
@Test
public void testMapOriginalType() throws Exception {
    final String hiveColumnTypes = "map<string,string>";
    final String hiveColumnNames = "mapCol";
    final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
    final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
    final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);

    // The converted message type has exactly one optional field, named "mapCol",
    // whose original type is MAP.
    assertEquals(1, messageTypeFound.getFieldCount());
    org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
    assertEquals("mapCol", topLevel.getName());
    assertEquals(OriginalType.MAP, topLevel.getOriginalType());
    assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());

    assertEquals(1, topLevel.asGroupType().getFieldCount());
    org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
    // Inside "mapCol" there is a single repeated group named "map",
    // whose original type is MAP_KEY_VALUE.
    assertEquals("map", secondLevel.getName());
    assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
    assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
}
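For reference, the structure asserted above corresponds roughly to the following Parquet schema string. The message name hive_schema and the key/value field types are assumptions based on the usual Hive map encoding; they are not taken from the test itself.

// Approximate shape asserted by the test above; the key/value fields are assumed.
MessageType expected = MessageTypeParser.parseMessageType(
        "message hive_schema {\n"
      + "  optional group mapCol (MAP) {\n"
      + "    repeated group map (MAP_KEY_VALUE) {\n"
      + "      required binary key (UTF8);\n"
      + "      optional binary value (UTF8);\n"
      + "    }\n"
      + "  }\n"
      + "}");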
Example 3
private static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties)
        throws IOException
{
    // If the Parquet schema has not been set on the job yet, derive it from the
    // Hive table properties and store it in the configuration.
    if (conf.get(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA) == null) {
        List<String> columnNames = Splitter.on(',').splitToList(properties.getProperty(IOConstants.COLUMNS));
        List<TypeInfo> columnTypes = getTypeInfosFromTypeString(properties.getProperty(IOConstants.COLUMNS_TYPES));

        MessageType schema = HiveSchemaConverter.convert(columnNames, columnTypes);
        setParquetSchema(conf, schema);
    }

    ParquetOutputFormat<ParquetHiveRecord> outputFormat = new ParquetOutputFormat<>(new DataWritableWriteSupport());

    return new ParquetRecordWriterWrapper(outputFormat, conf, target.toString(), Reporter.NULL, properties);
}
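The setParquetSchema helper is not part of the snippet above. A plausible one-line implementation, assuming it simply delegates to DataWritableWriteSupport.setSchema the way Example 4 below does (the helper body is an assumption):

// Assumed helper: stores the Parquet schema in the configuration so that
// DataWritableWriteSupport can read it back when the writer is initialized.
private static void setParquetSchema(Configuration conf, MessageType schema) {
    DataWritableWriteSupport.setSchema(schema, conf);
}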
Example 4
/**
 * Create the Parquet schema from the Hive schema and return the RecordWriterWrapper,
 * which contains the real output format.
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
        final JobConf jobConf,
        final Path finalOutPath,
        final Class<? extends Writable> valueClass,
        final boolean isCompressed,
        final Properties tableProperties,
        final Progressable progress) throws IOException {

    LOG.info("creating new record writer...{}", this);

    final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
    List<String> columnNames;
    List<TypeInfo> columnTypes;

    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    // Store the converted Parquet schema in the job configuration so that
    // DataWritableWriteSupport can use it when writing records.
    DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf);

    return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress);
}
Example 5
private void testConversion(final String columnNamesStr, final String columnsTypeStr, final String expectedSchema) throws Exception {
    final List<String> columnNames = createHiveColumnsFrom(columnNamesStr);
    final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr);
    final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
    final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema);
    assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound);
}
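The createHiveColumnsFrom and createHiveTypeInfoFrom helpers are not shown here. Below is a minimal sketch of how they can be implemented on top of the same utilities used in Example 4, plus a hypothetical call to testConversion; the helper bodies, the hive_schema message name, and the example arguments are assumptions.

// Assumed helper implementations, mirroring the property parsing in Example 4.
private List<String> createHiveColumnsFrom(final String columnNamesStr) {
    return Arrays.asList(columnNamesStr.split(","));
}

private List<TypeInfo> createHiveTypeInfoFrom(final String columnsTypeStr) {
    return TypeInfoUtils.getTypeInfosFromTypeString(columnsTypeStr);
}

// Hypothetical usage: a single int column converted to an optional int32 field.
// testConversion("a", "int", "message hive_schema { optional int32 a; }");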