Java source code examples: org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter

Example 1
public ExaParquetWriterImpl(final List<String> colNames,
                            final List<TypeInfo> colTypes,
                            final Configuration conf,
                            final Path path,
                            final String compressionType,
                            final ExaIterator exa,
                            final int firstColumnIndex,
                            final List<Integer> dynamicPartitionExaColNums) throws Exception {
    this(HiveSchemaConverter.convert(colNames, colTypes),
            colNames.size(),
            conf,
            path,
            compressionType,
            exa,
            firstColumnIndex,
            dynamicPartitionExaColNums);
}
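For context, a minimal standalone sketch of the call this constructor delegates to: HiveSchemaConverter.convert takes parallel lists of column names and Hive TypeInfos and returns a Parquet MessageType. The class name and column values below are hypothetical.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.parquet.schema.MessageType;

public class ConvertDemo {
    public static void main(String[] args) {
        // Hypothetical columns: an int "id" and a string "name".
        List<String> colNames = Arrays.asList("id", "name");
        List<TypeInfo> colTypes =
                TypeInfoUtils.getTypeInfosFromTypeString("int,string");

        // Convert the Hive column definitions into a Parquet message type.
        MessageType schema = HiveSchemaConverter.convert(colNames, colTypes);
        System.out.println(schema);
    }
}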
 
Example 2
@Test
public void testMapOriginalType() throws Exception {
  final String hiveColumnTypes = "map<string,string>";
  final String hiveColumnNames = "mapCol";
  final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
  final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
  final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
  // the resulting messageType has a single optional field named mapCol, whose original type is MAP
  assertEquals(1, messageTypeFound.getFieldCount());
  org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
  assertEquals("mapCol",topLevel.getName());
  assertEquals(OriginalType.MAP, topLevel.getOriginalType());
  assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());

  assertEquals(1, topLevel.asGroupType().getFieldCount());
  org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
  // mapCol contains a single repeated field named "map", whose original type is MAP_KEY_VALUE
  assertEquals("map", secondLevel.getName());
  assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
  assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
}
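For reference, the schema these assertions describe can be written out with MessageTypeParser. This is a sketch; the UTF8 annotations on the key and value are assumptions that may vary across Hive versions.

// A sketch of the converted schema asserted above (annotations assumed).
MessageType expected = MessageTypeParser.parseMessageType(
    "message hive_schema {\n"
    + "  optional group mapCol (MAP) {\n"
    + "    repeated group map (MAP_KEY_VALUE) {\n"
    + "      required binary key (UTF8);\n"
    + "      optional binary value (UTF8);\n"
    + "    }\n"
    + "  }\n"
    + "}");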
 
Example 3
private static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties)
        throws IOException
{
    if (conf.get(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA) == null) {
        List<String> columnNames = Splitter.on(',').splitToList(properties.getProperty(IOConstants.COLUMNS));
        List<TypeInfo> columnTypes = getTypeInfosFromTypeString(properties.getProperty(IOConstants.COLUMNS_TYPES));
        MessageType schema = HiveSchemaConverter.convert(columnNames, columnTypes);
        setParquetSchema(conf, schema);
    }

    ParquetOutputFormat<ParquetHiveRecord> outputFormat = new ParquetOutputFormat<>(new DataWritableWriteSupport());

    return new ParquetRecordWriterWrapper(outputFormat, conf, target.toString(), Reporter.NULL, properties);
}
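A hedged usage sketch for this helper: IOConstants.COLUMNS carries the comma-separated column names and IOConstants.COLUMNS_TYPES the matching Hive type string. The column values and output path below are hypothetical.

Properties properties = new Properties();
properties.setProperty(IOConstants.COLUMNS, "id,name");          // hypothetical columns
properties.setProperty(IOConstants.COLUMNS_TYPES, "int,string"); // matching Hive types

JobConf conf = new JobConf();
RecordWriter writer = createParquetWriter(
        new Path("/tmp/demo.parquet"), conf, properties);        // hypothetical path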
 
Example 4
/**
 * Creates the Parquet schema from the Hive schema and returns the
 * RecordWriterWrapper that wraps the real output format.
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
    final JobConf jobConf,
    final Path finalOutPath,
    final Class<? extends Writable> valueClass,
    final boolean isCompressed,
    final Properties tableProperties,
    final Progressable progress) throws IOException {

  LOG.info("creating new record writer...{}", this);

  final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
  final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
  List<String> columnNames;
  List<TypeInfo> columnTypes;

  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }

  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }

  DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf);
  return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress);
}
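The schema set through DataWritableWriteSupport above is stored in the job configuration so the write path can read it back later. A minimal round-trip sketch, reusing the variables from the method body:

// Round-trip sketch: store the converted schema, then read it back (assumed usage).
MessageType schema = HiveSchemaConverter.convert(columnNames, columnTypes);
DataWritableWriteSupport.setSchema(schema, jobConf);
MessageType roundTripped = DataWritableWriteSupport.getSchema(jobConf);
assert schema.equals(roundTripped);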
 
Example 5
private void testConversion(final String columnNamesStr, final String columnsTypeStr, final String expectedSchema) throws Exception {
  final List<String> columnNames = createHiveColumnsFrom(columnNamesStr);
  final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(columnsTypeStr);
  final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
  final MessageType expectedMT = MessageTypeParser.parseMessageType(expectedSchema);
  assertEquals("converting " + columnNamesStr + ": " + columnsTypeStr + " to " + expectedSchema, expectedMT, messageTypeFound);
}
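A usage sketch for this helper: a single int column converts to an optional int32 field. The expected schema string assumes the converter's default message name "hive_schema".

// Hypothetical invocation: one int column named "a".
testConversion(
        "a",
        "int",
        "message hive_schema {\n  optional int32 a;\n}\n");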