Java source code examples: org.apache.spark.sql.hive.HiveContext

Example 1
@Override
public DDF loadFromJDBC(JDBCDataSourceDescriptor dataSource) throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();

    JDBCDataSourceCredentials cred = (JDBCDataSourceCredentials)dataSource.getDataSourceCredentials();
    String fullURL = dataSource.getDataSourceUri().getUri().toString();
    if (cred.getUsername() != null &&  !cred.getUsername().equals("")) {
        fullURL += String.format("?user=%s&password=%s", cred.getUsername(), cred.getPassword());
    }

    Map<String, String> options = new HashMap<String, String>();
    options.put("url", fullURL);
    options.put("dbtable", dataSource.getDbTable());
    DataFrame df = sqlContext.load("jdbc", options);

    DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
        null, SparkUtils.schemaFromDataFrame(df));
    // TODO?
    ddf.getRepresentationHandler().get(RDD.class, Row.class);
    ddf.getMetaDataHandler().setDataSourceDescriptor(dataSource);
    return ddf;
}
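Note that SQLContext.load(String, Map) used above was deprecated in Spark 1.4. A minimal sketch of the same JDBC load expressed with the DataFrameReader API, reusing fullURL and dataSource from the example, might look like this:

    // Sketch: DataFrameReader equivalent of sqlContext.load("jdbc", options) (Spark 1.4+).
    DataFrame df = sqlContext.read()
        .format("jdbc")
        .option("url", fullURL)
        .option("dbtable", dataSource.getDbTable())
        .load();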
 
Example 2
@Override
public DDF loadSpecialFormat(DataFormat format, URI fileURI, Boolean flatten) throws DDFException {
    SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
    HiveContext sqlContext = sparkDDFManager.getHiveContext();
    DataFrame jdf = null;
    switch (format) {
        case JSON:
            jdf = sqlContext.jsonFile(fileURI.toString());
            break;
        case PQT:
            jdf = sqlContext.parquetFile(fileURI.toString());
            break;
        default:
            throw new DDFException(String.format("Unsupported data format: %s", format.toString()));
    }

    DataFrame df = SparkUtils.getDataFrameWithValidColnames(jdf);
    DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
        null, SparkUtils.schemaFromDataFrame(df));

    if (Boolean.TRUE.equals(flatten)) {
        return ddf.getFlattenedDDF();
    } else {
        return ddf;
    }
}
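jsonFile and parquetFile were likewise deprecated in Spark 1.4 in favor of the DataFrameReader API; a sketch of the equivalent loads inside the switch above:

    // Sketch: DataFrameReader equivalents (Spark 1.4+).
    jdf = sqlContext.read().json(fileURI.toString());      // instead of jsonFile
    jdf = sqlContext.read().parquet(fileURI.toString());   // instead of parquetFile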
 
Example 3
public static void main(String[] args) {
    // Alternative masters used during testing:
    // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("local[2]");
    // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://Kavithas-MBP.home:7077");
    SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");

    JavaSparkContext sc = new JavaSparkContext(conf);
    HiveContext sqlContext = new HiveContext(sc.sc());

    Dataset<Row> urls = sqlContext.read().json("/tmp/urls.json");
    urls.registerTempTable("urls");

    Dataset<Row> temp = sqlContext.sql("select * from urls");
    temp.show();

    sqlContext.sql("add jar /tmp/quetzal.jar");
    sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
    Dataset<Row> drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
            + " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
    drugs.show();
    System.out.println("Num rows:" + drugs.count());
}
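In Spark 2.x, HiveContext and registerTempTable are deprecated; a minimal sketch of the same flow with SparkSession (assuming Spark 2.x is on the classpath):

    // Sketch: SparkSession with Hive support replaces HiveContext,
    // createOrReplaceTempView replaces registerTempTable (Spark 2.x).
    SparkSession spark = SparkSession.builder()
            .appName("App-mt")
            .enableHiveSupport()
            .getOrCreate();
    Dataset<Row> urls = spark.read().json("/tmp/urls.json");
    urls.createOrReplaceTempView("urls");
    spark.sql("select * from urls").show();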
 
Example 4
/**
 * Get the SQLContext.
 * In a local test environment, create a SQLContext;
 * in the production environment, create a HiveContext.
 * @param sc SparkContext
 * @return SQLContext
 */
private static SQLContext getSQLContext(SparkContext sc) {
	boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
	if(local) {
		return new SQLContext(sc);
	} else {
		return new HiveContext(sc);
	}
}
 
Example 5
/**
 * Get the SQLContext.
 * If spark.local is set to true, create a SQLContext; otherwise, create a HiveContext.
 * @param sc SparkContext
 * @return SQLContext
 */
public static SQLContext getSQLContext(SparkContext sc) {
	boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
	if(local) {
		return new SQLContext(sc);
	} else {
		return new HiveContext(sc);
	}
}
 
Example 6
/**
 * Get the SQLContext: a plain SQLContext when running locally,
 * a HiveContext in the production environment.
 * @param sc SparkContext
 * @return SQLContext
 */
public static SQLContext getSQLContext(SparkContext sc)
{
    boolean local= ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
    if(local)
    {
        return new SQLContext(sc);
    }
    return new HiveContext(sc);
}
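A minimal usage sketch for the helper above; ConfigurationManager and Constants.SPARK_LOCAL come from the example itself, while the application name and table name are placeholders:

    // Hypothetical caller: build the appropriate context and run a query against it.
    SparkConf conf = new SparkConf().setAppName("example-app");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext sqlContext = getSQLContext(jsc.sc());
    sqlContext.sql("SELECT * FROM some_table").show();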
 
Example 7
private void initialize(SparkContext sparkContext, Map<String, String> params) throws DDFException {
  this.setSparkContext(sparkContext == null ? this.createSparkContext(params) : sparkContext);
  this.mHiveContext = new HiveContext(this.mSparkContext);
  String compression = System.getProperty("spark.sql.inMemoryColumnarStorage.compressed", "true");
  String batchSize = System.getProperty("spark.sql.inMemoryColumnarStorage.batchSize", "1000");
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.compressed= " + compression);
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.batchSize= " + batchSize);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.compressed", compression);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", batchSize);

  // register SparkSQL UDFs
  this.registerUDFs();
  this.mDataSourceManager = new SparkDataSourceManager(this);
}
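The two columnar-storage settings can also be supplied before the contexts are created, for example through SparkConf or spark-defaults.conf, rather than via setConf on the HiveContext; a sketch:

    // Sketch: the same settings passed on the SparkConf used to build the SparkContext.
    SparkConf conf = new SparkConf()
            .set("spark.sql.inMemoryColumnarStorage.compressed", "true")
            .set("spark.sql.inMemoryColumnarStorage.batchSize", "1000");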
 
Example 8
public boolean isTable() {
  HiveContext hiveContext = ((SparkDDFManager) this.getManager()).getHiveContext();
  String[] tableNames = hiveContext.tableNames();
  boolean tableExists = false;
  for (String table : tableNames) {
    if (table.equals(this.getTableName())) {
      tableExists = true;
      break;
    }
  }
  return tableExists;
}
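The same check can be written without the flag variable; a sketch using java.util.Arrays over the same tableNames() result:

    // Sketch: membership test via Arrays.asList (requires import java.util.Arrays).
    public boolean isTable() {
      HiveContext hiveContext = ((SparkDDFManager) this.getManager()).getHiveContext();
      return Arrays.asList(hiveContext.tableNames()).contains(this.getTableName());
    }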
 
Example 9
@Test
public void whenHiveContextIsSelectedInConfiguration(){

   Properties properties = new Properties();
   properties.put(AttributteNames.CT_HIVE_CONTEXT,"YES");
   sqlContext.loadConfiguration(properties);
   assertThat("When exist HiveContext then create instanceof HiveContext", sqlContext.getConnector(), instanceOf(HiveContext.class));
}
 
Example 10
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("SQLQueryBAM");

  JavaSparkContext sc = new JavaSparkContext(conf);
  SQLContext sqlContext = new HiveContext(sc.sc());

  Options options = new Options();
  Option opOpt = new Option( "out", true, "HDFS path for output files. If not present, the output files are not moved to HDFS." );
  Option queryOpt = new Option( "query", true, "SQL query string." );
  Option baminOpt = new Option( "in", true, "" );

  options.addOption( opOpt );
  options.addOption( queryOpt );
  options.addOption( baminOpt );
  CommandLineParser parser = new BasicParser();
  CommandLine cmd = null;
  try {
    cmd = parser.parse( options, args );
  }
  catch( ParseException exp ) {
    System.err.println( "Parsing failed.  Reason: " + exp.getMessage() );
    System.exit(1);
  }

  String bwaOutDir = cmd.hasOption("out") ? cmd.getOptionValue("out") : null;
  String query = cmd.hasOption("query") ? cmd.getOptionValue("query") : null;
  String bamin = cmd.hasOption("in") ? cmd.getOptionValue("in") : null;

  sc.hadoopConfiguration().setBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, true);

  //Read BAM/SAM from HDFS
  JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(bamin, AnySAMInputFormat.class, LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration());
  //Map to SAMRecord RDD
  JavaRDD<SAMRecord> samRDD = bamPairRDD.map(v1 -> v1._2().get());
  JavaRDD<MyAlignment> rdd = samRDD.map(bam -> new MyAlignment(bam.getReadName(), bam.getStart(), bam.getReferenceName(), bam.getReadLength(), new String(bam.getReadBases(), StandardCharsets.UTF_8), bam.getCigarString(), bam.getReadUnmappedFlag(), bam.getDuplicateReadFlag()));

  Dataset<Row> samDF = sqlContext.createDataFrame(rdd, MyAlignment.class);
  samDF.registerTempTable(tablename);
  if(query!=null) {

    //Save as parquet file
    Dataset<Row> df2 = sqlContext.sql(query);
    df2.show(100,false);

    if(bwaOutDir!=null)
      df2.write().parquet(bwaOutDir);

  }else{
    if(bwaOutDir!=null)
      samDF.write().parquet(bwaOutDir);
  }

  sc.stop();

}
 
Example 11
public HiveContext getHiveContext() {
  return mHiveContext;
}
 
Example 12
private HiveContext getHiveContext() {
  return ((SparkDDFManager) this.getManager()).getHiveContext();
}
 
Example 13
/**
 * Build a HiveContext.
 * @param sparkContext the SparkContext used to configure the HiveContext
 * @return HiveContext
 */
@Override
public HiveContext build(SparkContext sparkContext) {
    return new HiveContext(sparkContext);
}
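A usage sketch for the builder above; HiveContextBuilder is a hypothetical name standing in for the enclosing class, which is not shown in the example:

    // Hypothetical caller: HiveContextBuilder represents the class that
    // declares build(SparkContext) above.
    HiveContextBuilder builder = new HiveContextBuilder();
    HiveContext hiveContext = builder.build(sparkContext);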