Java source code examples: org.apache.kylin.common.util.StringUtil
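All of the examples below exercise a small set of static helpers on StringUtil: join, splitByComma, appendWithSeparator, noBlank, and toUpperCaseArray. As a quick orientation before the real call sites, here is a minimal sketch of the two most frequent calls, join and splitByComma; the expected output shown in the comments is inferred from how the examples use the results (comma-separated job arguments and table lists), not from the class's documentation.

import java.util.Arrays;
import java.util.List;

import org.apache.kylin.common.util.StringUtil;

public class StringUtilJoinSplitSketch {
    public static void main(String[] args) {
        // join: collapse a list of segment ids into one comma-separated argument,
        // the same way the merge-dictionary steps below build their command lines.
        List<String> mergingSegmentIds = Arrays.asList("seg-1", "seg-2", "seg-3");
        System.out.println(StringUtil.join(mergingSegmentIds, ","));   // seg-1,seg-2,seg-3

        // splitByComma: the reverse direction, used by the REST controllers below
        // to turn a comma-separated path variable back into individual table names.
        String[] tables = StringUtil.splitByComma("DB.TABLE_A,DB.TABLE_B");
        System.out.println(Arrays.toString(tables));                   // [DB.TABLE_A, DB.TABLE_B]
    }
}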
Example 1
@Override
public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow) {
    final String jobWorkingDir = getJobWorkingDir(jobFlow, hdfsWorkingDir);
    org.apache.kylin.source.hive.GarbageCollectionStep step = new org.apache.kylin.source.hive.GarbageCollectionStep();
    step.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);
    List<String> deleteTables = new ArrayList<>();
    deleteTables.add(getIntermediateTableIdentity());
    // mr-hive dict and inner table do not need delete hdfs
    String[] mrHiveDicts = flatDesc.getSegment().getConfig().getMrHiveDictColumns();
    if (Objects.nonNull(mrHiveDicts) && mrHiveDicts.length > 0) {
        String dictDb = flatDesc.getSegment().getConfig().getMrHiveDictDB();
        String tableName = dictDb + "." + flatDesc.getTableName() + "_"
                + MRHiveDictUtil.DictHiveType.GroupBy.getName();
        deleteTables.add(tableName);
    }
    step.setIntermediateTables(deleteTables);
    step.setExternalDataPaths(Collections.singletonList(JoinedFlatTable.getTableDir(flatDesc, jobWorkingDir)));
    step.setHiveViewIntermediateTableIdentities(StringUtil.join(hiveViewIntermediateTables, ","));
    jobFlow.addTask(step);
}
Example 2
public MapReduceExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    MapReduceExecutable mergeDictionaryStep = new MapReduceExecutable();
    mergeDictionaryStep.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd, JobEngineConfig.CUBE_MERGE_JOB_CONF_SUFFIX);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_META_URL, getSegmentMetadataUrl(seg.getConfig(), jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    appendExecCmdParameters(cmd, MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Dictionary_" + seg.getCubeInstance().getName() + "_Step");
    mergeDictionaryStep.setMapReduceParams(cmd.toString());
    mergeDictionaryStep.setMapReduceJobClass(MergeDictionaryJob.class);
    return mergeDictionaryStep;
}
Example 3
public void configureFlinkJob(final CubeSegment seg, final FlinkExecutable flinkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    flinkExecutable.setJobId(jobId);
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_FLINK_CUBE);
}
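Examples 3, 4, 5 and most of the later build steps share the same idiom for collecting extra jar paths: start from an empty StringBuilder and let StringUtil.appendWithSeparator add each configured jar list before handing the result to setJars. The sketch below isolates that idiom with hypothetical stand-in paths; the separator inserted between entries is assumed to be a comma, matching the comma-separated jar lists the executables expect.

import org.apache.kylin.common.util.StringUtil;

public class AdditionalJarsSketch {
    public static void main(String[] args) {
        StringBuilder jars = new StringBuilder();
        // In the build steps, the appended values come from
        // seg.getConfig().getSparkAdditionalJars() or getFlinkAdditionalJars();
        // the paths here are made-up stand-ins.
        StringUtil.appendWithSeparator(jars, "/opt/kylin/extra/udf.jar");
        StringUtil.appendWithSeparator(jars, "/opt/kylin/extra/custom-serde.jar");
        // The accumulated string is what setJars(...) receives in the examples above.
        System.out.println(jars.toString());
    }
}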
Example 4
public SparkExecutable createFactDistinctColumnsSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setClassName(SparkFactDistinct.class.getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkFactDistinct.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkFactDistinct.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkFactDistinct.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS + ":" + seg.toString());
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
Example 5
public FlinkExecutable createMergeDictionaryFlinkStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkMergingDictionary.class.getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    flinkExecutable.setParam(FlinkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);
    flinkExecutable.setFlinkConfigName(ExecutableConstants.FLINK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    return flinkExecutable;
}
Example 6
public void check(List<String> segFullNameList) {
    issueExistHTables = Lists.newArrayList();
    inconsistentHTables = Lists.newArrayList();
    for (String segFullName : segFullNameList) {
        String[] sepNameList = StringUtil.splitByComma(segFullName);
        try {
            HTableDescriptor hTableDescriptor = hbaseAdmin.getTableDescriptor(TableName.valueOf(sepNameList[0]));
            String host = hTableDescriptor.getValue(IRealizationConstants.HTableTag);
            if (!dstCfg.getMetadataUrlPrefix().equalsIgnoreCase(host)) {
                inconsistentHTables.add(segFullName);
            }
        } catch (IOException e) {
            issueExistHTables.add(segFullName);
            continue;
        }
    }
}
Example 7
/**
 * Regenerate table cardinality
 *
 * @return Table metadata array
 * @throws IOException
 */
@RequestMapping(value = "/{project}/{tableNames}/cardinality", method = { RequestMethod.PUT }, produces = {
        "application/json" })
@ResponseBody
public CardinalityRequest generateCardinality(@PathVariable String tableNames,
        @RequestBody CardinalityRequest request, @PathVariable String project) throws Exception {
    String submitter = SecurityContextHolder.getContext().getAuthentication().getName();
    String[] tables = StringUtil.splitByComma(tableNames);
    try {
        for (String table : tables) {
            tableService.calculateCardinality(table.trim().toUpperCase(Locale.ROOT), submitter, project);
        }
    } catch (IOException e) {
        logger.error("Failed to calculate cardinality", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    return request;
}
Example 8
void init(DataModelDesc model) {
    table = table.toUpperCase(Locale.ROOT);
    if (columns != null) {
        StringUtil.toUpperCaseArray(columns, columns);
    }
    if (model != null) {
        table = model.findTable(table).getAlias();
        if (columns != null) {
            for (int i = 0; i < columns.length; i++) {
                TblColRef column = model.findColumn(table, columns[i]);
                if (column.getColumnDesc().isComputedColumn() && !model.isFactTable(column.getTableRef())) {
                    throw new RuntimeException("Computed Column on lookup table is not allowed");
                }
                columns[i] = column.getName();
            }
        }
    }
}
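Example 8 calls StringUtil.toUpperCaseArray(columns, columns) with the same array as both source and target, which rewrites the column names in place. A small stand-alone illustration of that pattern (the column names are made up for the sketch):

import java.util.Arrays;

import org.apache.kylin.common.util.StringUtil;

public class UpperCaseColumnsSketch {
    public static void main(String[] args) {
        String[] columns = { "order_id", "buyer_country" };
        // Source and target are the same array, so the upper-cased names
        // overwrite the originals, exactly as init(DataModelDesc) does above.
        StringUtil.toUpperCaseArray(columns, columns);
        System.out.println(Arrays.toString(columns));   // [ORDER_ID, BUYER_COUNTRY]
    }
}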
Example 9
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.doSetup(context);
    final SerializableConfiguration sConf = new SerializableConfiguration(context.getConfiguration());
    final String metaUrl = context.getConfiguration().get(BatchConstants.ARG_META_URL);
    final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
    final String segmentIds = context.getConfiguration().get(MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt());
    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
    final CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    final CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeInstance.getDescName());
    mergingSegments = getMergingSegments(cubeInstance, StringUtil.splitByComma(segmentIds));
    tblColRefs = cubeDesc.getAllColumnsNeedDictionaryBuilt().toArray(new TblColRef[0]);
    dictMgr = DictionaryManager.getInstance(kylinConfig);
}
Example 10
protected void handleMetadataPersistException(ExecutableContext context, Throwable exception) {
    final String[] adminDls = context.getConfig().getAdminDls();
    if (adminDls == null || adminDls.length < 1) {
        logger.warn(NO_NEED_TO_SEND_EMAIL_USER_LIST_IS_EMPTY);
        return;
    }
    List<String> users = Lists.newArrayList(adminDls);
    Map<String, Object> dataMap = Maps.newHashMap();
    dataMap.put("job_name", getName());
    dataMap.put("env_name", context.getConfig().getDeployEnv());
    dataMap.put(SUBMITTER, StringUtil.noBlank(getSubmitter(), "missing submitter"));
    dataMap.put("job_engine", MailNotificationUtil.getLocalHostName());
    dataMap.put("error_log",
            Matcher.quoteReplacement(StringUtil.noBlank(exception.getMessage(), "no error message")));
    String content = MailNotificationUtil.getMailContent(MailNotificationUtil.METADATA_PERSIST_FAIL, dataMap);
    String title = MailNotificationUtil.getMailTitle("METADATA PERSIST", "FAIL",
            context.getConfig().getDeployEnv());
    new MailService(context.getConfig()).sendMail(users, title, content);
}
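Example 10 uses StringUtil.noBlank to fall back to a default string when the submitter or the exception message is missing. The sketch below shows that fallback as implied by the call sites above; treating a whitespace-only string the same as null is an assumption based on the method name, not something the example itself demonstrates.

import org.apache.kylin.common.util.StringUtil;

public class NoBlankSketch {
    public static void main(String[] args) {
        // Non-blank input is returned unchanged.
        System.out.println(StringUtil.noBlank("ADMIN", "missing submitter"));   // ADMIN
        // Null input falls back to the supplied default.
        System.out.println(StringUtil.noBlank(null, "no error message"));       // no error message
        // A whitespace-only message is assumed to be treated as blank as well.
        System.out.println(StringUtil.noBlank("   ", "no error message"));      // no error message (assumed)
    }
}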
Example 11
public SparkExecutable createBuildDictionarySparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkBuildDictionary.class.getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_DICT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkBuildDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
Example 12
public MapReduceExecutable createMergeCuboidDataStep(CubeSegment seg, List<CubeSegment> mergingSegments,
        String jobID, Class<? extends AbstractHadoopJob> clazz) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging) + "*");
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);
    MapReduceExecutable mergeCuboidDataStep = new MapReduceExecutable();
    mergeCuboidDataStep.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);
    StringBuilder cmd = new StringBuilder();
    appendMapReduceParameters(cmd);
    appendExecCmdParameters(cmd, BatchConstants.ARG_CUBE_NAME, seg.getCubeInstance().getName());
    appendExecCmdParameters(cmd, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
    appendExecCmdParameters(cmd, BatchConstants.ARG_INPUT, formattedPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_OUTPUT, outputPath);
    appendExecCmdParameters(cmd, BatchConstants.ARG_JOB_NAME,
            "Kylin_Merge_Cuboid_" + seg.getCubeInstance().getName() + "_Step");
    mergeCuboidDataStep.setMapReduceParams(cmd.toString());
    mergeCuboidDataStep.setMapReduceJobClass(clazz);
    return mergeCuboidDataStep;
}
Example 13
public void configureSparkJob(final CubeSegment seg, final SparkExecutable sparkExecutable,
        final String jobId, final String cuboidRootPath) {
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_TABLE.getOpt(),
            seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_INPUT_PATH.getOpt(), tablePath);
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_META_URL.getOpt(),
            getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkCubingByLayer.OPTION_OUTPUT_PATH.getOpt(), cuboidRootPath);
    sparkExecutable.setJobId(jobId);
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_CUBE + ":" + seg.toString());
}
Example 14
public void update() {
    logger.info("Reloading Cube Metadata from store: " + store.getReadableResourcePath(ResourceStore.CUBE_DESC_RESOURCE_ROOT));
    CubeDescManager cubeDescManager = CubeDescManager.getInstance(config);
    List<CubeDesc> cubeDescs;
    if (ArrayUtils.isEmpty(cubeNames)) {
        cubeDescs = cubeDescManager.listAllDesc();
    } else {
        String[] names = StringUtil.splitByComma(cubeNames[0]);
        if (ArrayUtils.isEmpty(names))
            return;
        cubeDescs = Lists.newArrayListWithCapacity(names.length);
        for (String name : names) {
            cubeDescs.add(cubeDescManager.getCubeDesc(name));
        }
    }
    for (CubeDesc cubeDesc : cubeDescs) {
        updateCubeDesc(cubeDesc);
    }
    verify();
}
Example 15
public SparkExecutable createMergeDictionaryStep(CubeSegment seg, String jobID, List<String> mergingSegmentIds) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkMergingDictionary.class.getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_MERGE_SEGMENT_IDS.getOpt(), StringUtil.join(mergingSegmentIds, ","));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_DICT.getOpt(), getDictInfoPath(jobID));
    sparkExecutable.setParam(SparkMergingDictionary.OPTION_OUTPUT_PATH_STAT.getOpt(), getStatisticsPath(jobID));
    sparkExecutable.setJobId(jobID);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY + ":" + seg.toString());
    sparkExecutable.setSparkConfigName(ExecutableConstants.SPARK_SPECIFIC_CONFIG_NAME_MERGE_DICTIONARY);
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
Example 16
/**
 * Read the given path as a Java RDD; the path can have second-level sub folders.
 * @param inputPath
 * @param fs
 * @param sc
 * @param keyClass
 * @param valueClass
 * @return
 * @throws IOException
 */
public static JavaPairRDD parseInputPath(String inputPath, FileSystem fs, JavaSparkContext sc, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }
    if (!hasDir) {
        return sc.sequenceFile(inputHDFSPath.toString(), keyClass, valueClass);
    }
    return sc.sequenceFile(StringUtil.join(inputFolders, ","), keyClass, valueClass);
}
Example 17
public SparkExecutable createBuildUHCDictSparkStep(String jobId) {
    final SparkExecutable sparkExecutable = SparkExecutableFactory.instance(seg.getConfig());
    sparkExecutable.setClassName(SparkUHCDictionary.class.getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_INPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_OUTPUT_PATH.getOpt(), getDictRootPath(jobId));
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_CUBING_JOB_ID.getOpt(), jobId);
    sparkExecutable.setParam(SparkUHCDictionary.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    sparkExecutable.setJobId(jobId);
    sparkExecutable.setName(ExecutableConstants.STEP_NAME_BUILD_SPARK_UHC_DICTIONARY);
    sparkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getSparkAdditionalJars());
    sparkExecutable.setJars(jars.toString());
    return sparkExecutable;
}
Example 18
@RequestMapping(value = "/{tables}/{project}", method = { RequestMethod.DELETE }, produces = { "application/json" })
@ResponseBody
public Map<String, String[]> unLoadHiveTables(@PathVariable String tables, @PathVariable String project) {
    Set<String> unLoadSuccess = Sets.newHashSet();
    Set<String> unLoadFail = Sets.newHashSet();
    Map<String, String[]> result = new HashMap<String, String[]>();
    try {
        for (String tableName : StringUtil.splitByComma(tables)) {
            tableACLService.deleteFromTableACLByTbl(project, tableName);
            if (tableService.unloadHiveTable(tableName, project)) {
                unLoadSuccess.add(tableName);
            } else {
                unLoadFail.add(tableName);
            }
        }
    } catch (Throwable e) {
        logger.error("Failed to unload Hive Table", e);
        throw new InternalErrorException(e.getLocalizedMessage(), e);
    }
    result.put("result.unload.success", (String[]) unLoadSuccess.toArray(new String[unLoadSuccess.size()]));
    result.put("result.unload.fail", (String[]) unLoadFail.toArray(new String[unLoadFail.size()]));
    return result;
}
Example 19
public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env, Class keyClass,
        Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }
    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }
    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}
Example 20
public FlinkExecutable createFactDistinctColumnsFlinkStep(String jobId) {
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    final IJoinedFlatTableDesc flatTableDesc = EngineFactory.getJoinedFlatTableDesc(seg);
    final String tablePath = JoinedFlatTable.getTableDir(flatTableDesc, getJobWorkingDir(jobId));
    flinkExecutable.setClassName(FlinkFactDistinctColumns.class.getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_TABLE.getOpt(), seg.getConfig().getHiveDatabaseForIntermediateTable() + "." + flatTableDesc.getTableName());
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_INPUT_PATH.getOpt(), tablePath);
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_OUTPUT_PATH.getOpt(), getFactDistinctColumnsPath(jobId));
    flinkExecutable.setParam(FlinkFactDistinctColumns.OPTION_STATS_SAMPLING_PERCENT.getOpt(), String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
    flinkExecutable.setJobId(jobId);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
    flinkExecutable.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES, getCounterOutputPath(jobId));
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    return flinkExecutable;
}
Example 21
public FlinkExecutable createMergeCuboidDataFlinkStep(CubeSegment seg, List<CubeSegment> mergingSegments, String jobID) {
    final List<String> mergingCuboidPaths = Lists.newArrayList();
    for (CubeSegment merging : mergingSegments) {
        mergingCuboidPaths.add(getCuboidRootPath(merging));
    }
    String formattedPath = StringUtil.join(mergingCuboidPaths, ",");
    String outputPath = getCuboidRootPath(jobID);
    final FlinkExecutable flinkExecutable = new FlinkExecutable();
    flinkExecutable.setClassName(FlinkCubingMerge.class.getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_INPUT_PATH.getOpt(), formattedPath);
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_META_URL.getOpt(), getSegmentMetadataUrl(seg.getConfig(), jobID));
    flinkExecutable.setParam(FlinkCubingMerge.OPTION_OUTPUT_PATH.getOpt(), outputPath);
    flinkExecutable.setJobId(jobID);
    flinkExecutable.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);
    StringBuilder jars = new StringBuilder();
    StringUtil.appendWithSeparator(jars, seg.getConfig().getFlinkAdditionalJars());
    flinkExecutable.setJars(jars.toString());
    return flinkExecutable;
}