Java Source Code Examples: org.apache.hadoop.hive.ql.hooks.ReadEntity
Example 1
@Test
public void testCTAS() throws Exception {
String tableName = createTable();
String ctasTableName = "table" + random();
String query = "create table " + ctasTableName + " as select * from " + tableName;
runCommand(query);
final Set<ReadEntity> readEntities = getInputs(tableName, Entity.Type.TABLE);
final Set<WriteEntity> writeEntities = getOutputs(ctasTableName, Entity.Type.TABLE);
HiveEventContext hiveEventContext = constructEvent(query, HiveOperation.CREATETABLE_AS_SELECT, readEntities,
writeEntities);
AtlasEntity processEntity1 = validateProcess(hiveEventContext);
AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, hiveEventContext);
AtlasObjectId process = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), processEntity1.getGuid());
Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
assertTableIsRegistered(DEFAULT_DB, ctasTableName);
}
Example 2
private void assertProcessIsNotRegistered(HiveEventContext event) throws Exception {
try {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
LOG.debug("Searching for process with query {}", processQFName);
assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName);
} catch (Exception e) {
LOG.error("Exception : ", e);
}
}
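The sorted sets above rely on an entityComparator that this listing never shows. A minimal sketch consistent with how it is used, assuming a case-insensitive ordering by entity name (the actual comparator in the Atlas test suite may differ):
private static final Comparator<Entity> entityComparator = new Comparator<Entity>() {
    @Override
    public int compare(Entity o1, Entity o2) {
        // Assumption: sort by lower-cased entity name; entities without a name sort first.
        String name1 = o1.getName() == null ? "" : o1.getName().toLowerCase();
        String name2 = o2.getName() == null ? "" : o2.getName().toLowerCase();
        return name1.compareTo(name2);
    }
};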
Example 3
private static void addInputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
if (refs != null) {
if (sortedInputs != null) {
Set<String> dataSetsProcessed = new LinkedHashSet<>();
for (Entity input : sortedInputs) {
if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
//HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
if (ignoreHDFSPathsInQFName &&
(Type.DFS_DIR.equals(input.getType()) || Type.LOCAL_DIR.equals(input.getType()))) {
LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
} else if (refs.containsKey(input)) {
if (input.getType() == Type.PARTITION || input.getType() == Type.TABLE) {
final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(hiveBridge.hiveClient.getTable(input.getTable().getDbName(), input.getTable().getTableName()));
addDataset(buffer, refs.get(input), createTime.getTime());
} else {
addDataset(buffer, refs.get(input));
}
}
dataSetsProcessed.add(input.getName().toLowerCase());
}
}
}
}
}
Example 4
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) throws HiveException {
final ReadEntity entity = new ReadEntity();
if (Entity.Type.DFS_DIR.equals(entityType)) {
entity.setName(lower(new Path(inputName).toString()));
entity.setTyp(Entity.Type.DFS_DIR);
} else {
entity.setName(getQualifiedTblName(inputName));
entity.setTyp(entityType);
}
if (entityType == Entity.Type.TABLE) {
entity.setT(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, inputName));
}
return new LinkedHashSet<ReadEntity>() {{ add(entity); }};
}
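Several examples also call getOutputs, which is not part of this listing. A plausible counterpart mirroring getInputs above (an assumption, not the original helper):
private Set<WriteEntity> getOutputs(String outputName, Entity.Type entityType) throws HiveException {
    final WriteEntity entity = new WriteEntity();
    if (Entity.Type.DFS_DIR.equals(entityType) || Entity.Type.LOCAL_DIR.equals(entityType)) {
        // Directory outputs are identified by their lower-cased path.
        entity.setName(lower(new Path(outputName).toString()));
        entity.setTyp(entityType);
    } else {
        entity.setName(getQualifiedTblName(outputName));
        entity.setTyp(entityType);
    }
    if (entityType == Entity.Type.TABLE) {
        entity.setT(hiveMetaStoreBridge.hiveClient.getTable(DEFAULT_DB, outputName));
    }
    return new LinkedHashSet<WriteEntity>() {{ add(entity); }};
}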
Example 5
private Referenceable validateProcess(HiveHook.HiveEventContext event, Set<ReadEntity> inputTables, Set<WriteEntity> outputTables) throws Exception {
String processId = assertProcessIsRegistered(event, inputTables, outputTables);
Referenceable process = atlasClient.getEntity(processId);
if (inputTables == null) {
Assert.assertNull(process.get(INPUTS));
} else {
Assert.assertEquals(((List<Referenceable>) process.get(INPUTS)).size(), inputTables.size());
validateInputTables(process, inputTables);
}
if (outputTables == null) {
Assert.assertNull(process.get(OUTPUTS));
} else {
Assert.assertEquals(((List<Id>) process.get(OUTPUTS)).size(), outputTables.size());
validateOutputTables(process, outputTables);
}
return process;
}
Example 6
@Test(enabled = false)
public void testInsertIntoTempTable() throws Exception {
String tableName = createTable();
String insertTableName = createTable(false, false, true);
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsNotRegistered(DEFAULT_DB, insertTableName, true);
String query = "insert into " + insertTableName + " select id, name from " + tableName;
runCommand(query);
Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
outputs.iterator().next().setName(getQualifiedTblName(insertTableName + HiveMetaStoreBridge.TEMP_TABLE_PREFIX + SessionState.get().getSessionId()));
outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);
validateProcess(constructEvent(query, HiveOperation.QUERY, inputs, outputs));
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
}
Example 7
private String assertProcessIsRegistered(final HiveHook.HiveEventContext event, final Set<ReadEntity> inputTbls, final Set<WriteEntity> outputTbls) throws Exception {
try {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));
LOG.debug("Searching for process with query {}", processQFName);
return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName, new AssertPredicate() {
@Override
public void assertOnEntity(final Referenceable entity) throws Exception {
List<String> recentQueries = (List<String>) entity.get("recentQueries");
Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
}
});
} catch (Exception e) {
LOG.error("Exception : ", e);
throw e;
}
}
Example 8
private void assertProcessIsNotRegistered(HiveHook.HiveEventContext event) throws Exception {
try {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
LOG.debug("Searching for process with query {}", processQFName);
assertEntityIsNotRegistered(HiveDataTypes.HIVE_PROCESS.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, processQFName);
} catch (Exception e) {
LOG.error("Exception : ", e);
}
}
Example 9
public SentryOnFailureHookContextImpl(String command,
Set<ReadEntity> inputs, Set<WriteEntity> outputs, HiveOperation hiveOp,
Database db, Table tab, AccessURI udfURI, AccessURI partitionURI,
String userName, String ipAddress, AuthorizationException e,
Configuration conf) {
this.command = command;
this.inputs = inputs;
this.outputs = outputs;
this.hiveOp = hiveOp;
this.userName = userName;
this.ipAddress = ipAddress;
this.database = db;
this.table = tab;
this.udfURI = udfURI;
this.partitionURI = partitionURI;
this.authException = e;
this.conf = conf;
}
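A hedged usage sketch for this constructor from inside a failure hook; every literal below (command text, user, client address) is invented for illustration, and inputs, outputs, database, table, authException, and conf are assumed to be in scope:
// Sketch only: wiring the constructor from a hypothetical authorization failure.
SentryOnFailureHookContextImpl failureContext = new SentryOnFailureHookContextImpl(
        "select * from customers",   // command that failed (invented)
        inputs, outputs,             // ReadEntity/WriteEntity sets from the compiler
        HiveOperation.QUERY,
        database, table,             // Database and Table under access
        null, null,                  // no UDF or partition URI involved in this case
        "bob", "10.0.0.1",           // requesting user and client IP (invented)
        authException, conf);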
Example 10
@Override
public Task<? extends Serializable> createShowRoleGrantTask(ASTNode ast, Path resultFile,
HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs) throws SemanticException {
ASTNode child = (ASTNode) ast.getChild(0);
PrincipalType principalType = PrincipalType.USER;
switch (child.getType()) {
case HiveParser.TOK_USER:
principalType = PrincipalType.USER;
break;
case HiveParser.TOK_GROUP:
principalType = PrincipalType.GROUP;
break;
case HiveParser.TOK_ROLE:
principalType = PrincipalType.ROLE;
break;
}
if (principalType != PrincipalType.GROUP) {
String msg = SentryHiveConstants.GRANT_REVOKE_NOT_SUPPORTED_FOR_PRINCIPAL + principalType;
throw new SemanticException(msg);
}
String principalName = BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText());
RoleDDLDesc roleDesc = new RoleDDLDesc(principalName, principalType,
RoleDDLDesc.RoleOperation.SHOW_ROLE_GRANT, null);
roleDesc.setResFile(resultFile.toString());
return createTask(new DDLWork(inputs, outputs, roleDesc));
}
Example 11
@Override
public Task<? extends Serializable> createRevokeTask(ASTNode ast, HashSet<ReadEntity> inputs,
HashSet<WriteEntity> outputs) throws SemanticException {
List<PrivilegeDesc> privilegeDesc = analyzePrivilegeListDef((ASTNode) ast.getChild(0));
List<PrincipalDesc> principalDesc = analyzePrincipalListDef((ASTNode) ast.getChild(1));
PrivilegeObjectDesc privilegeObj = null;
if (ast.getChildCount() > 2) {
ASTNode astChild = (ASTNode) ast.getChild(2);
privilegeObj = analyzePrivilegeObject(astChild);
}
if (privilegeObj != null && privilegeObj.getPartSpec() != null) {
throw new SemanticException(SentryHiveConstants.PARTITION_PRIVS_NOT_SUPPORTED);
}
for (PrincipalDesc princ : principalDesc) {
if (princ.getType() != PrincipalType.ROLE) {
String msg = SentryHiveConstants.GRANT_REVOKE_NOT_SUPPORTED_FOR_PRINCIPAL + princ.getType();
throw new SemanticException(msg);
}
}
RevokeDesc revokeDesc = new RevokeDesc(privilegeDesc, principalDesc, privilegeObj);
return createTask(new DDLWork(inputs, outputs, revokeDesc));
}
Example 12
private Task<? extends Serializable> analyzeGrantRevokeRole(boolean isGrant, ASTNode ast,
HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs) throws SemanticException {
List<PrincipalDesc> principalDesc = analyzePrincipalListDef(
(ASTNode) ast.getChild(0));
List<String> roles = new ArrayList<String>();
for (int i = 1; i < ast.getChildCount(); i++) {
roles.add(BaseSemanticAnalyzer.unescapeIdentifier(ast.getChild(i).getText()));
}
String roleOwnerName = "";
if (SessionState.get() != null
&& SessionState.get().getAuthenticator() != null) {
roleOwnerName = SessionState.get().getAuthenticator().getUserName();
}
for (PrincipalDesc princ : principalDesc) {
if (princ.getType() != PrincipalType.GROUP) {
String msg = SentryHiveConstants.GRANT_REVOKE_NOT_SUPPORTED_FOR_PRINCIPAL + princ.getType();
throw new SemanticException(msg);
}
}
GrantRevokeRoleDDL grantRevokeRoleDDL = new GrantRevokeRoleDDL(isGrant,
roles, principalDesc, roleOwnerName, PrincipalType.USER, false);
return createTask(new DDLWork(inputs, outputs, grantRevokeRoleDDL));
}
Example 13
@Override
public Task<? extends Serializable> createShowRolePrincipalsTask(ASTNode ast, Path resFile,
HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs) throws SemanticException {
String roleName;
if (ast.getChildCount() == 1) {
roleName = ast.getChild(0).getText();
} else {
// the parser should not allow this
throw new AssertionError("Unexpected Tokens in SHOW ROLE PRINCIPALS");
}
RoleDDLDesc roleDDLDesc = new RoleDDLDesc(roleName, PrincipalType.ROLE,
RoleDDLDesc.RoleOperation.SHOW_ROLE_PRINCIPALS, null);
roleDDLDesc.setResFile(resFile.toString());
return createTask(new DDLWork(inputs, outputs, roleDDLDesc));
//return TaskFactory.get(new DDLWork(inputs, outputs, roleDDLDesc), conf);
}
Example 14
/**
 * Add column level hierarchy to inputHierarchy
 *
 * @param inputHierarchy
 * @param entity
 */
private void addColumnHierarchy(List<List<DBModelAuthorizable>> inputHierarchy,
ReadEntity entity) {
List<DBModelAuthorizable> entityHierarchy = new ArrayList<DBModelAuthorizable>();
entityHierarchy.add(hiveAuthzBinding.getAuthServer());
entityHierarchy.addAll(getAuthzHierarchyFromEntity(entity));
switch (entity.getType()) {
case TABLE:
case PARTITION:
List<String> cols = entity.getAccessedColumns();
for (String col : cols) {
List<DBModelAuthorizable> colHierarchy = new ArrayList<DBModelAuthorizable>(entityHierarchy);
colHierarchy.add(new Column(col));
inputHierarchy.add(colHierarchy);
}
break;
default:
inputHierarchy.add(entityHierarchy);
}
}
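To make the effect concrete: for a TABLE or PARTITION read, every accessed column becomes its own authorizable chain. A small illustration, with table and column names invented and readEntity assumed to be a TABLE entity whose getAccessedColumns() returns {id, name}:
List<List<DBModelAuthorizable>> inputHierarchy = new ArrayList<>();
addColumnHierarchy(inputHierarchy, readEntity);
// inputHierarchy now holds one chain per accessed column of table d.t:
//   [authServer, Database(d), Table(t), Column(id)]
//   [authServer, Database(d), Table(t), Column(name)]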
Example 15
/**
 * Get Authorizable from inputs and put into inputHierarchy
 *
 * @param inputHierarchy
 * @param inputs
 */
private void getInputHierarchyFromInputs(List<List<DBModelAuthorizable>> inputHierarchy,
Set<ReadEntity> inputs) {
for (ReadEntity readEntity: inputs) {
// skip the tables/view that are part of expanded view definition
// skip the Hive generated dummy entities created for queries like 'select <expr>'
if (isChildTabForView(readEntity) || isDummyEntity(readEntity)) {
continue;
}
if (readEntity.getAccessedColumns() != null && !readEntity.getAccessedColumns().isEmpty()) {
addColumnHierarchy(inputHierarchy, readEntity);
} else {
List<DBModelAuthorizable> entityHierarchy = new ArrayList<DBModelAuthorizable>();
entityHierarchy.add(hiveAuthzBinding.getAuthServer());
entityHierarchy.addAll(getAuthzHierarchyFromEntity(readEntity));
inputHierarchy.add(entityHierarchy);
}
}
}
Example 16
/**
 * Check if the given read entity is a table that has parents of type Table.
 * The Hive compiler performs a query rewrite by replacing a view with its definition; in the process, it captures both
 * the original view and the tables/views that it selects from.
 * Access authorization is only interested in the top-level views, not the underlying tables.
 * @param readEntity
 * @return true if the entity is a table or partition whose parents are all tables, i.e. it was pulled in by view expansion
 */
private boolean isChildTabForView(ReadEntity readEntity) {
// If this is a table added for view, then we need to skip that
if (!readEntity.getType().equals(Type.TABLE) && !readEntity.getType().equals(Type.PARTITION)) {
return false;
}
if (readEntity.getParents() != null && readEntity.getParents().size() > 0) {
for (ReadEntity parentEntity : readEntity.getParents()) {
if (!parentEntity.getType().equals(Type.TABLE)) {
return false;
}
}
return true;
} else {
return false;
}
}
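A hedged sketch of the case this method detects, assuming Hive's ReadEntity(Table, ReadEntity) constructor (table names invented): a base table captured during view expansion carries the view as its parent and is therefore skipped by authorization.
Table view    = new Table("default", "v_customers");  // the view the user queried (invented)
Table backing = new Table("default", "customers");    // table pulled in by view expansion
ReadEntity viewEntity  = new ReadEntity(view);
ReadEntity tableEntity = new ReadEntity(backing, viewEntity);
// isChildTabForView(viewEntity)  -> false (no parents: the view itself is authorized)
// isChildTabForView(tableEntity) -> true  (its only parent is a table-typed entity: skipped)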
Example 17
private boolean skipProcess() {
Set<ReadEntity> inputs = getInputs();
Set<WriteEntity> outputs = getOutputs();
boolean ret = CollectionUtils.isEmpty(inputs) && CollectionUtils.isEmpty(outputs);
if (!ret) {
if (getContext().getHiveOperation() == HiveOperation.QUERY) {
// Select query has only one output
if (outputs.size() == 1) {
WriteEntity output = outputs.iterator().next();
if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR) {
if (output.getWriteType() == WriteEntity.WriteType.PATH_WRITE && output.isTempURI()) {
ret = true;
}
}
// DELETE and UPDATE initially have one input and one output. Since they do not
// support sub-queries, they never create lineage that has both an input and an
// output (one input only), so it is safe to filter them out here.
if (output.getWriteType() == WriteEntity.WriteType.DELETE || output.getWriteType() == WriteEntity.WriteType.UPDATE) {
ret = true;
}
}
}
}
return ret;
}
Example 18
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) throws HiveException {
final ReadEntity entity;
if (Entity.Type.DFS_DIR.equals(entityType)) {
entity = new TestReadEntity(lower(new Path(inputName).toString()), entityType);
} else {
entity = new TestReadEntity(getQualifiedTblName(inputName), entityType);
}
if (entityType == Entity.Type.TABLE) {
entity.setT(hiveMetaStoreBridge.getHiveClient().getTable(DEFAULT_DB, inputName));
}
return new LinkedHashSet<ReadEntity>() {{ add(entity); }};
}
Example 19
private HiveEventContext constructEvent(String query, HiveOperation op, Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
HiveEventContext event = new HiveEventContext();
event.setQueryStr(query);
event.setOperation(op);
event.setInputs(inputs);
event.setOutputs(outputs);
return event;
}
Example 20
private AtlasEntity validateProcess(HiveEventContext event, Set<ReadEntity> inputTables, Set<WriteEntity> outputTables) throws Exception {
String processId = assertProcessIsRegistered(event, inputTables, outputTables);
AtlasEntity processEntity = atlasClientV2.getEntityByGuid(processId).getEntity();
validateInputTables(processEntity, inputTables);
validateOutputTables(processEntity, outputTables);
return processEntity;
}
Example 21
@Test(enabled = false)
public void testInsertIntoTempTable() throws Exception {
String tableName = createTable();
String insertTableName = createTable(false, false, true);
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsNotRegistered(DEFAULT_DB, insertTableName, true);
String query = "insert into " + insertTableName + " select id, name from " + tableName;
runCommand(query);
Set<ReadEntity> inputs = getInputs(tableName, Entity.Type.TABLE);
Set<WriteEntity> outputs = getOutputs(insertTableName, Entity.Type.TABLE);
outputs.iterator().next().setWriteType(WriteEntity.WriteType.INSERT);
HiveEventContext event = constructEvent(query, HiveOperation.QUERY, inputs, outputs);
AtlasEntity hiveProcess = validateProcess(event);
AtlasEntity hiveProcessExecution = validateProcessExecution(hiveProcess, event);
AtlasObjectId process = toAtlasObjectId(hiveProcessExecution.getRelationshipAttribute(
BaseHiveEvent.ATTRIBUTE_PROCESS));
Assert.assertEquals(process.getGuid(), hiveProcess.getGuid());
Assert.assertEquals(numberOfProcessExecutions(hiveProcess), 1);
assertTableIsRegistered(DEFAULT_DB, tableName);
assertTableIsRegistered(DEFAULT_DB, insertTableName, null, true);
}
Example 22
private String sortEventsAndGetProcessQualifiedName(final HiveEventContext event) throws HiveException {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
return getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(event.getInputs()), getSortedProcessDataSets(event.getOutputs()));
}
Example 23
private String assertProcessIsRegistered(final HiveEventContext event, final Set<ReadEntity> inputTbls, final Set<WriteEntity> outputTbls) throws Exception {
try {
SortedSet<ReadEntity> sortedHiveInputs = event.getInputs() == null ? null : new TreeSet<ReadEntity>(entityComparator);
SortedSet<WriteEntity> sortedHiveOutputs = event.getOutputs() == null ? null : new TreeSet<WriteEntity>(entityComparator);
if (event.getInputs() != null) {
sortedHiveInputs.addAll(event.getInputs());
}
if (event.getOutputs() != null) {
sortedHiveOutputs.addAll(event.getOutputs());
}
String processQFName = getProcessQualifiedName(hiveMetaStoreBridge, event, sortedHiveInputs, sortedHiveOutputs, getSortedProcessDataSets(inputTbls), getSortedProcessDataSets(outputTbls));
LOG.debug("Searching for process with query {}", processQFName);
return assertEntityIsRegistered(HiveDataTypes.HIVE_PROCESS.getName(), ATTRIBUTE_QUALIFIED_NAME, processQFName, new AssertPredicate() {
@Override
public void assertOnEntity(final AtlasEntity entity) throws Exception {
List<String> recentQueries = (List<String>) entity.getAttribute(BaseHiveEvent.ATTRIBUTE_RECENT_QUERIES);
Assert.assertEquals(recentQueries.get(0), lower(event.getQueryStr()));
}
});
} catch (Exception e) {
LOG.error("Exception : ", e);
throw e;
}
}
Example 24
@VisibleForTesting
protected static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext,
final SortedSet<ReadEntity> sortedHiveInputs,
final SortedSet<WriteEntity> sortedHiveOutputs,
SortedMap<ReadEntity, AtlasEntity> hiveInputsMap,
SortedMap<WriteEntity, AtlasEntity> hiveOutputsMap) throws HiveException {
HiveOperation op = eventContext.getOperation();
if (isCreateOp(eventContext)) {
Entity entity = getEntityByType(sortedHiveOutputs, Entity.Type.TABLE);
if (entity != null) {
Table outTable = entity.getTable();
//refresh table
outTable = dgiBridge.getHiveClient().getTable(outTable.getDbName(), outTable.getTableName());
return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getMetadataNamespace(), outTable);
}
}
StringBuilder buffer = new StringBuilder(op.getOperationName());
boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
if (ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
}
addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
buffer.append(IO_SEP);
addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
LOG.info("Setting process qualified name to {}", buffer);
return buffer.toString();
}
Example 25
protected static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
switch (op) {
case LOAD:
case IMPORT:
return isPartitionBasedQuery(outputs);
case EXPORT:
return isPartitionBasedQuery(inputs);
case QUERY:
return true;
}
return false;
}
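In short: QUERY always drops HDFS paths from the process qualified name, LOAD/IMPORT/EXPORT drop them only for partition-based operations, and every other operation keeps them. A minimal illustration, with inputs and outputs assumed to be in scope:
// QUERY: unconditionally true, regardless of the input/output sets.
boolean dropForQuery = ignoreHDFSPathsinQFName(HiveOperation.QUERY, inputs, outputs);        // true
// LOAD: true only when the outputs include a partition.
boolean dropForLoad = ignoreHDFSPathsinQFName(HiveOperation.LOAD, inputs, outputs);
// Operations outside the switch, e.g. CREATETABLE, fall through to false.
boolean dropForCreate = ignoreHDFSPathsinQFName(HiveOperation.CREATETABLE, inputs, outputs); // false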
Example 26
protected static void addInputs(HiveMetaStoreBridge hiveBridge, HiveOperation op, SortedSet<ReadEntity> sortedInputs, StringBuilder buffer, final Map<ReadEntity, AtlasEntity> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
if (refs != null) {
if (sortedInputs != null) {
Set<String> dataSetsProcessed = new LinkedHashSet<>();
for (Entity input : sortedInputs) {
if (!dataSetsProcessed.contains(input.getName().toLowerCase())) {
//HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
if (ignoreHDFSPathsInQFName &&
(Entity.Type.DFS_DIR.equals(input.getType()) || Entity.Type.LOCAL_DIR.equals(input.getType()))) {
LOG.debug("Skipping dfs dir input addition to process qualified name {} ", input.getName());
} else if (refs.containsKey(input)) {
if (input.getType() == Entity.Type.PARTITION || input.getType() == Entity.Type.TABLE) {
Table inputTable = refreshTable(hiveBridge, input.getTable().getDbName(), input.getTable().getTableName());
if (inputTable != null) {
addDataset(buffer, refs.get(input), HiveMetaStoreBridge.getTableCreatedTime(inputTable));
}
} else {
addDataset(buffer, refs.get(input));
}
}
dataSetsProcessed.add(input.getName().toLowerCase());
}
}
}
}
}
Example 27
private void handleExternalTables(final HiveMetaStoreBridge dgiBridge, final HiveEventContext event, final LinkedHashMap<Type, Referenceable> tables) throws HiveException, MalformedURLException {
List<Referenceable> entities = new ArrayList<>();
final WriteEntity hiveEntity = (WriteEntity) getEntityByType(event.getOutputs(), Type.TABLE);
Table hiveTable = hiveEntity == null ? null : hiveEntity.getTable();
//Refresh to get the correct location
if(hiveTable != null) {
hiveTable = dgiBridge.hiveClient.getTable(hiveTable.getDbName(), hiveTable.getTableName());
}
if (hiveTable != null && TableType.EXTERNAL_TABLE.equals(hiveTable.getTableType())) {
LOG.info("Registering external table process {} ", event.getQueryStr());
final String location = lower(hiveTable.getDataLocation().toString());
final ReadEntity dfsEntity = new ReadEntity();
dfsEntity.setTyp(Type.DFS_DIR);
dfsEntity.setD(new Path(location));
SortedMap<ReadEntity, Referenceable> hiveInputsMap = new TreeMap<ReadEntity, Referenceable>(entityComparator) {{
put(dfsEntity, dgiBridge.fillHDFSDataSet(location));
}};
SortedMap<WriteEntity, Referenceable> hiveOutputsMap = new TreeMap<WriteEntity, Referenceable>(entityComparator) {{
put(hiveEntity, tables.get(Type.TABLE));
}};
SortedSet<ReadEntity> sortedIps = new TreeSet<>(entityComparator);
sortedIps.addAll(hiveInputsMap.keySet());
SortedSet<WriteEntity> sortedOps = new TreeSet<>(entityComparator);
sortedOps.addAll(hiveOutputsMap.keySet());
Referenceable processReferenceable = getProcessReferenceable(dgiBridge, event,
sortedIps, sortedOps, hiveInputsMap, hiveOutputsMap);
entities.addAll(tables.values());
entities.add(processReferenceable);
event.addMessage(new HookNotification.EntityUpdateRequest(event.getUser(), entities));
}
}
Example 28
private Referenceable getProcessReferenceable(HiveMetaStoreBridge dgiBridge, HiveEventContext hiveEvent,
final SortedSet<ReadEntity> sortedHiveInputs, final SortedSet<WriteEntity> sortedHiveOutputs, SortedMap<ReadEntity, Referenceable> source, SortedMap<WriteEntity, Referenceable> target)
throws HiveException {
Referenceable processReferenceable = new Referenceable(HiveDataTypes.HIVE_PROCESS.getName());
String queryStr = lower(hiveEvent.getQueryStr());
processReferenceable.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
getProcessQualifiedName(dgiBridge, hiveEvent, sortedHiveInputs, sortedHiveOutputs, source, target));
LOG.debug("Registering query: {}", queryStr);
List<Referenceable> sourceList = new ArrayList<>(source.values());
List<Referenceable> targetList = new ArrayList<>(target.values());
//The serialization code expects a list; the lists are freshly constructed, so only emptiness needs checking
if (!sourceList.isEmpty()) {
    processReferenceable.set("inputs", sourceList);
}
if (!targetList.isEmpty()) {
    processReferenceable.set("outputs", targetList);
}
processReferenceable.set(AtlasClient.NAME, queryStr);
processReferenceable.set("operationType", hiveEvent.getOperation().getOperationName());
processReferenceable.set("startTime", new Date(hiveEvent.getQueryStartTime()));
processReferenceable.set("userName", hiveEvent.getUser());
processReferenceable.set("queryText", queryStr);
processReferenceable.set("queryId", hiveEvent.getQueryId());
processReferenceable.set("queryPlan", "Not Supported");
processReferenceable.set(AtlasConstants.CLUSTER_NAME_ATTRIBUTE, dgiBridge.getClusterName());
List<String> recentQueries = new ArrayList<>(1);
recentQueries.add(queryStr);
processReferenceable.set("recentQueries", recentQueries);
processReferenceable.set("endTime", new Date(System.currentTimeMillis()));
//TODO set queryGraph
return processReferenceable;
}
Example 29
@VisibleForTesting
static String getProcessQualifiedName(HiveMetaStoreBridge dgiBridge, HiveEventContext eventContext,
final SortedSet<ReadEntity> sortedHiveInputs,
final SortedSet<WriteEntity> sortedHiveOutputs,
SortedMap<ReadEntity, Referenceable> hiveInputsMap,
SortedMap<WriteEntity, Referenceable> hiveOutputsMap) throws HiveException {
HiveOperation op = eventContext.getOperation();
if (isCreateOp(eventContext)) {
Entity entity = getEntityByType(sortedHiveOutputs, Type.TABLE);
if (entity != null) {
Table outTable = entity.getTable();
//refresh table
outTable = dgiBridge.hiveClient.getTable(outTable.getDbName(), outTable.getTableName());
return HiveMetaStoreBridge.getTableProcessQualifiedName(dgiBridge.getClusterName(), outTable);
}
}
StringBuilder buffer = new StringBuilder(op.getOperationName());
boolean ignoreHDFSPathsinQFName = ignoreHDFSPathsinQFName(op, sortedHiveInputs, sortedHiveOutputs);
if (ignoreHDFSPathsinQFName && LOG.isDebugEnabled()) {
LOG.debug("Ignoring HDFS paths in qualifiedName for {} {} ", op, eventContext.getQueryStr());
}
addInputs(dgiBridge, op, sortedHiveInputs, buffer, hiveInputsMap, ignoreHDFSPathsinQFName);
buffer.append(IO_SEP);
addOutputs(dgiBridge, op, sortedHiveOutputs, buffer, hiveOutputsMap, ignoreHDFSPathsinQFName);
LOG.info("Setting process qualified name to {}", buffer);
return buffer.toString();
}
Example 30
private static boolean ignoreHDFSPathsinQFName(final HiveOperation op, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) {
switch (op) {
case LOAD:
case IMPORT:
return isPartitionBasedQuery(outputs);
case EXPORT:
return isPartitionBasedQuery(inputs);
case QUERY:
return true;
}
return false;
}