Java source code examples: org.apache.pig.tools.pigstats.PigStatusReporter
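The snippets below are collected from Apache Pig and Pig-related projects and show how the PigStatusReporter singleton is obtained and used to increment Hadoop counters, report progress, and wire up task-side logging. As a minimal, hedged sketch of the common usage pattern (the class name CountingUdf and the counter group/name are illustrative only, not taken from the examples below), a custom UDF might report progress and bump a counter like this:

import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.tools.pigstats.PigStatusReporter;

// Illustrative sketch: a UDF that keeps the task alive during long work
// and counts processed rows through PigStatusReporter.
public class CountingUdf extends EvalFunc<String> {
    @Override
    public String exec(Tuple input) throws IOException {
        PigStatusReporter reporter = PigStatusReporter.getInstance();
        if (reporter != null) {
            reporter.progress(); // tell the framework the task is still making progress
            // Returns false (and has no effect) if no task context is attached yet.
            reporter.incrCounter("CountingUdf", "rows.processed", 1); // illustrative group/name
        }
        return input == null ? null : input.toString();
    }
}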
Example 1
/**
 * Mocks the Reporter.incrCounter, but adds buffering.
 * See org.apache.hadoop.mapred.Reporter's incrCounter.
 */
public void incrCounter(String group, String counterName, long incr) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null && reporter.incrCounter(group, counterName, incr)) { // common case
        if (counterStringMap_.size() > 0) {
            for (Map.Entry<Pair<String, String>, Long> entry : counterStringMap_.entrySet()) {
                reporter.incrCounter(entry.getKey().first, entry.getKey().second, entry.getValue());
            }
            counterStringMap_.clear();
        }
        return;
    }
    // In the case when reporter is not available, or we can't get the
    // Counter, store in the local map.
    Pair<String, String> key = new Pair<String, String>(group, counterName);
    Long currentValue = counterStringMap_.get(key);
    counterStringMap_.put(key, (currentValue == null ? 0 : currentValue) + incr);
}
Example 2
/**
 * Mocks the Reporter.incrCounter, but adds buffering.
 * See org.apache.hadoop.mapred.Reporter's incrCounter.
 */
public void incrCounter(Enum<?> key, long incr) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null && reporter.incrCounter(key, incr)) { // common case
        if (counterEnumMap_.size() > 0) {
            for (Map.Entry<Enum<?>, Long> entry : counterEnumMap_.entrySet()) {
                reporter.getCounter(entry.getKey()).increment(entry.getValue());
            }
            counterEnumMap_.clear();
        }
        return;
    }
    // In the case when reporter is not available, or we can't get the
    // Counter, store in the local map.
    Long currentValue = counterEnumMap_.get(key);
    counterEnumMap_.put(key, (currentValue == null ? 0 : currentValue) + incr);
}
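In both buffering variants above, increments that cannot be applied to the Hadoop counters yet (for example, because no task context has been attached to the reporter) are accumulated in a local map and flushed the next time an incrCounter call succeeds.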
Example 3
public MapReducePOStoreImpl(TaskInputOutputContext<?,?,?,?> context) {
    // get a copy of the Configuration so that changes to the
    // configuration below (like setting the output location) do
    // not affect the caller's copy
    Configuration outputConf = new Configuration(context.getConfiguration());
    reporter = PigStatusReporter.getInstance();
    reporter.setContext(new MRTaskContext(context));
    // make a copy of the Context to use here - since in the same
    // task (map or reduce) we could have multiple stores, we should
    // make this copy so that the same context does not get over-written
    // by the different stores.
    this.context = HadoopShims.createTaskAttemptContext(outputConf,
            context.getTaskAttemptID());
}
Example 4
/**
 * @param context
 *
 */
public PigRecordReader(InputFormat<?, ?> inputformat, PigSplit pigSplit,
        LoadFunc loadFunc, TaskAttemptContext context, long limit) throws IOException, InterruptedException {
    this.inputformat = inputformat;
    this.pigSplit = pigSplit;
    this.loadfunc = loadFunc;
    this.context = context;
    this.reporter = PigStatusReporter.getInstance();
    this.inputSpecificConf = context.getConfiguration();
    curReader = null;
    progress = 0;
    idx = 0;
    this.limit = limit;
    initNextRecordReader();
    doTiming = inputSpecificConf.getBoolean(PIG_UDF_PROFILE, false);
    if (doTiming) {
        counterGroup = loadFunc.toString();
        timingFrequency = inputSpecificConf.getLong(PIG_UDF_PROFILE_FREQUENCY, 100L);
    }
}
Example 5
public VespaDocumentOperation(String... params) {
    statusReporter = PigStatusReporter.getInstance();
    if (statusReporter != null) {
        statusReporter.incrCounter("Vespa Document Operation Counters", "Document operation ok", 0);
        statusReporter.incrCounter("Vespa Document Operation Counters", "Document operation failed", 0);
    }
    properties = VespaConfiguration.loadProperties(params);
    template = properties.getProperty(PROPERTY_ID_TEMPLATE);
    operation = Operation.fromString(properties.getProperty(PROPERTY_OPERATION, "put"));
    verbose = Boolean.parseBoolean(properties.getProperty(PROPERTY_VERBOSE, "false"));
}
Example 6
public static void create(ReduceContext context, Configuration conf) throws IOException {
    redContext = context;
    isMapper = false;
    initCommonConfig(conf);
    PigStatusReporter.getInstance().setContext(new MRTaskContext(context));
}
Example 7
public static void create(MapContext context, Configuration conf) throws IOException {
    mapContext = context;
    isMapper = true;
    initCommonConfig(conf);
    PigStatusReporter.getInstance().setContext(new MRTaskContext(context));
}
Example 8
@SuppressWarnings("rawtypes")
protected void incSpillCount(Enum counter, long numRecsSpilled) {
PigStatusReporter reporter = PigStatusReporter.getInstance();
if (reporter != null && reporter.getCounter(counter)!=null) {
reporter.getCounter(counter).increment(numRecsSpilled);
} else {
PigHadoopLogger.getInstance().warn(mContents, "Spill counter incremented", counter);
}
}
Example 9
@SuppressWarnings("unchecked")
public static void handleError(EvalFunc evalFunc, Exception e) {
evalFunc.getLogger().error(e);
PigStatusReporter reporter = PigStatusReporter.getInstance();
if (reporter != null) {
reporter.incrCounter(evalFunc.getClass().getName(), e.toString(), 1);
}
}
Example 10
@SuppressWarnings("unchecked")
public static void handleTimeout(EvalFunc evalFunc, Exception e) {
evalFunc.getLogger().error(e);
PigStatusReporter reporter = PigStatusReporter.getInstance();
if (reporter != null) {
reporter.incrCounter(evalFunc.getClass().getName(), "MonitoredUDF Timeout", 1);
}
}
Example 11
private void init(PhysicalPlan pp, POStore poStore) throws IOException {
    poStore.setStoreImpl(new FetchPOStoreImpl(pigContext));
    poStore.setUp();
    TaskAttemptID taskAttemptID = HadoopShims.getNewTaskAttemptID();
    HadoopShims.setTaskAttemptId(conf, taskAttemptID);
    if (!PlanHelper.getPhysicalOperators(pp, POStream.class).isEmpty()) {
        MapRedUtil.setupStreamingDirsConfSingle(poStore, pigContext, conf);
    }
    String currentTime = Long.toString(System.currentTimeMillis());
    conf.set("pig.script.submitted.timestamp", currentTime);
    conf.set("pig.job.submitted.timestamp", currentTime);
    PhysicalOperator.setReporter(new FetchProgressableReporter());
    SchemaTupleBackend.initialize(conf, pigContext);
    UDFContext udfContext = UDFContext.getUDFContext();
    udfContext.addJobConf(conf);
    udfContext.setClientSystemProps(pigContext.getProperties());
    udfContext.serialize(conf);
    PigMapReduce.sJobConfInternal.set(conf);
    Utils.setDefaultTimeZone(conf);
    boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new FetchTaskContext(new FetchContext()));
    PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);
}
Example 12
/**
 * The reduce function which packages the key and List<Tuple>
 * into key, Bag<Tuple> after converting Hadoop type key into Pig type.
 * The package result is either collected as is, if the reduce plan is
 * empty or after passing through the reduce plan.
 */
@Override
protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {
    if (!initialized) {
        initialized = true;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);
        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);
    }
    // In the case we optimize, we combine
    // POPackage and POForeach - so we could get many
    // tuples out of the getnext() call of POJoinPackage
    // In this case, we process till we see EOP from
    // POJoinPackage.getNext()
    if (pack.getPkgr() instanceof JoinPackager) {
        pack.attachInput(key, tupIter.iterator());
        while (true) {
            if (processOnePackageOutput(context))
                break;
        }
    } else {
        // not optimized, so package will
        // give only one tuple out for the key
        pack.attachInput(key, tupIter.iterator());
        processOnePackageOutput(context);
    }
}
Example 13
/**
 * Here the task id is set up so that it can be attached to each tuple
 **/
@Override
public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    int taskIDInt = context.getTaskAttemptID().getTaskID().getId();
    taskID = String.valueOf(taskIDInt);
    pOperator = mp.getLeaves().get(0);
    while (true) {
        if (pOperator instanceof POCounter) {
            ((POCounter) pOperator).setTaskId(taskIDInt);
            ((POCounter) pOperator).resetLocalCounter();
            break;
        } else {
            pOperator = mp.getPredecessors(pOperator).get(0);
        }
    }
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        reporter.incrCounter(
                JobControlCompiler.PIG_MAP_RANK_NAME
                        + context.getJobID().toString(), taskID, 0);
    }
}
Example 14
/**
 * While tuples are collected, they are counted one by one by a global counter per task.
 **/
@Override
public void collect(Context context, Tuple tuple)
        throws InterruptedException, IOException {
    context.write(null, tuple);
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        reporter.incrCounter(
                JobControlCompiler.PIG_MAP_RANK_NAME
                        + context.getJobID().toString(), taskID, 1);
    }
}
Example 15
/**
 * In this case, global counters are accessed during the reduce phase (immediately after a
 * sorting phase), and the increment applied to the global counters depends on whether it is
 * a dense rank or not.
 * If it is a dense rank, the counter is incremented by 1; otherwise, the increment depends
 * on the size of the bag in the tuple.
 * @param increment is the value to add to the corresponding global counter.
 **/
public static void incrementCounter(Long increment) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        if (leaf instanceof POCounter) {
            reporter.incrCounter(
                    JobControlCompiler.PIG_MAP_RANK_NAME
                            + context.getJobID().toString(), taskID, increment);
        }
    }
}
Example 16
@Override
public Integer exec(Tuple input) throws IOException {
    try {
        Thread.sleep(7500);
        PigStatusReporter reporter = PigStatusReporter.getInstance();
        reporter.progress();
        Thread.sleep(7500);
    } catch (InterruptedException e) {
    }
    return 100;
}
Example 17
@Override
public DataBag exec(Tuple input) throws IOException {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    try {
        DataBag inputBag = (DataBag) input.get(0);
        DataBag outputBag = bagFactory.newDefaultBag();
        long i = 0, j, cnt = 0;
        if (inputBag != null) {
            for (Tuple elem1 : inputBag) {
                j = 0;
                for (Tuple elem2 : inputBag) {
                    if (j > i) {
                        outputBag.add(tupleFactory.newTuple(Arrays.asList(elem1, elem2)));
                        cnt++;
                    }
                    j++;
                    if (reporter != null)
                        reporter.progress();
                    if (cnt % 1000000 == 0) {
                        outputBag.spill();
                        cnt = 0;
                    }
                }
                i++;
            }
        }
        return outputBag;
    } catch (Exception e) {
        throw new RuntimeException("Caught exception processing input of " + this.getClass().getName(), e);
    }
}
Example 18
protected static void safeIncrCounter(String group, String name, Long increment) {
    PigStatusReporter.getInstance().incrCounter(group, name, increment);
}
Example 19
private void init() {
    // Wire Pig's task-side logger to the status reporter so that
    // aggregated warnings can be surfaced through Hadoop counters.
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    PhysicalOperator.setPigLogger(pigHadoopLogger);
}
Example 20
@SuppressWarnings("unchecked")
@Override
public void initialize() throws Exception {
// Reset any static variables to avoid conflict in container-reuse.
sampleVertex = null;
sampleMap = null;
// Reset static variables cleared for avoiding OOM.
new JVMReuseImpl().cleanupStaticData();
UserPayload payload = getContext().getUserPayload();
conf = TezUtils.createConfFromUserPayload(payload);
PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
.deserialize(conf.get("udf.import.list")));
PigContext pc = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext"));
// To determine front-end in UDFContext
conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier());
conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex()));
UDFContext.getUDFContext().addJobConf(conf);
UDFContext.getUDFContext().deserialize();
String execPlanString = conf.get(PLAN);
execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString);
SchemaTupleBackend.initialize(conf, pc);
PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID());
// Set the job conf as a thread-local member of PigMapReduce
// for backwards compatibility with the existing code base.
PigMapReduce.sJobConfInternal.set(conf);
Utils.setDefaultTimeZone(conf);
boolean aggregateWarning = "true".equalsIgnoreCase(pc.getProperties().getProperty("aggregate.warning"));
PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
pigStatusReporter.setContext(new TezTaskContext(getContext()));
pigHadoopLogger = PigHadoopLogger.getInstance();
pigHadoopLogger.setReporter(pigStatusReporter);
pigHadoopLogger.setAggregate(aggregateWarning);
PhysicalOperator.setPigLogger(pigHadoopLogger);
LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan, TezTaskConfigurable.class);
for (TezTaskConfigurable tezTC : tezTCs){
tezTC.initialize(getContext());
}
}
Example 21
/**
 * The reduce function which packages the key and List<Tuple>
 * into key, Bag<Tuple> after converting Hadoop type key into Pig type.
 * The package result is either collected as is, if the reduce plan is
 * empty or after passing through the reduce plan.
 */
@Override
protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {
    if (!initialized) {
        initialized = true;
        // cache the collector for use in runPipeline()
        // which could additionally be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);
        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);
        if (!inIllustrator) {
            for (POStore store : stores) {
                MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
                store.setStoreImpl(impl);
                store.setUp();
            }
        }
    }
    // In the case we optimize the join, we combine
    // POPackage and POForeach - so we could get many
    // tuples out of the getnext() call of POJoinPackage
    // In this case, we process till we see EOP from
    // POJoinPackage.getNext()
    if (pack.getPkgr() instanceof JoinPackager) {
        pack.attachInput(key, tupIter.iterator());
        while (true) {
            if (processOnePackageOutput(context))
                break;
        }
    } else {
        // join is not optimized, so package will
        // give only one tuple out for the key
        pack.attachInput(key, tupIter.iterator());
        processOnePackageOutput(context);
    }
}
Example 22
/**
 * The reduce function which packages the key and List<Tuple>
 * into key, Bag<Tuple> after converting Hadoop type key into Pig type.
 * The package result is either collected as is, if the reduce plan is
 * empty or after passing through the reduce plan.
 */
@Override
protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {
    if (!initialized) {
        initialized = true;
        // cache the collector for use in runPipeline()
        // which could additionally be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);
        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);
        for (POStore store : stores) {
            MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
            store.setStoreImpl(impl);
            store.setUp();
        }
    }
    // If the keyType is not a tuple, the MapWithComparator.collect()
    // would have wrapped the key into a tuple so that the
    // comparison UDF used in the order by can process it.
    // We need to unwrap the key out of the tuple and hand it
    // to the POPackage for processing
    if (keyType != DataType.TUPLE) {
        Tuple t = (Tuple) (key.getValueAsPigType());
        try {
            key = HDataType.getWritableComparableTypes(t.get(0), keyType);
        } catch (ExecException e) {
            throw e;
        }
    }
    pack.attachInput(key, tupIter.iterator());
    Result res = pack.getNextTuple();
    if (res.returnStatus == POStatus.STATUS_OK) {
        Tuple packRes = (Tuple) res.result;
        if (rp.isEmpty()) {
            context.write(null, packRes);
            return;
        }
        rp.attachInput(packRes);
        List<PhysicalOperator> leaves = rp.getLeaves();
        PhysicalOperator leaf = leaves.get(0);
        runPipeline(leaf);
    }
    if (res.returnStatus == POStatus.STATUS_NULL) {
        return;
    }
    if (res.returnStatus == POStatus.STATUS_ERR) {
        int errCode = 2093;
        String msg = "Encountered error in package operator while processing group.";
        throw new ExecException(msg, errCode, PigException.BUG);
    }
}
Example 23
/**
 * Configures the mapper with the map plan and the
 * reporter thread
 */
@SuppressWarnings("unchecked")
@Override
public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration job = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(job));
    context.getConfiguration().set(PigConstants.TASK_INDEX, Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    PigMapReduce.sJobContext = context;
    PigMapReduce.sJobConfInternal.set(context.getConfiguration());
    PigMapReduce.sJobConf = context.getConfiguration();
    inIllustrator = inIllustrator(context);
    PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer.deserialize(job.get("udf.import.list")));
    pigContext = (PigContext) ObjectSerializer.deserialize(job.get("pig.pigContext"));
    // This attempts to fetch all of the generated code from the distributed cache, and resolve it
    SchemaTupleBackend.initialize(job, pigContext);
    if (pigContext.getLog4jProperties() != null)
        PropertyConfigurator.configure(pigContext.getLog4jProperties());
    if (mp == null)
        mp = (PhysicalPlan) ObjectSerializer.deserialize(
                job.get("pig.mapPlan"));
    stores = PlanHelper.getPhysicalOperators(mp, POStore.class);
    // To be removed
    if (mp.isEmpty())
        log.debug("Map Plan empty!");
    else {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        mp.explain(baos);
        log.debug(baos.toString());
    }
    keyType = ((byte[]) ObjectSerializer.deserialize(job.get("pig.map.keytype")))[0];
    // till here
    pigReporter = new ProgressableReporter();
    // Get the UDF specific context
    MapRedUtil.setupUDFContext(job);
    if (!(mp.isEmpty())) {
        PigSplit split = (PigSplit) context.getInputSplit();
        List<OperatorKey> targetOpKeys = split.getTargetOps();
        ArrayList<PhysicalOperator> targetOpsAsList = new ArrayList<PhysicalOperator>();
        for (OperatorKey targetKey : targetOpKeys) {
            targetOpsAsList.add(mp.getOperator(targetKey));
        }
        roots = targetOpsAsList.toArray(new PhysicalOperator[1]);
        leaf = mp.getLeaves().get(0);
    }
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new MRTaskContext(context));
    log.info("Aliases being processed per job phase (AliasName[line,offset]): " + job.get("pig.alias.location"));
    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
Example 24
/**
 * The map function that attaches the inpTuple appropriately
 * and executes the map plan if it is not empty. Collects the
 * result of execution into oc, or the input directly into oc
 * if the map plan is empty. The collection is left abstract for the
 * map-only or map-reduce job to implement. Map-only collects
 * the tuple as-is whereas map-reduce collects it after extracting
 * the key and indexed tuple.
 */
@Override
protected void map(Text key, Tuple inpTuple, Context context) throws IOException, InterruptedException {
    if (!initialized) {
        initialized = true;
        // cache the collector for use in runPipeline() which
        // can be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);
        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);
        if (!inIllustrator) {
            for (POStore store : stores) {
                MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
                store.setStoreImpl(impl);
                if (!pigContext.inIllustrator)
                    store.setUp();
            }
        }
    }
    if (mp.isEmpty()) {
        collect(context, inpTuple);
        return;
    }
    for (PhysicalOperator root : roots) {
        if (inIllustrator) {
            if (root != null) {
                root.attachInput(inpTuple);
            }
        } else {
            root.attachInput(tf.newTupleNoCopy(inpTuple.getAll()));
        }
    }
    runPipeline(leaf);
}
Example 25
public void setReporter(PigStatusReporter reporter) {
    this.reporter = reporter;
}