Java source code examples: org.apache.pig.tools.pigstats.PigStatusReporter
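
All of the examples below follow the same basic pattern: obtain the singleton via PigStatusReporter.getInstance(), then increment a Hadoop counter or report progress, guarding against the reporter (or its underlying counter) being unavailable. The following minimal sketch shows that pattern inside a UDF; the class name, counter group, and counter name are illustrative placeholders, not taken from the examples.

import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.tools.pigstats.PigStatusReporter;

// Hypothetical UDF showing the guarded counter/progress pattern used in the examples.
public class CountingUpperUDF extends EvalFunc<String> {
    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0 || input.get(0) == null) {
            return null;
        }
        PigStatusReporter reporter = PigStatusReporter.getInstance();
        if (reporter != null) {
            // Counter group and name are illustrative placeholders.
            reporter.incrCounter("CountingUpperUDF Counters", "records processed", 1);
            reporter.progress(); // report liveness during long-running work
        }
        return ((String) input.get(0)).toUpperCase();
    }
}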

Example 1
/**
* Mocks the Reporter.incrCounter, but adds buffering.
* See org.apache.hadoop.mapred.Reporter's incrCounter.
*/
public void incrCounter(String group, String counterName, long incr) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null && reporter.incrCounter(group, counterName, incr)) { // common case
        if (counterStringMap_.size() > 0) {
            for (Map.Entry<Pair<String, String>, Long> entry : counterStringMap_.entrySet()) {
                reporter.incrCounter(entry.getKey().first, entry.getKey().second, entry.getValue());
            }
            counterStringMap_.clear();
        }
        return;
    }

    // In the case when reporter is not available, or we can't get the
    // Counter, store in the local map.
    Pair<String, String> key = new Pair<String, String>(group, counterName);
    Long currentValue = counterStringMap_.get(key);
    counterStringMap_.put(key, (currentValue == null ? 0 : currentValue) + incr);
}
 
Example 2
/**
* Mocks the Reporter.incrCounter, but adds buffering.
* See org.apache.hadoop.mapred.Reporter's incrCounter.
*/
public void incrCounter(Enum<?> key, long incr) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null && reporter.incrCounter(key, incr)) { // common case
        if (counterEnumMap_.size() > 0) {
            for (Map.Entry<Enum<?>, Long> entry : counterEnumMap_.entrySet()) {
                reporter.getCounter(entry.getKey()).increment(entry.getValue());
            }
            counterEnumMap_.clear();
        }
        return;
    }

    // In the case when reporter is not available, or we can't get the
    // Counter, store in the local map.
    Long currentValue = counterEnumMap_.get(key);
    counterEnumMap_.put(key, (currentValue == null ? 0 : currentValue) + incr);
}
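
Examples 1 and 2 refer to two buffer maps, counterStringMap_ and counterEnumMap_, that are declared elsewhere in their class. Below is a minimal sketch of those fields as the two methods assume them; the enclosing class name is hypothetical, while the field names and the Pair type come from the examples (assumed here to be org.apache.pig.impl.util.Pair).

import java.util.HashMap;
import java.util.Map;

import org.apache.pig.impl.util.Pair;

// Hypothetical enclosing class for the buffer fields used by Examples 1 and 2.
public class BufferedCounterHelper {
    // (group, counterName) -> pending increment, buffered while the reporter is unavailable.
    private final Map<Pair<String, String>, Long> counterStringMap_ =
            new HashMap<Pair<String, String>, Long>();
    // enum counter -> pending increment, buffered while the reporter is unavailable.
    private final Map<Enum<?>, Long> counterEnumMap_ = new HashMap<Enum<?>, Long>();
}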
 
Example 3
public MapReducePOStoreImpl(TaskInputOutputContext<?,?,?,?> context) {
    // get a copy of the Configuration so that changes to the
    // configuration below (like setting the output location) do
    // not affect the caller's copy
    Configuration outputConf = new Configuration(context.getConfiguration());
    reporter = PigStatusReporter.getInstance();
    reporter.setContext(new MRTaskContext(context));

    // make a copy of the Context to use here - since in the same
    // task (map or reduce) we could have multiple stores, we should
    // make this copy so that the same context does not get over-written
    // by the different stores.

    this.context = HadoopShims.createTaskAttemptContext(outputConf,
            context.getTaskAttemptID());
}
 
Example 4
/**
 * @param context
 *
 */
public PigRecordReader(InputFormat<?, ?> inputformat, PigSplit pigSplit,
        LoadFunc loadFunc, TaskAttemptContext context, long limit) throws IOException, InterruptedException {
    this.inputformat = inputformat;
    this.pigSplit = pigSplit;
    this.loadfunc = loadFunc;
    this.context = context;
    this.reporter = PigStatusReporter.getInstance();
    this.inputSpecificConf = context.getConfiguration();
    curReader = null;
    progress = 0;
    idx = 0;
    this.limit = limit;
    initNextRecordReader();
    doTiming = inputSpecificConf.getBoolean(PIG_UDF_PROFILE, false);
    if (doTiming) {
        counterGroup = loadFunc.toString();
        timingFrequency = inputSpecificConf.getLong(PIG_UDF_PROFILE_FREQUENCY, 100L);
    }
}
 
Example 5
public VespaDocumentOperation(String... params) {
    statusReporter = PigStatusReporter.getInstance();
    if (statusReporter != null) {
        statusReporter.incrCounter("Vespa Document Operation Counters", "Document operation ok", 0);
        statusReporter.incrCounter("Vespa Document Operation Counters", "Document operation failed", 0);
    }
    properties = VespaConfiguration.loadProperties(params);
    template = properties.getProperty(PROPERTY_ID_TEMPLATE);
    operation = Operation.fromString(properties.getProperty(PROPERTY_OPERATION, "put"));
    verbose = Boolean.parseBoolean(properties.getProperty(PROPERTY_VERBOSE, "false"));
}
 
Example 6
public static void create(ReduceContext context, Configuration conf) throws IOException
{
    redContext = context;
    isMapper = false;
    initCommonConfig(conf);
    PigStatusReporter.getInstance().setContext(new MRTaskContext(context));
}
 
Example 7
public static void create(MapContext context, Configuration conf) throws IOException
{
    mapContext = context;
    isMapper = true;
    initCommonConfig(conf);
    PigStatusReporter.getInstance().setContext(new MRTaskContext(context));
}
 
Example 8
@SuppressWarnings("rawtypes")
protected void incSpillCount(Enum counter, long numRecsSpilled) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null && reporter.getCounter(counter)!=null) {
        reporter.getCounter(counter).increment(numRecsSpilled);
    } else {
        PigHadoopLogger.getInstance().warn(mContents, "Spill counter incremented", counter);
    }
}
 
Example 9
@SuppressWarnings("unchecked")
public static void handleError(EvalFunc evalFunc, Exception e) {
    evalFunc.getLogger().error(e);
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        reporter.incrCounter(evalFunc.getClass().getName(), e.toString(), 1);
    }
}
 
Example 10
@SuppressWarnings("unchecked")
public static void handleTimeout(EvalFunc evalFunc, Exception e) {
    evalFunc.getLogger().error(e);
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        reporter.incrCounter(evalFunc.getClass().getName(), "MonitoredUDF Timeout", 1);
    }
}
 
Example 11
private void init(PhysicalPlan pp, POStore poStore) throws IOException {
    poStore.setStoreImpl(new FetchPOStoreImpl(pigContext));
    poStore.setUp();

    TaskAttemptID taskAttemptID = HadoopShims.getNewTaskAttemptID();
    HadoopShims.setTaskAttemptId(conf, taskAttemptID);

    if (!PlanHelper.getPhysicalOperators(pp, POStream.class).isEmpty()) {
        MapRedUtil.setupStreamingDirsConfSingle(poStore, pigContext, conf);
    }

    String currentTime = Long.toString(System.currentTimeMillis());
    conf.set("pig.script.submitted.timestamp", currentTime);
    conf.set("pig.job.submitted.timestamp", currentTime);

    PhysicalOperator.setReporter(new FetchProgressableReporter());
    SchemaTupleBackend.initialize(conf, pigContext);

    UDFContext udfContext = UDFContext.getUDFContext();
    udfContext.addJobConf(conf);
    udfContext.setClientSystemProps(pigContext.getProperties());
    udfContext.serialize(conf);

    PigMapReduce.sJobConfInternal.set(conf);
    Utils.setDefaultTimeZone(conf);

    boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new FetchTaskContext(new FetchContext()));
    PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);
}
 
Example 12
/**
 * The reduce function which packages the key and List&lt;Tuple&gt;
 * into key, Bag&lt;Tuple&gt; after converting Hadoop type key into Pig type.
 * The package result is either collected as is, if the reduce plan is
 * empty or after passing through the reduce plan.
 */
@Override
protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {
    if(!initialized) {
        initialized = true;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);
    }

    // In the case we optimize, we combine
    // POPackage and POForeach - so we could get many
    // tuples out of the getnext() call of POJoinPackage
    // In this case, we process till we see EOP from
    // POJoinPackage.getNext()
    if (pack.getPkgr() instanceof JoinPackager)
    {
        pack.attachInput(key, tupIter.iterator());
        while (true)
        {
            if (processOnePackageOutput(context))
                break;
        }
    }
    else {
        // not optimized, so package will
        // give only one tuple out for the key
        pack.attachInput(key, tupIter.iterator());
        processOnePackageOutput(context);
    }

}
 
Example 13
/**
 * Sets up the task id so that it can be attached to each tuple.
 **/
@Override
public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    int taskIDInt = context.getTaskAttemptID().getTaskID().getId();
    taskID = String.valueOf(taskIDInt);

    pOperator = mp.getLeaves().get(0);

    while(true) {
        if(pOperator instanceof POCounter){
            ((POCounter) pOperator).setTaskId(taskIDInt);
            ((POCounter) pOperator).resetLocalCounter();
            break;
        } else {
            pOperator = mp.getPredecessors(pOperator).get(0);
        }
    }

    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        reporter.incrCounter(
                JobControlCompiler.PIG_MAP_RANK_NAME
                + context.getJobID().toString(), taskID, 0);
    }
}
 
Example 14
/**
 * As tuples are collected, each one is counted by a global per-task counter.
 **/
@Override
public void collect(Context context, Tuple tuple)
throws InterruptedException, IOException {
    context.write(null, tuple);
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        reporter.incrCounter(
                JobControlCompiler.PIG_MAP_RANK_NAME
                + context.getJobID().toString(), taskID, 1);
    }
}
 
Example 15
/**
 * In this case, global counters are accessed during the reduce phase (immediately after a
 * sorting phase), and the increment applied to the global counters depends on whether it
 * is a dense rank or not.
 * If it is a dense rank, the increment is 1; otherwise the increment depends on the size
 * of the bag in the tuple.
 * @param increment is the value to add to the corresponding global counter.
 **/
public static void incrementCounter(Long increment) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        if(leaf instanceof POCounter){
            reporter.incrCounter(
                    JobControlCompiler.PIG_MAP_RANK_NAME
                    + context.getJobID().toString(), taskID, increment);
        }
    }
}
 
Example 16
@Override
public Integer exec(Tuple input) throws IOException {
    
    try {
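        // Sleep to simulate long-running work, reporting progress in between
        // so the framework sees the task as still alive.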
        Thread.sleep(7500);
        PigStatusReporter reporter = PigStatusReporter.getInstance();
        reporter.progress();
        Thread.sleep(7500);
    } catch (InterruptedException e) {
    }
    
    return 100;
}
 
Example 17
@Override
public DataBag exec(Tuple input) throws IOException
{
  PigStatusReporter reporter = PigStatusReporter.getInstance();

  try {
    DataBag inputBag = (DataBag) input.get(0);
    DataBag outputBag = bagFactory.newDefaultBag();
    long i=0, j, cnt=0;

    if (inputBag != null)
    {
      for (Tuple elem1 : inputBag) {
        j = 0; 
        for (Tuple elem2 : inputBag) {
          if (j > i) {
            outputBag.add(tupleFactory.newTuple(Arrays.asList(elem1, elem2)));
            cnt++;
          }
          j++;

          if (reporter != null)
            reporter.progress();

          if (cnt % 1000000 == 0) {
            outputBag.spill();
            cnt = 0;
          }
        }
        i++;
      }
    }
    
    return outputBag;
  }
  catch (Exception e) {
    throw new RuntimeException("Caught exception processing input of " + this.getClass().getName(), e);
  }
}
 
Example 18
protected static void safeIncrCounter(String group, String name, Long increment) {
    PigStatusReporter.getInstance().incrCounter(group, name, increment);
}
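
Unlike most of the examples above, Example 18 calls incrCounter without a null guard. On Pig versions where the reporter may not yet be initialized, a guarded variant would look like the sketch below; this is an assumption for illustration, not the original source. Note that incrCounter itself returns false instead of throwing when the underlying counter is unavailable, which is what Examples 1 and 2 rely on.

// Hypothetical defensive variant of Example 18.
protected static void safeIncrCounter(String group, String name, Long increment) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        // incrCounter returns false (rather than throwing) when no underlying
        // counter is available, so a failed increment is simply dropped here.
        reporter.incrCounter(group, name, increment);
    }
}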
 
Example 19
private void init() {
	PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
	PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
	pigHadoopLogger.setReporter(pigStatusReporter);
	PhysicalOperator.setPigLogger(pigHadoopLogger);
}
 
Example 20
@SuppressWarnings("unchecked")
@Override
public void initialize() throws Exception {
    // Reset any static variables to avoid conflict in container-reuse.
    sampleVertex = null;
    sampleMap = null;

    // Reset static variables cleared for avoiding OOM.
    new JVMReuseImpl().cleanupStaticData();

    UserPayload payload = getContext().getUserPayload();
    conf = TezUtils.createConfFromUserPayload(payload);
    PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
            .deserialize(conf.get("udf.import.list")));
    PigContext pc = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext"));

    // To determine front-end in UDFContext
    conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier());
    conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex()));
    UDFContext.getUDFContext().addJobConf(conf);
    UDFContext.getUDFContext().deserialize();

    String execPlanString = conf.get(PLAN);
    execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString);
    SchemaTupleBackend.initialize(conf, pc);
    PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID());

    // Set the job conf as a thread-local member of PigMapReduce
    // for backwards compatibility with the existing code base.
    PigMapReduce.sJobConfInternal.set(conf);

    Utils.setDefaultTimeZone(conf);

    boolean aggregateWarning = "true".equalsIgnoreCase(pc.getProperties().getProperty("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new TezTaskContext(getContext()));
    pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);

    LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan, TezTaskConfigurable.class);
    for (TezTaskConfigurable tezTC : tezTCs){
        tezTC.initialize(getContext());
    }
}
 
Example 21
/**
 * The reduce function which packages the key and List&lt;Tuple&gt;
 * into key, Bag&lt;Tuple&gt; after converting Hadoop type key into Pig type.
 * The package result is either collected as is, if the reduce plan is
 * empty or after passing through the reduce plan.
 */
@Override
protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

    if (!initialized) {
        initialized = true;

        // cache the collector for use in runPipeline()
        // which could additionally be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);

        if (!inIllustrator)
            for (POStore store: stores) {
                MapReducePOStoreImpl impl
                    = new MapReducePOStoreImpl(context);
                store.setStoreImpl(impl);
                store.setUp();
            }
    }

    // In the case we optimize the join, we combine
    // POPackage and POForeach - so we could get many
    // tuples out of the getnext() call of POJoinPackage
    // In this case, we process till we see EOP from
    // POJoinPackage.getNext()
    if (pack.getPkgr() instanceof JoinPackager)
    {
        pack.attachInput(key, tupIter.iterator());
        while (true)
        {
            if (processOnePackageOutput(context))
                break;
        }
    }
    else {
        // join is not optimized, so package will
        // give only one tuple out for the key
        pack.attachInput(key, tupIter.iterator());
        processOnePackageOutput(context);
    }
}
 
Example 22
/**
 * The reduce function which packages the key and List&lt;Tuple&gt;
 * into key, Bag&lt;Tuple&gt; after converting Hadoop type key into Pig type.
 * The package result is either collected as is, if the reduce plan is
 * empty or after passing through the reduce plan.
 */
@Override
protected void reduce(PigNullableWritable key, Iterable<NullableTuple> tupIter, Context context)
        throws IOException, InterruptedException {

    if (!initialized) {
        initialized = true;

        // cache the collector for use in runPipeline()
        // which could additionally be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);

        for (POStore store: stores) {
            MapReducePOStoreImpl impl
                = new MapReducePOStoreImpl(context);
            store.setStoreImpl(impl);
            store.setUp();
        }
    }

    // If the keyType is not a tuple, the MapWithComparator.collect()
    // would have wrapped the key into a tuple so that the
    // comparison UDF used in the order by can process it.
    // We need to unwrap the key out of the tuple and hand it
    // to the POPackage for processing
    if(keyType != DataType.TUPLE) {
        Tuple t = (Tuple)(key.getValueAsPigType());
        try {
            key = HDataType.getWritableComparableTypes(t.get(0), keyType);
        } catch (ExecException e) {
            throw e;
        }
    }

    pack.attachInput(key, tupIter.iterator());

    Result res = pack.getNextTuple();
    if(res.returnStatus==POStatus.STATUS_OK){
        Tuple packRes = (Tuple)res.result;

        if(rp.isEmpty()){
            context.write(null, packRes);
            return;
        }

        rp.attachInput(packRes);

        List<PhysicalOperator> leaves = rp.getLeaves();

        PhysicalOperator leaf = leaves.get(0);
        runPipeline(leaf);

    }

    if(res.returnStatus==POStatus.STATUS_NULL) {
        return;
    }

    if(res.returnStatus==POStatus.STATUS_ERR){
        int errCode = 2093;
        String msg = "Encountered error in package operator while processing group.";
        throw new ExecException(msg, errCode, PigException.BUG);
    }

}
 
Example 23
/**
 * Configures the mapper with the map plan and the
 * reporter thread
 */
@SuppressWarnings("unchecked")
@Override
public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration job = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(job));
    context.getConfiguration().set(PigConstants.TASK_INDEX, Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    PigMapReduce.sJobContext = context;
    PigMapReduce.sJobConfInternal.set(context.getConfiguration());
    PigMapReduce.sJobConf = context.getConfiguration();
    inIllustrator = inIllustrator(context);

    PigContext.setPackageImportList((ArrayList<String>)ObjectSerializer.deserialize(job.get("udf.import.list")));
    pigContext = (PigContext)ObjectSerializer.deserialize(job.get("pig.pigContext"));

    // This attempts to fetch all of the generated code from the distributed cache, and resolve it
    SchemaTupleBackend.initialize(job, pigContext);

    if (pigContext.getLog4jProperties()!=null)
        PropertyConfigurator.configure(pigContext.getLog4jProperties());

    if (mp == null)
        mp = (PhysicalPlan) ObjectSerializer.deserialize(
            job.get("pig.mapPlan"));
    stores = PlanHelper.getPhysicalOperators(mp, POStore.class);

    // To be removed
    if(mp.isEmpty())
        log.debug("Map Plan empty!");
    else{
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        mp.explain(baos);
        log.debug(baos.toString());
    }
    keyType = ((byte[])ObjectSerializer.deserialize(job.get("pig.map.keytype")))[0];
    // till here

    pigReporter = new ProgressableReporter();
    // Get the UDF specific context
    MapRedUtil.setupUDFContext(job);

    if(!(mp.isEmpty())) {

        PigSplit split = (PigSplit)context.getInputSplit();
        List<OperatorKey> targetOpKeys = split.getTargetOps();

        ArrayList<PhysicalOperator> targetOpsAsList = new ArrayList<PhysicalOperator>();
        for (OperatorKey targetKey : targetOpKeys) {
            targetOpsAsList.add(mp.getOperator(targetKey));
        }
        roots = targetOpsAsList.toArray(new PhysicalOperator[1]);
        leaf = mp.getLeaves().get(0);
    }

    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new MRTaskContext(context));

    log.info("Aliases being processed per job phase (AliasName[line,offset]): " + job.get("pig.alias.location"));

    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
 
Example 24
/**
 * The map function that attaches the inpTuple appropriately
 * and executes the map plan if it is not empty. It collects the
 * result of the execution into oc, or the input directly into oc
 * if the map plan is empty. The collection is left abstract for the
 * map-only or map-reduce job to implement. Map-only collects
 * the tuple as-is, whereas map-reduce collects it after extracting
 * the key and indexed tuple.
 */
@Override
protected void map(Text key, Tuple inpTuple, Context context) throws IOException, InterruptedException {
    if(!initialized) {
        initialized  = true;
        // cache the collector for use in runPipeline() which
        // can be called from close()
        this.outputCollector = context;
        pigReporter.setRep(context);
        PhysicalOperator.setReporter(pigReporter);

        boolean aggregateWarning = "true".equalsIgnoreCase(pigContext.getProperties().getProperty("aggregate.warning"));
        PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
        pigStatusReporter.setContext(new MRTaskContext(context));
        PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
        pigHadoopLogger.setReporter(pigStatusReporter);
        pigHadoopLogger.setAggregate(aggregateWarning);
        PhysicalOperator.setPigLogger(pigHadoopLogger);

        if (!inIllustrator) {
            for (POStore store: stores) {
                MapReducePOStoreImpl impl
                    = new MapReducePOStoreImpl(context);
                store.setStoreImpl(impl);
                if (!pigContext.inIllustrator)
                    store.setUp();
            }
        }
    }

    if (mp.isEmpty()) {
        collect(context,inpTuple);
        return;
    }

    for (PhysicalOperator root : roots) {
        if (inIllustrator) {
            if (root != null) {
                root.attachInput(inpTuple);
            }
        } else {
            root.attachInput(tf.newTupleNoCopy(inpTuple.getAll()));
        }
    }

    runPipeline(leaf);
}
 
Example 25
public void setReporter(PigStatusReporter reporter) {
    this.reporter = reporter;
}