提问者:小点点

在Reducer中使用System.setProperty()


我试图在Reducer中设置Java系统属性java.util.Arrays.useLegacyMergeSort,以强制系统使用JDK6的Arrays.sort实现,而不是JDK8的实现。

package scoring.devicestatus;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.log4j.Logger;

import domain.DeviceEvent;
import domain.DeviceStatus;
import domain.EndOfPeriodEvent;
import domain.PiidBoundaryEvent;
import util.DateUtils;

public class DeviceStatusReducer extends Reducer<Text, Text, Text, NullWritable> {

    public static final Logger logger = Logger.getLogger(DeviceStatusReducer.class);

    /**
     * Processes all values grouped under one key and writes the resulting records.
     *
     * <p>This listing is abridged: the lone "." lines stand for omitted code, so only
     * the visible steps are described here. The configuration is read from the
     * context, the computed {@code records} are handed to {@code writeToContext},
     * and any failure is logged together with the key and rethrown.
     *
     * @param key     the grouping key; the error message below refers to it as a VIN
     * @param values  the values grouped under {@code key}
     * @param context the task context, used for configuration and for output
     * @throws IOException          declared by the method signature
     * @throws InterruptedException declared by the method signature
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // NOTE(review): the leading "-->" appears to be the author's marker highlighting
        // this line in the post; it is not Java syntax.
        -->Configuration conf = context.getConfiguration();
        .       .
        .
        try{
            .
            .
            // `records` is produced by the omitted code above.
            writeToContext(key, records, context);
        }catch(Throwable t){
            // Catches everything (Errors included), logs it with the key for diagnosis,
            // and rethrows unchecked with the original throwable kept as the cause.
            logger.error("Error processing VIN: " + key.toString(), t);
            throw new RuntimeException(t);
        }
    }

    /**
     * Derives the device statuses for one key from its raw values.
     *
     * <p>Only the first step is visible in this abridged listing: the values are
     * converted into a sorted list of events via {@code createEvents}. The code that
     * turns those events into {@code DeviceStatus} objects is omitted (the "." lines).
     *
     * @param values  the raw values for one key
     * @param endDate passed through to {@code createEvents}
     * @return the resulting device statuses (construction not shown in this listing)
     */
    List<DeviceStatus> reduce(Iterable<Text> values, String endDate) {
        List<DeviceEvent> events = createEvents(values, endDate);
        .
        .       
    }

    /**
     * Parses the raw values into device events and returns them sorted.
     *
     * <p>Each value may yield several events via {@code DeviceEvent.getInstance}.
     * Events whose program instance id equals {@code Constants.ORPHAN_PIID} are
     * dropped. The remaining events are sorted by their natural ordering, i.e. by
     * {@code DeviceEvent.compareTo}.
     *
     * @param values  the raw values to parse
     * @param endDate not used by the code visible in this method
     * @return the retained events in sorted order
     */
    List<DeviceEvent> createEvents(Iterable<Text> values, String endDate) {
        List<DeviceEvent> events = new ArrayList<DeviceEvent>(10);
        for(Text text : values){
            List<DeviceEvent> instances = DeviceEvent.getInstance(text);
            for(DeviceEvent instance : instances){
                // Keep every event except those tagged with the orphan program instance id.
                if(!Constants.ORPHAN_PIID.equals(instance.getProgramInstanceId())){
                    events.add(instance);
                }
            }
        }
        // Debug output: prints the JVM system property next to the job-configuration
        // value of the same name, so the two can be compared in the task's stdout.
        // NOTE(review): the leading "-->" appears to be the author's highlight marker, not
        // Java syntax, and `conf` is not declared in this method in the visible listing.
    --> System.out.println("Reducer Class:"+System.getProperty("java.util.Arrays.useLegacyMergeSort")+" "+conf.get("java.util.Arrays.useLegacyMergeSort"));
        // Natural-order sort; this is the call whose behaviour depends on compareTo.
        Collections.sort(events);
        return events;
    }

    /**
     * Emits one output record per device status.
     *
     * <p>Each status is rendered with {@code toCsvString}, using the key's string
     * form as the VIN, and written as the output key with a {@code NullWritable} value.
     *
     * @param key            the key whose string form is passed to {@code toCsvString}
     * @param deviceStatuses the statuses to write, in iteration order
     * @param context        the task context that receives the output records
     * @throws IOException          if the context fails to write a record
     * @throws InterruptedException if the write is interrupted
     */
    public void writeToContext(Text key, List<DeviceStatus> deviceStatuses, Context context) throws IOException, InterruptedException {
        final String vin = key.toString();
        for (final DeviceStatus deviceStatus : deviceStatuses) {
            // The CSV line is the whole payload, so it becomes the output key.
            final Text outputKey = new Text(deviceStatus.toCsvString(vin));
            context.write(outputKey, NullWritable.get());
        }
    }
}
package domain;
    public abstract class DeviceEvent implements Comparable<DeviceEvent> {

        .
        .
        .   
        /**
         * Orders events chronologically; events with equal timestamps are ordered by
         * installed state, with not-installed events before installed ones.
         *
         * <p>The tie-break looks at BOTH operands. The previous form
         * ({@code isInstalled() ? 1 : -1}) looked only at {@code this}, so two
         * equal-timestamp events with the same installed state each reported being
         * greater (or each smaller) than the other. That breaks the antisymmetry rule
         * of {@link Comparable#compareTo} and can make the JDK 7+ sort behind
         * {@code Collections.sort} throw
         * "Comparison method violates its general contract!".
         *
         * <p>Events with equal timestamp and equal installed state now compare as 0.
         * {@code Collections.sort} is stable, so they keep their relative input order.
         *
         * @param arg0 the event to compare against; must not be {@code null}
         * @return a negative value, zero, or a positive value as this event sorts
         *         before, together with, or after {@code arg0}
         */
        @Override
        public int compareTo(DeviceEvent arg0) {
            int comparison = getTimeStamp().compareTo(arg0.getTimeStamp());
            if (comparison == 0) {
                // false < true, so an installed event still sorts after a
                // not-installed one, matching the intent of the old tie-break.
                comparison = Boolean.compare(isInstalled(), arg0.isInstalled());
            }
            return comparison;
        }
        .
        .
        .

    }

我使用-D选项通过命令行运行jar

hadoop jar jarname.jar classname -Djava.util.Arrays.useLegacyMergeSort=true args

当我使用System.out.println("Reducer Class:"+System.getProperty("java.util.Arrays.useLegacyMergeSort")+" "+conf.get("java.util.Arrays.useLegacyMergeSort"))打印该属性时,我的错误仍然没有得到修复。

它看起来像这样:

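标准输出:Reducer Class:null true Reducer Class:null true Reducer Class:null true Reducer Class:null true Reducer Class:null true Reducer Class:null true Reducer Class:null true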

我确信java.util.Arrays.useLegacyMergeSort是有效的,因为当我在Main类中通过System.setProperty("java.util.Arrays.useLegacyMergeSort","true")设置它时,在我的Eclipse(单个JVM)上可以生效,但在Hadoop集群(多个JVM)上不起作用。我还尝试过在Reducer中直接调用System.setProperty("java.util.Arrays.useLegacyMergeSort","true")进行设置。

如何让属性分布在所有JVM上?或者如何将配置属性作为系统属性?谢谢


共1个答案

匿名用户

按如下方式运行jar后,问题得以解决:

hadoop jar JarName.jar MainClassName -D mapreduce.reduce.java.opts=-Djava.util.Arrays.useLegacyMergeSort=true args

请注意:设置Hadoop属性时,-D后面有一个空格;设置JVM属性时,-D后面没有空格。