Java源码示例:weka.filters.unsupervised.attribute.ReplaceMissingValues

示例1
/**
 * Initializes all members to their default values.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // reset state left over from a previous run
  m_KNNdetermined = -1;
  m_NeighborsTestset = null;
  m_TrainsetNew = null;
  m_TestsetNew = null;
  m_UseNaiveSearch = false;
  m_LabeledTestset = null;
  m_Missing = new ReplaceMissingValues();

  // default base classifier: 10-NN with cross-validated neighbor selection,
  // unlimited window, absolute-error based CV
  m_Classifier = new IBk();
  m_Classifier.setKNN(10);
  m_Classifier.setCrossValidate(true);
  m_Classifier.setWindowSize(0);
  m_Classifier.setMeanSquared(false);
  m_KNN = m_Classifier.getKNN();

  // expose the determined number of neighbors as an additional measure
  m_AdditionalMeasures.add("measureDeterminedKNN");
}
 
示例2
/**
 * Builds the classifier.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // make sure the classifier can process this dataset
  getCapabilities().testWithFail(data);

  // work on a copy; drop instances without a class value
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // impute missing attribute values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(train);
  train = Filter.useFilter(train, m_replaceMissing);

  // optionally binarize nominal attributes up front
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_nominalToBinary);
  }

  int minNumInstances = 2;

  // choose the split model: residual-based splits or C4.5-style splits
  // on the class value
  ModelSelection modSelection = m_splitOnResiduals
      ? new ResidualModelSelection(minNumInstances)
      : new C45ModelSelection(minNumInstances, train, true);

  // create and build the tree root
  m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression,
      m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta, m_useAIC);
  m_tree.buildClassifier(train);

  // release references held by the C4.5 model selection, if used
  if (modSelection instanceof C45ModelSelection) {
    ((C45ModelSelection) modSelection).cleanup();
  }
}
 
示例3
/**
 * Cleans up the data: binarizes nominal attributes, imputes missing
 * attribute values and removes instances with a missing class value.
 * The result and the fitted filters are stored in member fields
 * (presumably so the same filters can later be applied to test data —
 * TODO confirm against callers).
 *
 * @param data data to be cleaned up
 * @throws Exception if an error occurs
 */
private void cleanUpData(Instances data)throws Exception{

  m_Data = data;
  // convert nominal attributes to binary indicator attributes
  m_TransformFilter = new NominalToBinary();
  m_TransformFilter.setInputFormat(m_Data);
  m_Data = Filter.useFilter(m_Data, m_TransformFilter);
  // replace missing attribute values
  m_MissingFilter = new ReplaceMissingValues();
  m_MissingFilter.setInputFormat(m_Data);
  m_Data = Filter.useFilter(m_Data, m_MissingFilter);
  // drop instances whose class value is still missing
  m_Data.deleteWithMissingClass();
}
 
示例4
/**
 * Default constructor; sets up the pre-processing filters:
 * missing-value imputation followed by centering.
 */
public PLSFilter() {
  super();

  m_Missing = new ReplaceMissingValues(); // impute missing values first
  m_Filter = new Center();                // then center the data
}
 
示例5
/**
 * Replaces all missing attribute values in the given dataset using
 * Weka's {@link ReplaceMissingValues} filter.
 *
 * <p>Fixes over the previous version: the catch block called
 * {@code System.exit(0)}, which killed the whole JVM and made the
 * {@code return data} fallback unreachable; errors were printed to
 * stdout instead of stderr; and the filter's input/batchFinished/output
 * cycle was driven by hand (with leftover commented-out code) instead of
 * using {@code Filter.useFilter}, which performs exactly that cycle.</p>
 *
 * @param data the instances whose missing values should be imputed
 * @return a new dataset with missing values replaced; the original
 *         dataset is returned unchanged if filtering fails
 */
public static Instances estimateMissing(Instances data) {
  try {
    ReplaceMissingValues filter = new ReplaceMissingValues();
    filter.setInputFormat(data);
    return Filter.useFilter(data, filter);
  } catch (Exception e) {
    // best effort: report the problem and fall back to the unfiltered data
    System.err.println("Error in estimateMissing = " + e);
    return data;
  }
}
 
示例6
/**
 * Initializes the members to their default values.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // fresh filter and default base classifier; datasets start out unset
  m_TrainsetNew = null;
  m_TestsetNew = null;
  m_Filter = new ReplaceMissingValues();
  m_Classifier = new YATSI();
}
 
示例7
/**
 * Builds a clusterer for a set of instances, then fits per-cluster density
 * models on top of it: a DiscreteEstimator per nominal attribute and a
 * [mean, std-dev] normal model per numeric attribute, plus cluster priors.
 *
 * @param data the instances to train the clusterer with
 * @throws Exception if the clusterer hasn't been set or something goes wrong
 */  
public void buildClusterer(Instances data) throws Exception {
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  // impute missing values before handing the data to the wrapped clusterer
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(data);
  data = weka.filters.Filter.useFilter(data, m_replaceMissing);

  // keep an empty copy of the header for later use
  m_theInstances = new Instances(data, 0);
  if (m_wrappedClusterer == null) {
    throw new Exception("No clusterer has been set");
  }
  m_wrappedClusterer.buildClusterer(data);
  // per-cluster, per-attribute models; m_modelNormal holds [mean, stddev]
  m_model = 
     new DiscreteEstimator[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
  m_modelNormal = 
    new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()][2];
  double[][] weights =  new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
  m_priors = new double[m_wrappedClusterer.numberOfClusters()]; 
   for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
     m_priors[i] = 1.0; // laplace correction
     for (int j = 0; j < data.numAttributes(); j++) {
// nominal attributes get a discrete (frequency-count) estimator
if (data.attribute(j).isNominal()) {
  m_model[i][j] = new DiscreteEstimator(data.attribute(j).numValues(),
				 true);
}
     }
   }
   
   Instance inst = null;

   // First pass: accumulate weighted value sums (numeric) and counts
   // (nominal) per cluster; remember each instance's cluster assignment.
   int[] clusterIndex = new int[data.numInstances()];
   for (int i = 0; i < data.numInstances(); i++) {
     inst = data.instance(i);
     int cluster = m_wrappedClusterer.clusterInstance(inst);
     m_priors[cluster] += inst.weight();
     for (int j = 0; j < data.numAttributes(); j++) {
if (!inst.isMissing(j)) {
  if (data.attribute(j).isNominal()) {
    m_model[cluster][j].addValue(inst.value(j),inst.weight());
  } else {
    m_modelNormal[cluster][j][0] += inst.weight() * inst.value(j);
    weights[cluster][j] += inst.weight();
  }
}
     }
     clusterIndex[i] = cluster;
   }

   // turn weighted sums into per-cluster means
   for (int j = 0; j < data.numAttributes(); j++) {
     if (data.attribute(j).isNumeric()) {
for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {	   
  if (weights[i][j] > 0) {
    m_modelNormal[i][j][0] /= weights[i][j];
  }
}
     }
   }

   // Second pass: accumulate weighted squared deviations from the mean
   for (int i = 0; i < data.numInstances(); i++) {
     inst = data.instance(i);
     for (int j = 0; j < data.numAttributes(); j++) {
if (!inst.isMissing(j)) {
  if (data.attribute(j).isNumeric()) {
    double diff = m_modelNormal[clusterIndex[i]][j][0] - inst.value(j);
    m_modelNormal[clusterIndex[i]][j][1] += inst.weight() * diff * diff;
  }
}
     }
   }

   // Finalize std devs: clusters that saw no weight get MAX_VALUE; values
   // at or below m_minStdDev fall back to the attribute's global std dev,
   // and finally to m_minStdDev itself as a floor.
   for (int j = 0; j < data.numAttributes(); j++) {
     if (data.attribute(j).isNumeric()) {
for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {	   
  if (weights[i][j] > 0) {
    m_modelNormal[i][j][1] = 
      Math.sqrt(m_modelNormal[i][j][1] / weights[i][j]);
  } else if (weights[i][j] <= 0) {
    m_modelNormal[i][j][1] = Double.MAX_VALUE;
  }
  if (m_modelNormal[i][j][1] <= m_minStdDev) {
    m_modelNormal[i][j][1] = data.attributeStats(j).numericStats.stdDev;
    if (m_modelNormal[i][j][1] <= m_minStdDev) {
      m_modelNormal[i][j][1] = m_minStdDev;
    }
  }
}
     }
   }
   
   // normalize the (Laplace-corrected) cluster priors to sum to 1
   Utils.normalize(m_priors);
}
 
示例8
/**
 * Builds the classifier.
 *
 * @param data the data to train with
 * @throws Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  // make sure the classifier can process this dataset
  getCapabilities().testWithFail(data);

  // work on a copy; drop instances without a class value
  Instances train = new Instances(data);
  train.deleteWithMissingClass();

  // impute missing attribute values
  m_replaceMissing = new ReplaceMissingValues();
  m_replaceMissing.setInputFormat(train);
  train = Filter.useFilter(train, m_replaceMissing);

  // optionally binarize nominal attributes globally
  if (m_convertNominal) {
    m_nominalToBinary = new NominalToBinary();
    m_nominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_nominalToBinary);
  }

  int minNumInstances = 2;

  // pick the tree root type according to the configured model variant
  switch (m_modelType) {
    case 0: // FT tree
      m_tree = new FTNode(m_errorOnProbabilities, m_numBoostingIterations,
          m_minNumInstances, m_weightTrimBeta, m_useAIC);
      break;
    case 1: // FTLeaves tree
      m_tree = new FTLeavesNode(m_errorOnProbabilities, m_numBoostingIterations,
          m_minNumInstances, m_weightTrimBeta, m_useAIC);
      break;
    case 2: // FTInner tree
      m_tree = new FTInnerNode(m_errorOnProbabilities, m_numBoostingIterations,
          m_minNumInstances, m_weightTrimBeta, m_useAIC);
      break;
    default:
      // other values leave m_tree untouched (same as the original if-chain)
      break;
  }

  // build, prune and clean up the tree
  m_tree.buildClassifier(train);
  m_tree.prune();
  m_tree.assignIDs(0);
  m_tree.cleanup();
}
 
示例9
/**
 * Converts models built with an old Weka version (&lt; 3.7.11) so they work
 * with newer Weka releases: rebuilds the dataset structure and, for SMO
 * classifiers, patches private Weka fields via reflection. Recurses into
 * the segmentation, secondary segmentation and exclusion sub-models.
 *
 * @param model the model to fix in place; may be null (no-op)
 */
public static void fixOldModelVersion(final OrbitModel model) {
    if (model == null) return; // nothing to fix
    // probe the structure: old models yield an NPE when accessing the
    // class attribute's values
    boolean oldWekaVersion = false;
    try {
        model.getStructure().classAttribute().numValues();
    } catch (NullPointerException ne) {
        oldWekaVersion = true;
    }

    // apply old model fix?
    if (oldWekaVersion) {
        logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes");
        // rebuild the attribute list: numFeatures-1 numeric attributes
        // named "a0", "a1", ... plus a nominal class attribute
        int numClasses = model.getClassShapes().size();
        TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null);
        int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1;
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures);
        for (int a = 0; a < numFeatures - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            attrInfo.add(attr);
        }
        List<String> classValues = new ArrayList<String>(numClasses);
        for (int i = 0; i < numClasses; i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        Attribute classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);

        // fresh, empty structure with the class as the last attribute
        Instances structure = new Instances("trainSet pattern classes", attrInfo, 0);
        structure.setClassIndex(numFeatures - 1);
        model.setStructure(structure);

        try {
            // SMO-specific fixes: private fields are patched via reflection
            // because Weka offers no public setters for them
            if (model.getClassifier() != null && model.getClassifier().getClassifier() != null && model.getClassifier().getClassifier() instanceof SMO) {
                SMO smo = ((SMO) model.getClassifier().getClassifier());

                Field field = smo.getClass().getDeclaredField("m_classAttribute");
                field.setAccessible(true);
                field.set(smo, classAttr);

                // missing values
                ReplaceMissingValues rmv = new ReplaceMissingValues();
                rmv.setInputFormat(structure);

                Field missing = smo.getClass().getDeclaredField("m_Missing");
                missing.setAccessible(true);
                missing.set(smo, rmv);

                // filter: re-wire the normalization filter's locators and
                // output format to the rebuilt structure.
                // NOTE(review): the getSuperclass().getSuperclass() chain
                // assumes the fields live two levels up the Filter class
                // hierarchy — brittle against Weka refactorings.
                Field filter = smo.getClass().getDeclaredField("m_Filter");
                filter.setAccessible(true);
                Filter normalize = (Filter) filter.get(smo);

                RelationalLocator relLoc = new RelationalLocator(structure);
                StringLocator strLoc = new StringLocator(structure);

                Field outputRelAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_OutputRelAtts");
                outputRelAtts.setAccessible(true);
                outputRelAtts.set(normalize, relLoc);

                Field inputRelAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_InputRelAtts");
                inputRelAtts.setAccessible(true);
                inputRelAtts.set(normalize, relLoc);

                Field outputStrAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_OutputStringAtts");
                outputStrAtts.setAccessible(true);
                outputStrAtts.set(normalize, strLoc);

                Field inputStrAtts = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_InputStringAtts");
                inputStrAtts.setAccessible(true);
                inputStrAtts.set(normalize, strLoc);

                Field outputFormat = normalize.getClass().getSuperclass().getSuperclass().getDeclaredField("m_OutputFormat");
                outputFormat.setAccessible(true);
                outputFormat.set(normalize, structure);

                logger.info("fixes applied, the model should work with a weka version >= 3.7.11 now");
            } // else: good luck...
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("new weka version fixes could not be applied: " + e.getMessage());
        }
    } // old weka version
    fixOldModelVersion(model.getSegmentationModel());     // fixOldModelVersion can handle null
    fixOldModelVersion(model.getSecondarySegmentationModel());  // fixOldModelVersion can handle null
    fixOldModelVersion(model.getExclusionModel());  // fixOldModelVersion can handle null
}
 
示例10
/**
 * Pre-processes a dataset: removes useless attributes, imputes missing
 * values, discretizes numeric attributes and keeps only attributes with
 * sufficient information gain.
 *
 * <p>Bug fix: the original applied the {@code fixMissing} filter a second
 * time where the {@code Discretize} filter should have been configured and
 * applied, so the dataset was never actually discretized. The copy-pasted
 * "Remove useless attributes" comments are corrected as well.</p>
 *
 * @param data the instances to pre-process
 * @return the pre-processed (reduced) dataset
 * @throws Exception if any filter or the attribute selection fails
 */
public static Instances preProcessData(Instances data) throws Exception {

	/*
	 * Remove useless attributes
	 */
	RemoveUseless removeUseless = new RemoveUseless();
	removeUseless.setOptions(new String[] { "-M", "99" });	// threshold
	removeUseless.setInputFormat(data);
	data = Filter.useFilter(data, removeUseless);

	/*
	 * Replace missing values with means/modes
	 */
	ReplaceMissingValues fixMissing = new ReplaceMissingValues();
	fixMissing.setInputFormat(data);
	data = Filter.useFilter(data, fixMissing);

	/*
	 * Discretize numeric attributes
	 */
	Discretize discretizeNumeric = new Discretize();
	discretizeNumeric.setOptions(new String[] {
			"-O",
			"-M",  "-1.0",
			"-B",  "4",  // no of bins
			"-R",  "first-last"}); //range of attributes
	// BUG FIX: was fixMissing here, so Discretize never ran
	discretizeNumeric.setInputFormat(data);
	data = Filter.useFilter(data, discretizeNumeric);

	/*
	 * Select only informative attributes
	 */
	InfoGainAttributeEval eval = new InfoGainAttributeEval();
	Ranker search = new Ranker();
	search.setOptions(new String[] { "-T", "0.001" });	// information gain threshold
	AttributeSelection attSelect = new AttributeSelection();
	attSelect.setEvaluator(eval);
	attSelect.setSearch(search);

	// apply attribute selection
	attSelect.SelectAttributes(data);

	// remove the attributes not selected in the last run
	data = attSelect.reduceDimensionality(data);

	return data;
}
 
示例11
/**
 * Parses a given list of options. Valid options are: <p/>
 *
 * -D <br/>
 * Turn on debugging output.<p/>
 *
 * -W classname <br/>
 * Specify the full class name of a classifier as the basis for
 * collective classifying (required).<p/>
 *
 * -folds folds <br/>
 * the number of folds for splitting the training set into train and test
 * set.  the first fold is always the training set. With '-V' you can invert
 * this, i.e., instead of 20/80 for 5 folds you'll get 80/20. (default 5) <p/>
 *
 * -V <br/>
 * inverts the fold selection, i.e., instead of using the first fold for the
 * training set it is used for test set and the remaining folds for training.
 * <p/>
 *
 * -S seed <br/>
 * Random number seed for resampling (default 1). <p/>
 *
 * -verbose <br/>
 * whether to output some more information during improving the classifier.
 * <p/>
 *
 * -insight <br/> 
 *  whether to use the labels of the original test set to output more
 *  statistics. <p/>
 *
 * -F class-spec <br/>
 * The classname and parameters for the filter 
 * (default ReplaceMissingValues). <p/>
 *
 * Options after -- are passed to the designated classifier.<p/>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  // -F: "classname [filter options]"
  String filterSpec = Utils.getOption('F', options);
  if (filterSpec.length() == 0) {
    // no -F given: fall back to the default filter
    setFilter(new ReplaceMissingValues());
  } else {
    String[] filterOptions = Utils.splitOptions(filterSpec);
    String filterClass = filterOptions[0];
    filterOptions[0] = ""; // first token is the class name, not an option
    setFilter((Filter) Utils.forName(Filter.class, filterClass, filterOptions));
  }

  super.setOptions(options);
}
 
示例12
/**
 * initializes the object: merges train and test instances into one sorted
 * array (test instances get a weight derived from the train/test size ratio
 * and the parent's weighting factor), records the original weights, builds
 * a combined dataset and imputes its missing values.
 *
 * @param parent      the parent algorithm
 * @param train       the train instances
 * @param test        the test instances
 * @param setWeights  whether to set the weights for the training set 
 *                    (the processed instances)
 * @throws Exception  if something goes wrong
 */
public YATSIInstances(YATSI parent, Instances train, Instances test, 
                      boolean setWeights) 
  throws Exception {

  super();

  m_Parent = parent;

  // build sorted array (train + test)
  // test-instance weight: 1.0 if weighting is disabled, otherwise
  // |train| / |test| * weighting factor
  double weight;
  if (getParent().getNoWeights())
    weight = 1.0;
  else
    weight =   (double) train.numInstances() 
             / (double) test.numInstances()
             * getParent().getWeightingFactor();
  m_Unprocessed = new Instance[train.numInstances() + test.numInstances()];
  for (int i = 0; i < train.numInstances(); i++)
    m_Unprocessed[i] = train.instance(i);
  for (int i = 0; i < test.numInstances(); i++) {
    m_Unprocessed[train.numInstances() + i] = test.instance(i);
    // NOTE(review): setWeight mutates the original test instances, not
    // copies — confirm callers do not rely on the untouched weights
    m_Unprocessed[train.numInstances() + i].setWeight(weight);
  }
  Arrays.sort(m_Unprocessed, m_Comparator);

  // remember the original weights; optionally reset all weights to 1
  m_Weights = new double[m_Unprocessed.length];
  for (int i = 0; i < m_Unprocessed.length; i++) {
    m_Weights[i] = m_Unprocessed[i].weight();
    if (!setWeights)
      m_Unprocessed[i].setWeight(1);
  }

  // combined dataset: header copied from train, filled with sorted instances
  m_Trainset  = new Instances(train, 0);
  for (int i = 0; i < m_Unprocessed.length; i++)
    m_Trainset.add(m_Unprocessed[i]);

  // set up filter and impute missing values on the combined set
  m_Missing = new ReplaceMissingValues();
  m_Missing.setInputFormat(m_Trainset);
  m_Trainset = Filter.useFilter(m_Trainset, m_Missing); 
}