Python源码示例:sklearn.preprocessing.MaxAbsScaler()

示例1
def test_01_xgb_classifier(self):
        print("\ntest 01 (xgb classifier with preprocessing) [multi-class]\n")
        model = XGBClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MaxAbsScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y)
        file_name = "test01xgboost.pmml"
        xgboost_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
示例2
def test_02_lgbm_classifier(self):
        print("\ntest 02 (lgbm classifier with preprocessing) [multi-class]\n")
        model = LGBMClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MaxAbsScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y)
        file_name = "test02lgbm.pmml"
        lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
示例3
def train_model(self, train_file_path, model_path):
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)
        clf = RandomForestRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        pickle.dump(clf, open(model_path, 'wb'))

        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
        return clf 
示例4
def train_model(self, train_file_path, model_path):
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)

        clf = GradientBoostingRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        pickle.dump(clf, open(model_path, 'wb'))

        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
        return clf 
示例5
def normalize_cv(X, y, i, norm="zero_score"):
    X_test = X[i]
    y_test = y[i]
    X_train = pd.concat(X[:i] + X[i+1:])
    y_train = pd.concat(y[:i] + y[i+1:])
    if norm == "min_max":
        scaler = preprocessing.MinMaxScaler()
    elif norm == "max_abs":
        scaler = preprocessing.MaxAbsScaler()
    else:
        scaler = preprocessing.StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train),
                           index=y_train.index.values)
    X_train.columns = X[i].columns.values
    X_test = pd.DataFrame(scaler.transform(X_test), index=y_test.index.values)
    X_test.columns = X[i].columns.values
    return X_train, X_test, y_train, y_test 
示例6
def transform(self, X):
        """Scale the data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_timestamps)
            Data to scale.

        Returns
        -------
        X_new : array-like, shape = (n_samples, n_timestamps)
            Scaled data.

        """
        X = check_array(X, dtype='float64')
        scaler = SklearnMaxAbsScaler()
        X_new = scaler.fit_transform(X.T).T
        return X_new 
示例7
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
        self.assertIs(df.preprocessing.FunctionTransformer,
                      pp.FunctionTransformer)
        self.assertIs(df.preprocessing.Imputer, pp.Imputer)
        self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
        self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
        self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
        self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
        self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
        self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
        self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
        self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
        self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
        self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
        self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler) 
示例8
def load_data():

    data_path = args['in']
        
    df = (pd.read_csv(data_path,skiprows=1).values).astype('float32')

    df_y = df[:,0].astype('float32')
    df_x = df[:, 1:PL].astype(np.float32)


#    scaler = MaxAbsScaler()
        
    scaler = StandardScaler()
    df_x = scaler.fit_transform(df_x)
        
    X_train, X_test, Y_train, Y_test = train_test_split(df_x, df_y, test_size= 0.20, random_state=42)
    
    print('x_train shape:', X_train.shape)
    print('x_test shape:', X_test.shape)

    
    return X_train, Y_train, X_test, Y_test 
示例9
def sparse_normalize_dataset(dataset):
    """ Normaliza dataset without removing the sparseness structure of the data """
    #Remove mean of dataset 
    dataset = dataset - np.mean(dataset)
    #Truncate to +/-3 standard deviations and scale to -1 to 1
    std_dev = 3 * np.std(dataset)
    dataset = np.maximum(np.minimum(dataset, std_dev), -std_dev) / std_dev
    #Rescale from [-1, 1] to [0.1, 0.9]
    dataset = (dataset + 1) * 0.4 + 0.1
    #dataset = (dataset-np.amin(dataset))/(np.amax(dataset)-np.amin(dataset))
    return dataset
    #return preprocessing.MaxAbsScaler().fit_transform(dataset) 
示例10
def scale_by_max_value(X):
	"""
	Scale each feature by its abs maximum value.

	Keyword arguments:
	X -- The feature vectors	
	"""

	if verbose:
		print '\nScaling to the range [-1,1] ...'

	max_abs_scaler = preprocessing.MaxAbsScaler()
	return max_abs_scaler.fit_transform(X) 
示例11
def normalize(data, norm="zero_score", scaler=None):
    """Normalize pandas Dataframe.

    @param data: Input dataframe
    @param norm: normalization method [default: zero_score standardization],
    alternatives: 'min_max', 'max_abs'
    @return datascaled: normalized dataframe
    """
    if scaler is not None:
        datascaled = pd.DataFrame(scaler.transform(data),
                                  index=data.index.values)
        datascaled.columns = data.columns.values
    else:
        if norm == "min_max":
            scaler = preprocessing.MinMaxScaler()
        elif norm == "max_abs":
            scaler = preprocessing.MaxAbsScaler()
        else:
            scaler = preprocessing.StandardScaler()
        datascaled = pd.DataFrame(scaler.fit_transform(data),
                                  index=data.index.values)
        datascaled.columns = data.columns.values
    return datascaled, scaler


# deprecated - use sklearn.model_selection.train_test_split instead 
示例12
def _get_feature_scaler(self):
        """Get a feature value scaler based on the model settings"""
        if self.config.model_settings is None:
            scale_type = None
        else:
            scale_type = self.config.model_settings.get("feature_scaler")
        scaler = {
            "std-dev": StandardScaler(with_mean=False),
            "max-abs": MaxAbsScaler(),
        }.get(scale_type)
        return scaler 
示例13
def _get_feature_scaler(scale_type):
        """Get a feature value scaler based on the model settings"""
        scaler = {
            "std-dev": StandardScaler(with_mean=False),
            "max-abs": MaxAbsScaler(),
        }.get(scale_type)
        return scaler 
示例14
def scale(df, scaling=None):
    """Scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to scale
    scaling : 'maxabs', 'minmax', 'std', or None, optional (default 'std')
        type of scaling to apply
    """

    if scaling is None or scaling.lower() == 'none':
        return df

    df = df.dropna(axis=1, how='any')

    # Scaling data
    if scaling == 'maxabs':
        # Normalizing -1 to 1
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        # Scaling to [0,1]
        scaler = MinMaxScaler()
    else:
        # Standard normalization
        scaler = StandardScaler()

    mat = df.as_matrix()
    mat = scaler.fit_transform(mat)

    df = pd.DataFrame(mat, columns=df.columns)

    return df 
示例15
def impute_and_scale(df, scaling='std'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional (default 'std')
        type of scaling to apply
    """

    df = df.dropna(axis=1, how='all')

    #imputer = Imputer(strategy='mean', axis=0)
    imputer = Imputer(strategy='mean')
    mat = imputer.fit_transform(df)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)

    df = pd.DataFrame(mat, columns=df.columns)

    return df 
示例16
def impute_and_scale(df, scaling='std'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional (default 'std')
        type of scaling to apply
    """

    df = df.dropna(axis=1, how='all')

    imputer = Imputer(strategy='mean')
    mat = imputer.fit_transform(df)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)

    df = pd.DataFrame(mat, columns=df.columns)

    return df 
示例17
def load_data(train_path, test_path, gParameters):

    print('Loading data...')
    df_train = (pd.read_csv(train_path,header=None).values).astype('float32')
    df_test = (pd.read_csv(test_path,header=None).values).astype('float32')
    print('done')

    print('df_train shape:', df_train.shape)
    print('df_test shape:', df_test.shape)

    seqlen = df_train.shape[1]

    df_y_train = df_train[:,0].astype('int')
    df_y_test = df_test[:,0].astype('int')

    Y_train = np_utils.to_categorical(df_y_train,gParameters['classes'])
    Y_test = np_utils.to_categorical(df_y_test,gParameters['classes'])

    df_x_train = df_train[:, 1:seqlen].astype(np.float32)
    df_x_test = df_test[:, 1:seqlen].astype(np.float32)

#        X_train = df_x_train.as_matrix()
#        X_test = df_x_test.as_matrix()

    X_train = df_x_train
    X_test = df_x_test

    scaler = MaxAbsScaler()
    mat = np.concatenate((X_train, X_test), axis=0)
    mat = scaler.fit_transform(mat)

    X_train = mat[:X_train.shape[0], :]
    X_test = mat[X_train.shape[0]:, :]

    return X_train, Y_train, X_test, Y_test 
示例18
def scale_array(mat, scaling=None):
    """ Scale data included in numpy array.
        
        Parameters
        ----------
        mat : numpy array
            Array to scale
        scaling : string
            String describing type of scaling to apply.
            Options recognized: 'maxabs', 'minmax', 'std'.
            'maxabs' : scales data to range [-1 to 1].
            'minmax' : scales data to range [-1 to 1].
            'std'    : scales data to normal variable with mean 0 and standard deviation 1.
            (Default: None, no scaling).

        Return
        ----------
        Returns the numpy array scaled by the method specified. \
        If no scaling method is specified, it returns the numpy \
        array unmodified.
    """
    
    if scaling is None or scaling.lower() == 'none':
        return mat

    # Scaling data
    if scaling == 'maxabs':
        # Scaling to [-1, 1]
        scaler = MaxAbsScaler(copy=False)
    elif scaling == 'minmax':
        # Scaling to [0,1]
        scaler = MinMaxScaler(copy=False)
    else:
        # Standard normalization
        scaler = StandardScaler(copy=False)
    
    return scaler.fit_transform(mat) 
示例19
def drop_impute_and_scale_dataframe(df, scaling='std', imputing='mean', dropna='all'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to process
    scaling : string
        String describing type of scaling to apply.
        'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional
        (Default 'std')
    imputing : string
        String describing type of imputation to apply.
        'mean' replace missing values with mean value along the column,
        'median' replace missing values with median value along the column,
        'most_frequent' replace missing values with most frequent value along column
        (Default: 'mean').
    dropna : string
        String describing strategy for handling missing values.
        'all' if all values are NA, drop that column.
        'any' if any NA values are present, dropt that column.
        (Default: 'all').

    Return
    ----------
    Returns the data frame after handling missing values and scaling.

    """

    if dropna:
        df = df.dropna(axis=1, how=dropna)
    else:
        empty_cols = df.columns[df.notnull().sum() == 0]
        df[empty_cols] = 0

    if imputing is None or imputing.lower() == 'none':
        mat = df.values
    else:
#        imputer = Imputer(strategy=imputing, axis=0)
#        imputer = SimpleImputer(strategy=imputing)
        # Next line is from conditional import. axis=0 is default
        # in old version so it is not necessary.
        imputer = Imputer(strategy=imputing)
        mat = imputer.fit_transform(df.values)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)
    df = pd.DataFrame(mat, columns=df.columns)

    return df