Python source code examples: sklearn.preprocessing.MaxAbsScaler()
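MaxAbsScaler scales each feature by its maximum absolute value, so every column of the training data ends up in the range [-1, 1]; it neither centers nor shifts the data, which is why it preserves sparsity. Before the project examples below, here is a minimal self-contained sketch (the toy array is made up for illustration):

import numpy as np
from sklearn.preprocessing import MaxAbsScaler

# Toy data: two features with different magnitudes.
X = np.array([[1.0, -10.0],
              [2.0,   5.0],
              [4.0,  20.0]])

scaler = MaxAbsScaler()
X_scaled = scaler.fit_transform(X)   # each column is divided by its max absolute value

print(scaler.max_abs_)   # -> [ 4. 20.]
print(X_scaled)          # every value now lies in [-1, 1]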
Example 1
def test_01_xgb_classifier(self):
    print("\ntest 01 (xgb classifier with preprocessing) [multi-class]\n")
    model = XGBClassifier()
    pipeline_obj = Pipeline([
        ('scaler', MaxAbsScaler()),
        ("model", model)
    ])
    pipeline_obj.fit(self.X, self.Y)
    file_name = "test01xgboost.pmml"
    xgboost_to_pmml(pipeline_obj, self.features, 'Species', file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example 2
def test_02_lgbm_classifier(self):
    print("\ntest 02 (lgbm classifier with preprocessing) [multi-class]\n")
    model = LGBMClassifier()
    pipeline_obj = Pipeline([
        ('scaler', MaxAbsScaler()),
        ("model", model)
    ])
    pipeline_obj.fit(self.X, self.Y)
    file_name = "test02lgbm.pmml"
    lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
    model_pred = pipeline_obj.predict(self.X)
    model_prob = pipeline_obj.predict_proba(self.X)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example 3
def train_model(self, train_file_path, model_path):
    print("==> Load the data ...")
    X_train, Y_train = self.load_file(train_file_path)
    print(train_file_path, shape(X_train))

    print("==> Train the model ...")
    min_max_scaler = preprocessing.MaxAbsScaler()
    X_train_minmax = min_max_scaler.fit_transform(X_train)
    clf = RandomForestRegressor(n_estimators=self.n_estimators)
    clf.fit(X_train_minmax.toarray(), Y_train)

    print("==> Save the model ...")
    pickle.dump(clf, open(model_path, 'wb'))
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
    return clf
Example 4
def train_model(self, train_file_path, model_path):
    print("==> Load the data ...")
    X_train, Y_train = self.load_file(train_file_path)
    print(train_file_path, shape(X_train))

    print("==> Train the model ...")
    min_max_scaler = preprocessing.MaxAbsScaler()
    X_train_minmax = min_max_scaler.fit_transform(X_train)
    clf = GradientBoostingRegressor(n_estimators=self.n_estimators)
    clf.fit(X_train_minmax.toarray(), Y_train)

    print("==> Save the model ...")
    pickle.dump(clf, open(model_path, 'wb'))
    scaler_path = model_path.replace('.pkl', '.scaler.pkl')
    pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
    return clf
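Examples 3 and 4 pickle both the fitted regressor and the fitted MaxAbsScaler. As a complement, here is a hedged sketch of how the two artifacts might be loaded back for prediction; the function name and the assumption that new data arrives as a (possibly sparse) feature matrix are illustrative, not part of the original snippets:

import pickle

def predict_with_saved_model(model_path, X_new):
    # Load the regressor and the scaler written by train_model above;
    # the '.scaler.pkl' suffix mirrors the naming used in those examples.
    with open(model_path, 'rb') as f:
        clf = pickle.load(f)
    with open(model_path.replace('.pkl', '.scaler.pkl'), 'rb') as f:
        scaler = pickle.load(f)

    # Apply the transform that was fit at training time (never refit on new data).
    X_scaled = scaler.transform(X_new)
    if hasattr(X_scaled, "toarray"):
        # Training densified the matrix (X_train_minmax.toarray()), so densify here too.
        X_scaled = X_scaled.toarray()
    return clf.predict(X_scaled)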
Example 5
def normalize_cv(X, y, i, norm="zero_score"):
    X_test = X[i]
    y_test = y[i]
    X_train = pd.concat(X[:i] + X[i+1:])
    y_train = pd.concat(y[:i] + y[i+1:])

    if norm == "min_max":
        scaler = preprocessing.MinMaxScaler()
    elif norm == "max_abs":
        scaler = preprocessing.MaxAbsScaler()
    else:
        scaler = preprocessing.StandardScaler()

    X_train = pd.DataFrame(scaler.fit_transform(X_train),
                           index=y_train.index.values)
    X_train.columns = X[i].columns.values
    X_test = pd.DataFrame(scaler.transform(X_test), index=y_test.index.values)
    X_test.columns = X[i].columns.values
    return X_train, X_test, y_train, y_test
Example 6
def transform(self, X):
    """Scale the data.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_timestamps)
        Data to scale.

    Returns
    -------
    X_new : array-like, shape = (n_samples, n_timestamps)
        Scaled data.
    """
    X = check_array(X, dtype='float64')
    scaler = SklearnMaxAbsScaler()
    X_new = scaler.fit_transform(X.T).T
    return X_new
Example 7
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
    self.assertIs(df.preprocessing.FunctionTransformer,
                  pp.FunctionTransformer)
    self.assertIs(df.preprocessing.Imputer, pp.Imputer)
    self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
    self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
    self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
    self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
    self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
    self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
    self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
    self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
    self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
    self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
    self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler)
Example 8
def load_data():
    data_path = args['in']

    df = (pd.read_csv(data_path, skiprows=1).values).astype('float32')
    df_y = df[:, 0].astype('float32')
    df_x = df[:, 1:PL].astype(np.float32)

    # scaler = MaxAbsScaler()
    scaler = StandardScaler()
    df_x = scaler.fit_transform(df_x)

    X_train, X_test, Y_train, Y_test = train_test_split(df_x, df_y, test_size=0.20, random_state=42)

    print('x_train shape:', X_train.shape)
    print('x_test shape:', X_test.shape)

    return X_train, Y_train, X_test, Y_test
Example 9
def sparse_normalize_dataset(dataset):
    """Normalize dataset without removing the sparseness structure of the data."""
    # Remove mean of dataset
    dataset = dataset - np.mean(dataset)
    # Truncate to +/-3 standard deviations and scale to -1 to 1
    std_dev = 3 * np.std(dataset)
    dataset = np.maximum(np.minimum(dataset, std_dev), -std_dev) / std_dev
    # Rescale from [-1, 1] to [0.1, 0.9]
    dataset = (dataset + 1) * 0.4 + 0.1
    # dataset = (dataset - np.amin(dataset)) / (np.amax(dataset) - np.amin(dataset))
    return dataset
    # return preprocessing.MaxAbsScaler().fit_transform(dataset)
Example 10
def scale_by_max_value(X):
    """
    Scale each feature by its abs maximum value.

    Keyword arguments:
    X -- The feature vectors
    """
    if verbose:
        print('\nScaling to the range [-1,1] ...')

    max_abs_scaler = preprocessing.MaxAbsScaler()
    return max_abs_scaler.fit_transform(X)
Example 11
def normalize(data, norm="zero_score", scaler=None):
    """Normalize pandas Dataframe.

    @param data: Input dataframe
    @param norm: normalization method [default: zero_score standardization],
                 alternatives: 'min_max', 'max_abs'
    @return datascaled: normalized dataframe
    """
    if scaler is not None:
        datascaled = pd.DataFrame(scaler.transform(data),
                                  index=data.index.values)
        datascaled.columns = data.columns.values
    else:
        if norm == "min_max":
            scaler = preprocessing.MinMaxScaler()
        elif norm == "max_abs":
            scaler = preprocessing.MaxAbsScaler()
        else:
            scaler = preprocessing.StandardScaler()
        datascaled = pd.DataFrame(scaler.fit_transform(data),
                                  index=data.index.values)
        datascaled.columns = data.columns.values

    return datascaled, scaler
# deprecated - use sklearn.model_selection.train_test_split instead
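A short usage sketch for the normalize helper above: the scaler is fit on the training frame only and then reused so the test frame receives the identical transform. The DataFrames are illustrative, and the calls assume the module-level pandas and sklearn.preprocessing imports the function already relies on:

train = pd.DataFrame({"a": [1.0, 2.0, 4.0], "b": [-10.0, 5.0, 20.0]})
test = pd.DataFrame({"a": [3.0], "b": [8.0]})

# Fit a MaxAbsScaler on the training data and get it back for reuse ...
train_scaled, scaler = normalize(train, norm="max_abs")
# ... then pass the fitted scaler so the test data is transformed, not refit.
test_scaled, _ = normalize(test, scaler=scaler)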
Example 12
def _get_feature_scaler(self):
    """Get a feature value scaler based on the model settings"""
    if self.config.model_settings is None:
        scale_type = None
    else:
        scale_type = self.config.model_settings.get("feature_scaler")
    scaler = {
        "std-dev": StandardScaler(with_mean=False),
        "max-abs": MaxAbsScaler(),
    }.get(scale_type)
    return scaler
Example 13
def _get_feature_scaler(scale_type):
    """Get a feature value scaler based on the model settings"""
    scaler = {
        "std-dev": StandardScaler(with_mean=False),
        "max-abs": MaxAbsScaler(),
    }.get(scale_type)
    return scaler
Example 14
def scale(df, scaling=None):
    """Scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to scale
    scaling : 'maxabs', 'minmax', 'std', or None, optional (default 'std')
        type of scaling to apply
    """
    if scaling is None or scaling.lower() == 'none':
        return df

    df = df.dropna(axis=1, how='any')

    # Scaling data
    if scaling == 'maxabs':
        # Normalizing to [-1, 1]
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        # Scaling to [0, 1]
        scaler = MinMaxScaler()
    else:
        # Standard normalization
        scaler = StandardScaler()

    mat = df.values  # DataFrame.as_matrix() was removed in recent pandas; .values is equivalent here
    mat = scaler.fit_transform(mat)
    df = pd.DataFrame(mat, columns=df.columns)
    return df
Example 15
def impute_and_scale(df, scaling='std'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional (default 'std')
        type of scaling to apply
    """
    df = df.dropna(axis=1, how='all')

    # imputer = Imputer(strategy='mean', axis=0)
    imputer = Imputer(strategy='mean')
    mat = imputer.fit_transform(df)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)
    df = pd.DataFrame(mat, columns=df.columns)
    return df
Example 16
def impute_and_scale(df, scaling='std'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional (default 'std')
        type of scaling to apply
    """
    df = df.dropna(axis=1, how='all')

    imputer = Imputer(strategy='mean')
    mat = imputer.fit_transform(df)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)
    df = pd.DataFrame(mat, columns=df.columns)
    return df
Example 17
def load_data(train_path, test_path, gParameters):
    print('Loading data...')
    df_train = (pd.read_csv(train_path, header=None).values).astype('float32')
    df_test = (pd.read_csv(test_path, header=None).values).astype('float32')
    print('done')

    print('df_train shape:', df_train.shape)
    print('df_test shape:', df_test.shape)

    seqlen = df_train.shape[1]

    df_y_train = df_train[:, 0].astype('int')
    df_y_test = df_test[:, 0].astype('int')

    Y_train = np_utils.to_categorical(df_y_train, gParameters['classes'])
    Y_test = np_utils.to_categorical(df_y_test, gParameters['classes'])

    df_x_train = df_train[:, 1:seqlen].astype(np.float32)
    df_x_test = df_test[:, 1:seqlen].astype(np.float32)

    # X_train = df_x_train.as_matrix()
    # X_test = df_x_test.as_matrix()
    X_train = df_x_train
    X_test = df_x_test

    scaler = MaxAbsScaler()
    mat = np.concatenate((X_train, X_test), axis=0)
    mat = scaler.fit_transform(mat)
    X_train = mat[:X_train.shape[0], :]
    X_test = mat[X_train.shape[0]:, :]

    return X_train, Y_train, X_test, Y_test
Example 18
def scale_array(mat, scaling=None):
    """Scale data included in numpy array.

    Parameters
    ----------
    mat : numpy array
        Array to scale
    scaling : string
        String describing type of scaling to apply.
        Options recognized: 'maxabs', 'minmax', 'std'.
        'maxabs' : scales data to the range [-1, 1].
        'minmax' : scales data to the range [0, 1].
        'std' : scales data to a normal variable with mean 0 and standard deviation 1.
        (Default: None, no scaling).

    Return
    ----------
    Returns the numpy array scaled by the method specified.
    If no scaling method is specified, it returns the numpy
    array unmodified.
    """
    if scaling is None or scaling.lower() == 'none':
        return mat

    # Scaling data
    if scaling == 'maxabs':
        # Scaling to [-1, 1]
        scaler = MaxAbsScaler(copy=False)
    elif scaling == 'minmax':
        # Scaling to [0, 1]
        scaler = MinMaxScaler(copy=False)
    else:
        # Standard normalization
        scaler = StandardScaler(copy=False)

    return scaler.fit_transform(mat)
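A brief usage sketch for scale_array; the array is made up, and because the scalers above are constructed with copy=False the input may be modified in place:

import numpy as np

mat = np.array([[1.0, -10.0],
                [2.0,   5.0],
                [4.0,  20.0]], dtype=np.float32)

# 'maxabs' divides each column by its maximum absolute value, giving values in [-1, 1].
scaled = scale_array(mat, scaling='maxabs')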
Example 19
def drop_impute_and_scale_dataframe(df, scaling='std', imputing='mean', dropna='all'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to process
    scaling : string
        String describing type of scaling to apply.
        'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional
        (Default 'std')
    imputing : string
        String describing type of imputation to apply.
        'mean' replace missing values with mean value along the column,
        'median' replace missing values with median value along the column,
        'most_frequent' replace missing values with most frequent value along column
        (Default: 'mean').
    dropna : string
        String describing strategy for handling missing values.
        'all' if all values are NA, drop that column.
        'any' if any NA values are present, drop that column.
        (Default: 'all').

    Return
    ----------
    Returns the data frame after handling missing values and scaling.
    """
    if dropna:
        df = df.dropna(axis=1, how=dropna)
    else:
        empty_cols = df.columns[df.notnull().sum() == 0]
        df[empty_cols] = 0

    if imputing is None or imputing.lower() == 'none':
        mat = df.values
    else:
        # imputer = Imputer(strategy=imputing, axis=0)
        # imputer = SimpleImputer(strategy=imputing)
        # Next line is from conditional import. axis=0 is default
        # in old version so it is not necessary.
        imputer = Imputer(strategy=imputing)
        mat = imputer.fit_transform(df.values)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)
    df = pd.DataFrame(mat, columns=df.columns)
    return df