Python source code examples: sklearn.preprocessing.PolynomialFeatures()
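As a self-contained warm-up before the collected examples, here is a minimal sketch (input values are illustrative only) of what the transformer does: with degree=2 it expands each row [a, b] into [1, a, b, a², ab, b²].

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0, 3.0]])           # one sample, two features (illustrative)
poly = PolynomialFeatures(degree=2)  # default include_bias=True keeps the leading 1
X_poly = poly.fit_transform(X)
print(X_poly)                        # [[1. 2. 3. 4. 6. 9.]]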
Example 1
def fit(self, x, y=None):
    if y is not None:
        xdot = y
    else:
        xdot = self.derivative.transform(x)

    if self.operators is not None:
        feature_transformer = SymbolicFeatures(
            exponents=np.linspace(1, self.degree, self.degree), operators=self.operators
        )
    else:
        feature_transformer = PolynomialFeatures(degree=self.degree, include_bias=False)

    steps = [
        ("features", feature_transformer),
        ("model", STRidge(alpha=self.alpha, threshold=self.threshold, **self.kw)),
    ]
    self.model = MultiOutputRegressor(Pipeline(steps), n_jobs=self.n_jobs)
    self.model.fit(x, xdot)
    self.n_input_features_ = self.model.estimators_[0].steps[0][1].n_input_features_
    self.n_output_features_ = self.model.estimators_[0].steps[0][1].n_output_features_
    return self
Example 2
def test_transformed_shape(self):
    # checks if the transformed objects have the correct columns
    a = dpp.PolynomialFeatures()
    a.fit(X)
    n_cols = len(a.get_feature_names())
    # dask array
    assert a.transform(X).shape[1] == n_cols
    # numpy array
    assert a.transform(X.compute()).shape[1] == n_cols
    # dask dataframe
    assert a.transform(df).shape[1] == n_cols
    # pandas dataframe
    assert a.transform(df.compute()).shape[1] == n_cols
    X_nan_rows = df.values
    df_none_divisions = X_nan_rows.to_dask_dataframe(columns=df.columns)
    # dask array with nan rows
    assert a.transform(X_nan_rows).shape[1] == n_cols
    # dask dataframe with nan rows
    assert a.transform(df_none_divisions).shape[1] == n_cols
Example 3
def test_model_polynomial_features_float_degree_2(self):
    X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
                  [0, 3.2, 4.7, -8.9]])
    model = PolynomialFeatures(degree=2).fit(X)
    model_onnx = convert_sklearn(
        model,
        "scikit-learn polynomial features",
        [("input", FloatTensorType([None, X.shape[1]]))],
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnPolynomialFeaturesFloatDegree2",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
    )
Example 4
def test_model_polynomial_features_int_degree_2(self):
    X = np.array([
        [1, 3, 4, 0],
        [2, 3, 4, 1],
        [1, -4, 3, 7],
        [3, 10, -9, 5],
        [1, 0, 10, 5],
    ])
    model = PolynomialFeatures(degree=2).fit(X)
    model_onnx = convert_sklearn(
        model,
        "scikit-learn polynomial features",
        [("input", Int64TensorType([None, X.shape[1]]))],
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnPolynomialFeaturesIntDegree2",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
    )
Example 5
def test_model_polynomial_features_float_degree_3(self):
    X = np.array([[1.2, 3.2, 1.2], [4.3, 3.2, 4.5], [3.2, 4.7, 1.1]])
    model = PolynomialFeatures(degree=3).fit(X)
    model_onnx = convert_sklearn(
        model,
        "scikit-learn polynomial features",
        [("input", FloatTensorType([None, X.shape[1]]))],
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnPolynomialFeaturesFloatDegree3",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
    )
Example 6
def test_model_polynomial_features_int_degree_3(self):
    X = np.array([
        [1, 3, 33],
        [4, 1, -11],
        [3, 7, -3],
        [3, 5, 4],
        [1, 0, 3],
        [5, 4, 9],
    ])
    model = PolynomialFeatures(degree=3).fit(X)
    model_onnx = convert_sklearn(
        model,
        "scikit-learn polynomial features",
        [("input", Int64TensorType([None, X.shape[1]]))],
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnPolynomialFeaturesIntDegree3",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
    )
Example 7
def test_model_polynomial_features_float_degree_4(self):
    X = np.array([[1.2, 3.2, 3.1, 1.3], [4.3, 3.2, 0.5, 1.3],
                  [3.2, 4.7, 5.4, 7.1]])
    model = PolynomialFeatures(degree=4).fit(X)
    model_onnx = convert_sklearn(
        model,
        "scikit-learn polynomial features",
        [("input", FloatTensorType([None, X.shape[1]]))],
    )
    self.assertTrue(model_onnx is not None)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnPolynomialFeaturesFloatDegree4-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
    )
Example 8
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
    self.assertIs(df.preprocessing.FunctionTransformer,
                  pp.FunctionTransformer)
    self.assertIs(df.preprocessing.Imputer, pp.Imputer)
    self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
    self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
    self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
    self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
    self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
    self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
    self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
    self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
    self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
    self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
    self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler)
Example 9
def polynomial_regression(self, assign=True, degree=2, **kwargs):
    """
    Supervised learning regressor built with:
        make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))
    :param assign: whether to keep the fitted object on the instance
        (default True, i.e. self.reg = reg)
    :param degree: degree of the polynomial features, default 2
    :param kwargs: keyword arguments passed through to the LinearRegression
        constructor inside make_pipeline(PolynomialFeatures(degree),
        LinearRegression(**kwargs))
    :return: the instantiated regression object
    """
    reg = make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))
    if assign:
        self.reg = reg
    return reg
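A hedged usage sketch of the same pipeline pattern as Example 9, on invented data (a noiseless quadratic target, so degree=2 recovers it exactly; all numbers are illustrative only):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

rng = np.random.RandomState(0)
x = rng.uniform(-3, 3, size=(100, 1))
y = 0.5 * x[:, 0] ** 2 - x[:, 0] + 1.0   # quadratic target, no noise

reg = make_pipeline(PolynomialFeatures(2), LinearRegression())
reg.fit(x, y)
print(reg.predict([[2.0]]))              # ~1.0, since 0.5*4 - 2 + 1 = 1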
Example 10
def sample_1031_3():
    """
    10.3.1_3 Zhu Laosan predicts stock prices with regression: PolynomialFeatures
    :return:
    """
    train_x, train_y_regress, train_y_classification, pig_three_feature, \
        test_x, test_y_regress, test_y_classification, kl_another_word_feature_test = sample_1031_1()

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression

    # pipeline wrapping PolynomialFeatures(degree=3) + LinearRegression
    estimator = make_pipeline(PolynomialFeatures(degree=3),
                              LinearRegression())
    # keep using regress_process; the only difference is the estimator
    regress_process(estimator, train_x, train_y_regress, test_x,
                    test_y_regress)
    plt.show()
Example 11
def evaluate_timestamp(self, timestamp):
    """
    Takes a datetime object and computes either a prediction or an
    interpolation.
    - timestamp: datetime object (date_1/date_2 in `calculate`)
    > Returns float of prediction or interpolation
    """
    if (
        datetime.date(1993, 1, 15) > timestamp.date()
        or datetime.date(2019, 2, 7) < timestamp.date()
    ):
        # Perform some data preparation before being
        # able to pass it to the model
        return self.poly_model.predict(
            PolynomialFeatures(degree=3).fit_transform(
                np.array([timestamp.timestamp()]).reshape(1, -1)
            )
        )[0][0]
    return self.model(timestamp.timestamp())
Example 12
def poly_inter(self, data):
    # define x values for data points
    X = np.linspace(0, data.shape[0] - 1, data.shape[0])[:, np.newaxis]
    # define pipeline and fit model
    model = make_pipeline(PolynomialFeatures(self.degree), Ridge())
    model.fit(X, data)
    if self.plot:
        plot_poly(X, model.predict(X), data)
    # predict next interpolated value
    last = model.predict(np.array([[data.shape[0] - 1]]))
    pred = model.predict(np.array([[data.shape[0]]]))
    # return slope of last point
    return pred[0] / last[0]
Example 13
def feature_transform(X, mode='polynomial', degree=1):
    poly = PolynomialFeatures(degree)
    process_X = poly.fit_transform(X)
    if mode == 'legendre':
        lege = legendre(degree)
        process_X = lege(process_X)
    return process_X
Example 14
def polyfeatures(X):
    poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)
    X_poly = poly.fit_transform(X)
    X = pd.DataFrame(X_poly, columns=poly.get_feature_names())
    return X
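Note that get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2; on recent versions the equivalent call is get_feature_names_out(), as in this hedged sketch (column names and values invented):

import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

X = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)
X_poly = poly.fit_transform(X)
print(poly.get_feature_names_out())  # ['a' 'b' 'a^2' 'a b' 'b^2']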
Example 15
def learn_on_k_best(archive: utils.Archive[utils.MultiValue], k: int) -> ArrayLike:
    """Approximate optimum learnt from the k best.

    Parameters
    ----------
    archive: utils.Archive[utils.Value]
    """
    items = list(archive.items_as_arrays())
    dimension = len(items[0][0])
    # Select the k best.
    first_k_individuals = [x for x in sorted(items, key=lambda indiv: archive[indiv[0]].get_estimation("pessimistic"))[:k]]
    assert len(first_k_individuals) == k
    # Recenter the best.
    middle = np.array(sum(p[0] for p in first_k_individuals) / k)
    normalization = 1e-15 + np.sqrt(np.sum((first_k_individuals[-1][0] - first_k_individuals[0][0])**2))
    y = [archive[c[0]].get_estimation("pessimistic") for c in first_k_individuals]
    X = np.asarray([(c[0] - middle) / normalization for c in first_k_individuals])
    # We need SKLearn.
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures
    polynomial_features = PolynomialFeatures(degree=2)
    X2 = polynomial_features.fit_transform(X)
    # Fit a linear model.
    model = LinearRegression()
    model.fit(X2, y)
    # Find the minimum of the quadratic model.
    optimizer = OnePlusOne(parametrization=dimension, budget=dimension * dimension + dimension + 500)
    try:
        optimizer.minimize(lambda x: float(model.predict(polynomial_features.fit_transform(np.asarray([x])))))
    except ValueError:
        raise InfiniteMetaModelOptimum("Infinite meta-model optimum in learn_on_k_best.")
    minimum = optimizer.provide_recommendation().value
    if np.sum(minimum**2) > 1.:
        raise InfiniteMetaModelOptimum("huge meta-model optimum in learn_on_k_best.")
    return middle + normalization * minimum
Example 16
def get_features(x, degree):
    print("Extending feature vectors with degree " + str(degree) + " ..")
    # This generates low-degree monomials. It is a little slow, though;
    # you may write your own function using recursion.
    featureExtender = PolynomialFeatures(degree, interaction_only=True)
    tmp = []
    for current_x in x:
        # Extend feature vectors with monomials
        tmp.append(featureExtender.fit_transform(np.array(current_x).reshape(1, -1))[0].tolist())
    return tmp
Example 17
def fit(self, X, y):
    sdim, fdim = X.shape
    for i in range(self.n_estimators):
        ridge = Ridge(alpha=self.alpha, normalize=self.normalize, random_state=self.random_state)
        fidx = self._random_feature_idx(fdim, self.random_state + i * 100)
        sidx = self._random_sample_idx(sdim, self.random_state + i * 10)
        X_tmp = X[sidx][:, fidx]
        if self.poly:
            X_tmp = PolynomialFeatures(degree=2).fit_transform(X_tmp)[:, 1:]
        ridge.fit(X_tmp, y[sidx])
        self.ridge_list[i] = ridge
        self.feature_idx_list[i] = fidx
    return self
Example 18
def predict(self, X):
    y_pred = np.zeros((X.shape[0], self.n_estimators))
    for i in range(self.n_estimators):
        fidx = self.feature_idx_list[i]
        ridge = self.ridge_list[i]
        X_tmp = X[:, fidx]
        if self.poly:
            X_tmp = PolynomialFeatures(degree=2).fit_transform(X_tmp)[:, 1:]
        y_pred[:, i] = ridge.predict(X_tmp)
    y_pred = np.mean(y_pred, axis=1)
    return y_pred
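The [:, 1:] slice in Examples 17 and 18 drops the constant bias column that PolynomialFeatures prepends by default. A quick self-contained check (values illustrative):

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[1.0, 4.0]])
X_poly = PolynomialFeatures(degree=2).fit_transform(X)
print(X_poly)         # [[ 1.  1.  4.  1.  4. 16.]] -- column 0 is the bias term
print(X_poly[:, 1:])  # [[ 1.  4.  1.  4. 16.]]     -- bias column removed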
Example 19
def poly_fit(x, y, degree, fit="RANSAC"):
    # check if we can use RANSAC
    if fit == "RANSAC":
        try:
            # ignore ImportWarnings in sklearn
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", ImportWarning)
                import sklearn.linear_model as sklin
                import sklearn.preprocessing as skpre
        except ImportError:
            warnings.warn(
                "fitting mode 'RANSAC' requires the package sklearn, using"
                + " 'poly' instead",
                RuntimeWarning)
            fit = "poly"

    if fit == "poly":
        return np.polyfit(x, y, degree)
    elif fit == "RANSAC":
        model = sklin.RANSACRegressor(sklin.LinearRegression(fit_intercept=False))
        xdat = np.asarray(x)
        if len(xdat.shape) == 1:
            # interpret 1d-array as list of len(x) samples instead of
            # one sample of length len(x)
            xdat = xdat.reshape(-1, 1)
        polydat = skpre.PolynomialFeatures(degree).fit_transform(xdat)
        try:
            model.fit(polydat, y)
            coef = model.estimator_.coef_[::-1]
        except ValueError:
            warnings.warn(
                "RANSAC did not reach consensus, "
                + "using numpy's polyfit",
                RuntimeWarning)
            coef = np.polyfit(x, y, degree)
        return coef
    else:
        raise ValueError("invalid fitting mode ({})".format(fit))
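A hedged usage sketch for poly_fit above (assumes numpy is imported where poly_fit is defined and sklearn is installed; RANSAC is randomized, so the recovered coefficients are only approximate):

import numpy as np

x = np.arange(10, dtype=float)
y = 2.0 * x ** 2 + 3.0 * x + 1.0
y[4] += 100.0                  # inject one outlier for RANSAC to reject
coef = poly_fit(x, y, degree=2)
print(coef)                    # roughly [2., 3., 1.], highest degree first, as with np.polyfit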
Example 20
def test_partial_dependence_easy_target(est, power):
    # If the target y only depends on one feature in an obvious way (linear or
    # quadratic) then the partial dependence for that feature should reflect
    # it.
    # We here fit a linear regression model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.
    rng = np.random.RandomState(0)
    n_samples = 100
    target_variable = 2
    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable]**power
    est.fit(X, y)
    averaged_predictions, values = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000)
    new_X = values[0].reshape(-1, 1)
    new_y = averaged_predictions[0]
    # add polynomial features if needed
    new_X = PolynomialFeatures(degree=power).fit_transform(new_X)
    lr = LinearRegression().fit(new_X, new_y)
    r2 = r2_score(new_y, lr.predict(new_X))
    assert r2 > .99
Example 21
def train_polynomial_regression_model(self, degree):
    """Train a polynomial regression model."""
    poly_feat = PolynomialFeatures(degree=degree)
    x_transformed = poly_feat.fit_transform(self.dataset_X)
    linear = LinearRegression()
    linear.fit(x_transformed, self.dataset_Y)
    self.predict = lambda x: linear.predict(poly_feat.transform(x))[0]
Example 22
def svc_example(n_samples=10000, n_features=4):
    from sklearn.svm import LinearSVC
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.datasets import make_classification

    X, Y = make_classification(n_samples, n_features)
    # pp = PolynomialFeatures(degree=3)
    # X = pp.fit_transform(X)
    m = LinearSVC()
    m.fit(X, Y)
Example 23
def get_polynomials(features, poly_degree):
    r"""Generate interactions that are products of distinct features.

    Parameters
    ----------
    features : pandas.DataFrame
        Dataframe containing the features for generating interactions.
    poly_degree : int
        The degree of the polynomial features.

    Returns
    -------
    poly_features : numpy array
        The interaction features only.
    poly_fnames : list
        List of polynomial feature names.

    References
    ----------
    You can find more information on polynomial interactions here [POLY]_.

    .. [POLY] http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html
    """
    polyf = PolynomialFeatures(interaction_only=True,
                               degree=poly_degree,
                               include_bias=False)
    poly_features = polyf.fit_transform(features)
    poly_fnames = polyf.get_feature_names()
    return poly_features, poly_fnames

#
# Function get_text_features
#
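A hedged usage sketch for get_polynomials (data invented; assumes an older scikit-learn where get_feature_names() still exists, since the helper calls it):

import pandas as pd

df = pd.DataFrame({"x0": [1.0, 2.0, 3.0], "x1": [4.0, 5.0, 6.0]})
feats, fnames = get_polynomials(df, poly_degree=2)
print(fnames)       # ['x0', 'x1', 'x0 x1'] -- interaction terms only, no squares
print(feats.shape)  # (3, 3)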
Example 24
def multipipeline():
    multipipeline = MultiPipeline(
        {
            'scaler': StandardScaler(),
            'poly': PolynomialFeatures(degree=2)
        }
    )
    return multipipeline
Example 25
def fit(self, y_train, fh=None, X_train=None):
    """Fit to training data.

    Parameters
    ----------
    y_train : pd.Series
        Target time series to which to fit the forecaster.
    fh : int, list or np.array, optional (default=None)
        The forecaster's horizon with the steps ahead to predict.
    X_train : pd.DataFrame, optional (default=None)
        Exogenous variables are ignored.

    Returns
    -------
    self : returns an instance of self.
    """
    if X_train is not None:
        raise NotImplementedError()
    self._set_oh(y_train)
    self._set_fh(fh)

    # for the default regressor, set fit_intercept=False as we generate a
    # dummy variable in polynomial features
    r = self.regressor if self.regressor is not None else LinearRegression(
        fit_intercept=False)
    self.regressor_ = make_pipeline(PolynomialFeatures(
        degree=self.degree,
        include_bias=self.with_intercept),
        r)
    x = y_train.index.values.reshape(-1, 1)
    self.regressor_.fit(x, y_train.values)
    self._is_fitted = True
    return self
Example 26
def b_fit_score(self, x, y):
    """Compute the RECI fit score.

    Args:
        x (numpy.ndarray): Variable 1
        y (numpy.ndarray): Variable 2

    Returns:
        float: RECI fit score
    """
    x = np.reshape(minmax_scale(x), (-1, 1))
    y = np.reshape(minmax_scale(y), (-1, 1))
    poly = PolynomialFeatures(degree=self.degree)
    poly_x = poly.fit_transform(x)

    poly_x[:, 1] = 0
    poly_x[:, 2] = 0

    regressor = LinearRegression()
    regressor.fit(poly_x, y)
    y_predict = regressor.predict(poly_x)
    error = mean_squared_error(y_predict, y)
    return error
Example 27
def add_curve(f, x, y, source, group_size=30, color='black', alpha=1, smooth=1, y_range_name='default'):
    if f:
        # https://scikit-learn.org/stable/auto_examples/linear_model/plot_polynomial_interpolation.html
        source = source[[x, y]].dropna()
        degree = min(10, max(1, len(source) // group_size))
        log.debug(f'Fitting polynomial of degree {degree} (ridge alpha {smooth})')
        xf = source[x].values.astype('float64')
        nxf = (xf - xf[0]) / (xf[1] - xf[0])
        nxf2 = nxf[:, np.newaxis]
        y2 = source[y].values[:, np.newaxis]
        model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=smooth))
        model.fit(nxf2, y2)
        f.line(source[x], model.predict(nxf2)[:, 0], color=color, alpha=alpha, y_range_name=y_range_name)
Example 28
def test_basic(self):
    a = dpp.PolynomialFeatures()
    b = spp.PolynomialFeatures()
    a.fit(X)
    b.fit(X.compute())
    assert_estimator_equal(a._transformer, b)
Example 29
def test_input_types(self):
    a = dpp.PolynomialFeatures()
    b = spp.PolynomialFeatures()
    assert_estimator_equal(a.fit(df), a.fit(df.compute()))
    assert_estimator_equal(a.fit(df), a.fit(df.compute().values))
    assert_estimator_equal(a.fit(df.values), a.fit(df.compute().values))
    assert_estimator_equal(a.fit(df), b.fit(df.compute()))
    assert_estimator_equal(a.fit(df), b.fit(df.compute().values))
Example 30
def test_array_transform(self):
    a = dpp.PolynomialFeatures()
    b = spp.PolynomialFeatures()
    res_a = a.fit_transform(X)
    res_b = b.fit_transform(X.compute())
    assert_estimator_equal(a, b)
    assert dask.is_dask_collection(res_a)
    assert_eq_ar(res_a, res_b)