Python source code examples: sklearn.preprocessing.PolynomialFeatures()
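PolynomialFeatures expands an input matrix with polynomial and interaction terms up to a given degree. As a quick orientation before the collected examples, here is a minimal sketch of the expansion it produces (values shown for scikit-learn's default include_bias=True):

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0, 3.0]])
poly = PolynomialFeatures(degree=2)
print(poly.fit_transform(X))
# [[1. 2. 3. 4. 6. 9.]]  -> columns: 1, a, b, a^2, a*b, b^2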

Example 1
def fit(self, x, y=None):
        if y is not None:
            xdot = y
        else:
            xdot = self.derivative.transform(x)

        if self.operators is not None:
            feature_transformer = SymbolicFeatures(
                exponents=np.linspace(1, self.degree, self.degree), operators=self.operators
            )
        else:
            feature_transformer = PolynomialFeatures(degree=self.degree, include_bias=False)

        steps = [
            ("features", feature_transformer),
            ("model", STRidge(alpha=self.alpha, threshold=self.threshold, **self.kw)),
        ]
        self.model = MultiOutputRegressor(Pipeline(steps), n_jobs=self.n_jobs)
        self.model.fit(x, xdot)

        self.n_input_features_ = self.model.estimators_[0].steps[0][1].n_input_features_
        self.n_output_features_ = self.model.estimators_[0].steps[0][1].n_output_features_
        return self 
Example 2
def test_transformed_shape(self):
        # checks that the transformed objects have the correct number of columns
        a = dpp.PolynomialFeatures()
        a.fit(X)
        n_cols = len(a.get_feature_names())
        # dask array
        assert a.transform(X).shape[1] == n_cols
        # numpy array
        assert a.transform(X.compute()).shape[1] == n_cols
        # dask dataframe
        assert a.transform(df).shape[1] == n_cols
        # pandas dataframe
        assert a.transform(df.compute()).shape[1] == n_cols
        X_nan_rows = df.values
        df_none_divisions = X_nan_rows.to_dask_dataframe(columns=df.columns)
        # dask array with nan rows
        assert a.transform(X_nan_rows).shape[1] == n_cols
        # dask data frame with nan rows
        assert a.transform(df_none_divisions).shape[1] == n_cols 
Example 3
def test_model_polynomial_features_float_degree_2(self):
        X = np.array([[1.2, 3.2, 1.3, -5.6], [4.3, -3.2, 5.7, 1.0],
                      [0, 3.2, 4.7, -8.9]])
        model = PolynomialFeatures(degree=2).fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn polynomial features",
            [("input", FloatTensorType([None, X.shape[1]]))],
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnPolynomialFeaturesFloatDegree2",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
        ) 
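For context, a model converted as above can be executed with onnxruntime. A minimal sketch, reusing model_onnx and X from the test (the input name "input" matches the convert_sklearn declaration):

import onnxruntime as rt

sess = rt.InferenceSession(model_onnx.SerializeToString())
expanded = sess.run(None, {"input": X.astype(np.float32)})[0]
# expanded holds the same features PolynomialFeatures(degree=2) produces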
Example 4
def test_model_polynomial_features_int_degree_2(self):
        X = np.array([
            [1, 3, 4, 0],
            [2, 3, 4, 1],
            [1, -4, 3, 7],
            [3, 10, -9, 5],
            [1, 0, 10, 5],
        ])
        model = PolynomialFeatures(degree=2).fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn polynomial features",
            [("input", Int64TensorType([None, X.shape[1]]))],
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnPolynomialFeaturesIntDegree2",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
        ) 
Example 5
def test_model_polynomial_features_float_degree_3(self):
        X = np.array([[1.2, 3.2, 1.2], [4.3, 3.2, 4.5], [3.2, 4.7, 1.1]])
        model = PolynomialFeatures(degree=3).fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn polynomial features",
            [("input", FloatTensorType([None, X.shape[1]]))],
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnPolynomialFeaturesFloatDegree3",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
        ) 
Example 6
def test_model_polynomial_features_int_degree_3(self):
        X = np.array([
            [1, 3, 33],
            [4, 1, -11],
            [3, 7, -3],
            [3, 5, 4],
            [1, 0, 3],
            [5, 4, 9],
        ])
        model = PolynomialFeatures(degree=3).fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn polynomial features",
            [("input", Int64TensorType([None, X.shape[1]]))],
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnPolynomialFeaturesIntDegree3",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
        ) 
Example 7
def test_model_polynomial_features_float_degree_4(self):
        X = np.array([[1.2, 3.2, 3.1, 1.3], [4.3, 3.2, 0.5, 1.3],
                      [3.2, 4.7, 5.4, 7.1]])
        model = PolynomialFeatures(degree=4).fit(X)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn polynomial features",
            [("input", FloatTensorType([None, X.shape[1]]))],
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnPolynomialFeaturesFloatDegree4-Dec4",
            allow_failure="StrictVersion(onnxruntime.__version__)"
                          " <= StrictVersion('0.2.1')",
        ) 
Example 8
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
        self.assertIs(df.preprocessing.FunctionTransformer,
                      pp.FunctionTransformer)
        self.assertIs(df.preprocessing.Imputer, pp.Imputer)
        self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
        self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
        self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
        self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
        self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
        self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
        self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
        self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
        self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
        self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
        self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler) 
Example 9
def polynomial_regression(self, assign=True, degree=2, **kwargs):
        """
        有监督学习回归器,使用:
            make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))

        :param assign: 是否保存实例后的LinearRegression对象,默认True,self.reg = reg
        :param degree: 多项式拟合参数,默认2
        :param kwargs: 由make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))
                       即关键字参数**kwargs全部传递给LinearRegression做为构造参数

        :return: 实例化的回归对象
        """
        reg = make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))
        if assign:
            self.reg = reg
        return reg 
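A hypothetical usage sketch for the helper above, assuming `ml` is an object exposing polynomial_regression and that train_x/train_y/test_x are prepared arrays (all names illustrative):

reg = ml.polynomial_regression(degree=3, fit_intercept=True)  # kwargs forwarded to LinearRegression
reg.fit(train_x, train_y)
predictions = reg.predict(test_x)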
Example 10
def sample_1031_3():
    """
    10.3.1_3 猪老三使用回归预测股价:PolynomialFeatures
    :return:
    """
    train_x, train_y_regress, train_y_classification, pig_three_feature, \
    test_x, test_y_regress, test_y_classification, kl_another_word_feature_test = sample_1031_1()

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression

    # Pipeline: PolynomialFeatures(degree=3) + LinearRegression
    estimator = make_pipeline(PolynomialFeatures(degree=3),
                              LinearRegression())
    # Reuse regress_process; only the estimator has changed
    regress_process(estimator, train_x, train_y_regress, test_x,
                    test_y_regress)
    plt.show() 
Example 11
def evaluate_timestamp(self, timestamp):
        """
        Takes a datetime object and evaluates it either as a prediction
        (outside the fitted data range) or as an interpolation (inside it)

        - timestamp: datetime object (date_1/date_2 in `calculate`)
        > Returns float of prediction or interpolation
        """

        if (
            datetime.date(1993, 1, 15) > timestamp.date()
            or datetime.date(2019, 2, 7) < timestamp.date()
        ):
            # Perform some data preparation before being
            # able to pass it to the model
            return self.poly_model.predict(
                PolynomialFeatures(degree=3).fit_transform(
                    np.array([timestamp.timestamp()]).reshape(1, -1)
                )
            )[0][0]

        return self.model(timestamp.timestamp()) 
Example 12
def poly_inter(self, data):
        # define x values for data points
        X = np.linspace(0, data.shape[0] - 1, data.shape[0])[:, np.newaxis]
        
        # define pipeline and fit model
        model = make_pipeline(PolynomialFeatures(self.degree), Ridge())
        model.fit(X, data)

        if self.plot: plot_poly(X, model.predict(X), data)
        
        # predict next interpolated value
        last = model.predict(np.array([[data.shape[0] - 1]]))
        pred = model.predict(np.array([[data.shape[0]]]))

        # return the ratio of the predicted next value to the last fitted value
        return pred[0]/last[0] 
Example 13
def feature_transform(X, mode='polynomial', degree=1):

        poly = PolynomialFeatures(degree)
        process_X = poly.fit_transform(X)

        if mode == 'legendre':
            lege = legendre(degree)
            process_X = lege(process_X)

        return process_X 
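The legendre name above presumably refers to scipy.special.legendre; a minimal sketch of that dependency, under that assumption:

from scipy.special import legendre

lege = legendre(3)   # poly1d for the degree-3 Legendre polynomial
print(lege(0.5))     # -0.4375; applied elementwise when given an array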
Example 14
def polyfeatures(X):
    poly = PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)
    X_poly = poly.fit_transform(X)
    X = pd.DataFrame(X_poly, columns=poly.get_feature_names())
    return X 
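For reference, get_feature_names() uses scikit-learn's default x0, x1, ... naming (newer releases expose it as get_feature_names_out). A minimal sketch with two input columns:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit(np.zeros((1, 2)))
print(poly.get_feature_names())
# ['x0', 'x1', 'x0^2', 'x0 x1', 'x1^2']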
Example 15
def learn_on_k_best(archive: utils.Archive[utils.MultiValue], k: int) -> ArrayLike:
    """Approximate optimum learnt from the k best.

    Parameters
    ----------
    archive: utils.Archive[utils.MultiValue]
    """
    items = list(archive.items_as_arrays())
    dimension = len(items[0][0])

    # Select the k best.
    first_k_individuals = sorted(items, key=lambda indiv: archive[indiv[0]].get_estimation("pessimistic"))[:k]
    assert len(first_k_individuals) == k

    # Recenter the best.
    middle = np.array(sum(p[0] for p in first_k_individuals) / k)
    normalization = 1e-15 + np.sqrt(np.sum((first_k_individuals[-1][0] - first_k_individuals[0][0])**2))
    y = [archive[c[0]].get_estimation("pessimistic") for c in first_k_individuals]
    X = np.asarray([(c[0] - middle) / normalization for c in first_k_individuals])

    # We need SKLearn.
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures
    polynomial_features = PolynomialFeatures(degree=2)
    X2 = polynomial_features.fit_transform(X)

    # Fit a linear model.
    model = LinearRegression()
    model.fit(X2, y)

    # Find the minimum of the quadratic model.
    optimizer = OnePlusOne(parametrization=dimension, budget=dimension * dimension + dimension + 500)
    try:
        optimizer.minimize(lambda x: float(model.predict(polynomial_features.fit_transform(np.asarray([x])))))
    except ValueError:
        raise InfiniteMetaModelOptimum("Infinite meta-model optimum in learn_on_k_best.")

    minimum = optimizer.provide_recommendation().value
    if np.sum(minimum**2) > 1.:
        raise InfiniteMetaModelOptimum("huge meta-model optimum in learn_on_k_best.")
    return middle + normalization * minimum 
Example 16
def get_features(x,degree):
        print("Extending feature vectors with degree "+str(degree)+" ..")
        # PolynomialFeatures generates the low-degree monomials. It is a bit
        # slow; you could write your own recursive version instead.
        featureExtender = PolynomialFeatures(degree, interaction_only=True)
        tmp=[]
        for current_x in x:
            tmp.append(featureExtender.fit_transform(np.array(current_x).reshape(1, -1))[0].tolist())   # Extend feature vectors with monomials
        return tmp 
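To illustrate what interaction_only=True keeps: powers of a single feature are dropped and only products of distinct features remain. A minimal sketch:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0, 3.0]])
print(PolynomialFeatures(degree=2, interaction_only=True).fit_transform(X))
# [[1. 2. 3. 6.]]  -> 1, a, b, a*b (no a^2 or b^2)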
Example 17
def fit(self, X, y):
        sdim, fdim = X.shape
        for i in range(self.n_estimators):
            ridge = Ridge(alpha=self.alpha, normalize=self.normalize, random_state=self.random_state)
            fidx = self._random_feature_idx(fdim, self.random_state+i*100)
            sidx = self._random_sample_idx(sdim, self.random_state+i*10)
            X_tmp = X[sidx][:,fidx]
            if self.poly:
                X_tmp = PolynomialFeatures(degree=2).fit_transform(X_tmp)[:,1:]  # drop the bias column
            ridge.fit(X_tmp, y[sidx])
            self.ridge_list[i] = ridge
            self.feature_idx_list[i] = fidx
        return self 
Example 18
def predict(self, X):
        y_pred = np.zeros((X.shape[0], self.n_estimators))
        for i in range(self.n_estimators):
            fidx = self.feature_idx_list[i]
            ridge = self.ridge_list[i]
            X_tmp = X[:,fidx]
            if self.poly:
                X_tmp = PolynomialFeatures(degree=2).fit_transform(X_tmp)[:,1:]  # drop the bias column
            y_pred[:,i] = ridge.predict(X_tmp)
        y_pred = np.mean(y_pred, axis=1)
        return y_pred 
Example 19
def poly_fit(x, y, degree, fit="RANSAC"):
  # check if we can use RANSAC
  if fit == "RANSAC":
    try:
      # ignore ImportWarnings in sklearn
      with warnings.catch_warnings():
        warnings.simplefilter("ignore", ImportWarning)
        import sklearn.linear_model as sklin
        import sklearn.preprocessing as skpre
    except ImportError:
      warnings.warn(
        "fitting mode 'RANSAC' requires the package sklearn, using"
        + " 'poly' instead",
        RuntimeWarning)
      fit = "poly"

  if fit == "poly":
    return np.polyfit(x, y, degree)
  elif fit == "RANSAC":
    model = sklin.RANSACRegressor(sklin.LinearRegression(fit_intercept=False))
    xdat = np.asarray(x)
    if len(xdat.shape) == 1:
      # interpret 1d-array as list of len(x) samples instead of
      # one sample of length len(x)
      xdat = xdat.reshape(-1, 1)
    polydat = skpre.PolynomialFeatures(degree).fit_transform(xdat)
    try:
      model.fit(polydat, y)
      coef = model.estimator_.coef_[::-1]  # reverse to match np.polyfit order (highest degree first)
    except ValueError:
      warnings.warn(
        "RANSAC did not reach consensus, "
        + "using numpy's polyfit",
        RuntimeWarning)
      coef = np.polyfit(x, y, degree)
    return coef
  else:
    raise ValueError("invalid fitting mode ({})".format(fit)) 
Example 20
def test_partial_dependence_easy_target(est, power):
    # If the target y only depends on one feature in an obvious way (linear or
    # quadratic) then the partial dependence for that feature should reflect
    # it.
    # Here we fit a linear regression model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.

    rng = np.random.RandomState(0)
    n_samples = 100
    target_variable = 2
    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable]**power

    est.fit(X, y)

    averaged_predictions, values = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000)

    new_X = values[0].reshape(-1, 1)
    new_y = averaged_predictions[0]
    # add polynomial features if needed
    new_X = PolynomialFeatures(degree=power).fit_transform(new_X)

    lr = LinearRegression().fit(new_X, new_y)
    r2 = r2_score(new_y, lr.predict(new_X))

    assert r2 > .99 
Example 21
def train_polynomial_regression_model(self, degree):
        """训练多项式回归模型"""
        poly_feat = PolynomialFeatures(degree=degree)
        x_tranformed = poly_feat.fit_transform(self.dataset_X)
        linear = LinearRegression()
        linear.fit(x_tranformed, self.dataset_Y)
        self.predict = lambda x: linear.predict(poly_feat.transform(x))[0] 
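Note that poly_feat.transform expects a 2-D array, so the stored predict lambda must be called with a row matrix. A hypothetical call, assuming `m` is an instance carrying dataset_X/dataset_Y:

m.train_polynomial_regression_model(degree=3)
y_hat = m.predict([[2.5]])  # one sample, one feature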
Example 22
def svc_example(n_samples=10000, n_features=4):
    from sklearn.svm import LinearSVC
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.datasets import make_classification

    X, Y = make_classification(n_samples, n_features)
    # pp = PolynomialFeatures(degree=3)
    # X = pp.fit_transform(X)
    m = LinearSVC()
    m.fit(X, Y) 
Example 23
def get_polynomials(features, poly_degree):
    r"""Generate interactions that are products of distinct features.

    Parameters
    ----------
    features : pandas.DataFrame
        Dataframe containing the features for generating interactions.
    poly_degree : int
        The degree of the polynomial features.

    Returns
    -------
    poly_features : numpy array
        The interaction features only.
    poly_fnames : list
        List of polynomial feature names.

    References
    ----------
    You can find more information on polynomial interactions here [POLY]_.

    .. [POLY] http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

    """
    polyf = PolynomialFeatures(interaction_only=True,
                               degree=poly_degree,
                               include_bias=False)
    poly_features = polyf.fit_transform(features)
    poly_fnames = polyf.get_feature_names()
    return poly_features, poly_fnames
Example 24
def multipipeline():
    multipipeline = MultiPipeline(
        {
            'scaler': StandardScaler(),
            'poly': PolynomialFeatures(degree=2)
        }
    )
    return multipipeline 
Example 25
def fit(self, y_train, fh=None, X_train=None):
        """Fit to training data.

        Parameters
        ----------
        y_train : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecaster's horizon with the steps ahead to predict.
        X_train : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored.
        Returns
        -------
        self : returns an instance of self.
        """
        if X_train is not None:
            raise NotImplementedError()
        self._set_oh(y_train)
        self._set_fh(fh)

        # for default regressor, set fit_intercept=False as we generate a
        # dummy variable in polynomial features
        r = self.regressor if self.regressor is not None else LinearRegression(
            fit_intercept=False)
        self.regressor_ = make_pipeline(PolynomialFeatures(
            degree=self.degree,
            include_bias=self.with_intercept),
            r)
        x = y_train.index.values.reshape(-1, 1)
        self.regressor_.fit(x, y_train.values)
        self._is_fitted = True
        return self 
Example 26
def b_fit_score(self, x, y):
        """ Compute the RECI fit score

        Args:
            x (numpy.ndarray): Variable 1
            y (numpy.ndarray): Variable 2

        Returns:
            float: RECI fit score

        """
        x = np.reshape(minmax_scale(x), (-1, 1))
        y = np.reshape(minmax_scale(y), (-1, 1))
        poly = PolynomialFeatures(degree=self.degree)
        poly_x = poly.fit_transform(x)

        poly_x[:,1] = 0
        poly_x[:,2] = 0

        regressor = LinearRegression()
        regressor.fit(poly_x, y)

        y_predict = regressor.predict(poly_x)
        error = mean_squared_error(y_predict, y)

        return error 
Example 27
def add_curve(f, x, y, source, group_size=30, color='black', alpha=1, smooth=1, y_range_name='default'):
    if f:
        # https://scikit-learn.org/stable/auto_examples/linear_model/plot_polynomial_interpolation.html
        source = source[[x, y]].dropna()
        degree = min(10, max(1, len(source) // group_size))
        log.debug(f'Fitting polynomial of degree {degree} (ridge alpha {smooth})')
        xf = source[x].values.astype('float64')
        nxf = (xf - xf[0]) / (xf[1] - xf[0])  # normalise x to step units (assumes roughly even spacing)
        nxf2 = nxf[:, np.newaxis]
        y2 = source[y].values[:, np.newaxis]
        model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=smooth))
        model.fit(nxf2, y2)
        f.line(source[x], model.predict(nxf2)[:, 0], color=color, alpha=alpha, y_range_name=y_range_name) 
Example 28
def test_basic(self):
        a = dpp.PolynomialFeatures()
        b = spp.PolynomialFeatures()

        a.fit(X)
        b.fit(X.compute())
        assert_estimator_equal(a._transformer, b) 
Example 29
def test_input_types(self):
        a = dpp.PolynomialFeatures()
        b = spp.PolynomialFeatures()

        assert_estimator_equal(a.fit(df), a.fit(df.compute()))
        assert_estimator_equal(a.fit(df), a.fit(df.compute().values))
        assert_estimator_equal(a.fit(df.values), a.fit(df.compute().values))
        assert_estimator_equal(a.fit(df), b.fit(df.compute()))
        assert_estimator_equal(a.fit(df), b.fit(df.compute().values)) 
Example 30
def test_array_transform(self):
        a = dpp.PolynomialFeatures()
        b = spp.PolynomialFeatures()

        res_a = a.fit_transform(X)
        res_b = b.fit_transform(X.compute())
        assert_estimator_equal(a, b)
        assert dask.is_dask_collection(res_a)
        assert_eq_ar(res_a, res_b)