Python源码示例:sklearn.metrics.v_measure_score()

示例1
def test_birch_predict():
    # Test the predict method predicts the nearest centroid.
    rng = np.random.RandomState(0)
    X = generate_clustered_data(n_clusters=3, n_features=3,
                                n_samples_per_cluster=10)

    # n_samples * n_samples_per_cluster
    shuffle_indices = np.arange(30)
    rng.shuffle(shuffle_indices)
    X_shuffle = X[shuffle_indices, :]
    brc = Birch(n_clusters=4, threshold=1.)
    brc.fit(X_shuffle)
    centroids = brc.subcluster_centers_
    assert_array_equal(brc.labels_, brc.predict(X_shuffle))
    nearest_centroid = pairwise_distances_argmin(X_shuffle, centroids)
    assert_almost_equal(v_measure_score(nearest_centroid, brc.labels_), 1.0) 
示例2
def test_birch_predict():
    # Test the predict method predicts the nearest centroid.
    rng = np.random.RandomState(0)
    X = generate_clustered_data(n_clusters=3, n_features=3,
                                n_samples_per_cluster=10)

    # n_samples * n_samples_per_cluster
    shuffle_indices = np.arange(30)
    rng.shuffle(shuffle_indices)
    X_shuffle = X[shuffle_indices, :]
    brc = Birch(n_clusters=4, threshold=1.)
    brc.fit(X_shuffle)
    centroids = brc.subcluster_centers_
    assert_array_equal(brc.labels_, brc.predict(X_shuffle))
    nearest_centroid = pairwise_distances_argmin(X_shuffle, centroids)
    assert_almost_equal(v_measure_score(nearest_centroid, brc.labels_), 1.0) 
示例3
def bench_k_means(estimator, name, data):
    estimator.fit(data)
    # A short explanation for every score:
    # homogeneity:          each cluster contains only members of a single class (range 0 - 1)
    # completeness:         all members of a given class are assigned to the same cluster (range 0 - 1)
    # v_measure:            harmonic mean of homogeneity and completeness
    # adjusted_rand:        similarity of the actual values and their predictions,
    #                       ignoring permutations and with chance normalization
    #                       (range -1 to 1, -1 being bad, 1 being perfect and 0 being random)
    # adjusted_mutual_info: agreement of the actual values and predictions, ignoring permutations
    #                       (range 0 - 1, with 0 being random agreement and 1 being perfect agreement)
    # silhouette:           uses the mean distance between a sample and all other points in the same class,
    #                       as well as the mean distance between a sample and all other points in the nearest cluster
    #                       to calculate a score (range: -1 to 1, with the former being incorrect,
    #                       and the latter standing for highly dense clustering.
    #                       0 indicates overlapping clusters.
    print('%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
          'adjusted-mutual-info: %.3f \tsilhouette: %.3f'
          % (name, estimator.inertia_,
             metrics.homogeneity_score(y, estimator.labels_),
             metrics.completeness_score(y, estimator.labels_),
             metrics.v_measure_score(y, estimator.labels_),
             metrics.adjusted_rand_score(y, estimator.labels_),
             metrics.adjusted_mutual_info_score(y,  estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean'))) 
示例4
def v_measure_kmeans_scorer(self, min_similarity):
        return self.kmeans_scorer(
            metrics.v_measure_score,
            min_similarity
        ) 
示例5
def v_measure_dbscan_scorer(self, min_similarity):
        return self.dbscan_scorer(
            metrics.v_measure_score,
            min_similarity
        ) 
示例6
def _compute_vmeasure_score(labels, predictions):
  vmeasure_score = math_ops.to_float(
      script_ops.py_func(
          metrics.v_measure_score, [labels, predictions], [dtypes.float64],
          name='vmeasure'))
  return math_ops.maximum(0.0, vmeasure_score) 
示例7
def nmi(labels):
    """
    Calculates normalized mutual information for all combinations of `labels`

    Uses :py:func:`sklearn.metrics.v_measure_score` for calculation; refer to
    that codebase for information on algorithm.

    Parameters
    ----------
    labels : m-length list of (N,) array_like
        List of label arrays

    Returns
    -------
    nmi : (m x m) np.ndarray
        NMI score for all combinations of `labels`

    Examples
    --------
    >>> import numpy as np
    >>> label1 = np.array([1, 1, 1, 2, 2, 2])
    >>> label2 = np.array([1, 1, 2, 2, 2, 2])

    >>> from snf import metrics
    >>> metrics.nmi([label1, label2])
    array([[1.        , 0.47870397],
           [0.47870397, 1.        ]])
    """

    # create empty array for output
    nmi = np.empty(shape=(len(labels), len(labels)))
    # get indices for all combinations of labels and calculate NMI
    for x, y in np.column_stack(np.triu_indices_from(nmi)):
        nmi[x, y] = v_measure_score(labels[x], labels[y])
    # make output symmetric
    nmi = np.triu(nmi) + np.triu(nmi, k=1).T

    return nmi 
示例8
def rank_feature_by_nmi(inputs, W, *, K=20, mu=0.5, n_clusters=None):
    """
    Calculates NMI of each feature in `inputs` with `W`

    Parameters
    ----------
    inputs : list-of-tuple
        Each tuple should contain (1) an (N, M) data array, where N is samples
        M is features, and (2) a string indicating the metric to use to compute
        a distance matrix for the given data. This MUST be one of the options
        available in :py:func:`scipy.spatial.distance.cdist`
    W : (N, N) array_like
        Similarity array generated by :py:func:`snf.compute.snf`
    K : (0, N) int, optional
        Hyperparameter normalization factor for scaling. Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 0.5
    n_clusters : int, optional
        Number of desired clusters. Default: determined by eigengap (see
        `snf.get_n_clusters()`)

    Returns
    -------
    nmi : list of (M,) np.ndarray
        Normalized mutual information scores for each feature of input arrays
    """

    if n_clusters is None:
        n_clusters = compute.get_n_clusters(W)[0]
    snf_labels = spectral_clustering(W, n_clusters)
    nmi = [np.empty(shape=(d.shape[-1])) for d, m in inputs]
    for ndtype, (dtype, metric) in enumerate(inputs):
        for nfeature, feature in enumerate(np.asarray(dtype).T):
            aff = compute.make_affinity(np.vstack(feature), K=K, mu=mu,
                                        metric=metric)
            aff_labels = spectral_clustering(aff, n_clusters)
            nmi[ndtype][nfeature] = v_measure_score(snf_labels, aff_labels)

    return nmi 
示例9
def bench_k_means(estimator, name, data):
    t0 = time()
    estimator.fit(data)
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels,  estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size))) 
示例10
def test_v_measure_score(self):
        result = self.df.metrics.v_measure_score()
        expected = metrics.v_measure_score(self.target, self.pred)
        self.assertEqual(result, expected) 
示例11
def test_KMeans_scores(self):
        digits = datasets.load_digits()
        df = pdml.ModelFrame(digits)

        scaled = pp.scale(digits.data)
        df.data = df.data.pp.scale()
        self.assert_numpy_array_almost_equal(df.data.values, scaled)

        clf1 = cluster.KMeans(init='k-means++', n_clusters=10,
                              n_init=10, random_state=self.random_state)
        clf2 = df.cluster.KMeans(init='k-means++', n_clusters=10,
                                 n_init=10, random_state=self.random_state)
        clf1.fit(scaled)
        df.fit_predict(clf2)

        expected = m.homogeneity_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.homogeneity_score(), expected)

        expected = m.completeness_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.completeness_score(), expected)

        expected = m.v_measure_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.v_measure_score(), expected)

        expected = m.adjusted_rand_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.adjusted_rand_score(), expected)

        expected = m.homogeneity_score(digits.target, clf1.labels_)
        self.assertEqual(df.metrics.homogeneity_score(), expected)

        expected = m.silhouette_score(scaled, clf1.labels_, metric='euclidean',
                                      sample_size=300, random_state=self.random_state)
        result = df.metrics.silhouette_score(metric='euclidean', sample_size=300,
                                             random_state=self.random_state)
        self.assertAlmostEqual(result, expected) 
示例12
def bench_k_means(estimator, name, data):
    t0 = time()
    estimator.fit(data)
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels,  estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)))