Python源码示例:sklearn.metrics.v_measure_score()
示例1
def test_birch_predict():
    """Check that ``Birch.predict`` agrees with nearest-subcluster-centroid assignment."""
    n_clusters, n_samples_per_cluster = 3, 10
    rng = np.random.RandomState(0)
    X = generate_clustered_data(n_clusters=n_clusters, n_features=3,
                                n_samples_per_cluster=n_samples_per_cluster)

    # One index per generated row: n_clusters * n_samples_per_cluster in total,
    # shuffled with the seeded generator so the row order is reproducible.
    shuffle_indices = np.arange(n_clusters * n_samples_per_cluster)
    rng.shuffle(shuffle_indices)
    X_shuffle = X[shuffle_indices, :]

    brc = Birch(n_clusters=4, threshold=1.)
    brc.fit(X_shuffle)

    # Predicting on the training data must reproduce the fitted labels.
    assert_array_equal(brc.labels_, brc.predict(X_shuffle))

    # Assigning every sample to its closest subcluster centre must induce the
    # same partition as the fitted labels (V-measure of 1.0).
    centroids = brc.subcluster_centers_
    nearest_centroid = pairwise_distances_argmin(X_shuffle, centroids)
    assert_almost_equal(v_measure_score(nearest_centroid, brc.labels_), 1.0)
示例2
def test_birch_predict():
    """Verify that ``Birch.predict`` matches assignment to the nearest subcluster centroid."""
    cluster_count = 3
    per_cluster = 10
    rng = np.random.RandomState(0)
    X = generate_clustered_data(n_clusters=cluster_count, n_features=3,
                                n_samples_per_cluster=per_cluster)

    # Total number of rows is n_clusters * n_samples_per_cluster; shuffle
    # their order using the seeded random state.
    shuffle_indices = np.arange(cluster_count * per_cluster)
    rng.shuffle(shuffle_indices)
    X_shuffle = X[shuffle_indices, :]

    brc = Birch(n_clusters=4, threshold=1.)
    brc.fit(X_shuffle)

    # The labels stored at fit time must equal a fresh prediction on the
    # same data.
    fitted_labels = brc.labels_
    assert_array_equal(fitted_labels, brc.predict(X_shuffle))

    # Nearest-centroid indices and the fitted labels must describe the same
    # grouping of samples, i.e. score 1.0 under V-measure.
    nearest_centroid = pairwise_distances_argmin(X_shuffle,
                                                 brc.subcluster_centers_)
    assert_almost_equal(v_measure_score(nearest_centroid, brc.labels_), 1.0)
示例3
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one tab-separated line of scores.

    The ground-truth labels are read from the free variable ``y``, which must
    be defined in the enclosing module scope.

    Reported scores, in print order:

    * inertia: ``estimator.inertia_`` (formatted as an integer).
    * homogeneity: each cluster contains only members of a single class
      (0 to 1).
    * completeness: all members of a given class are assigned to the same
      cluster (0 to 1).
    * v-measure: harmonic mean of homogeneity and completeness.
    * adjusted-rand: similarity of the true labels and the predictions,
      ignoring permutations and normalised for chance (1 is perfect,
      values near 0 indicate random labelling).
    * adjusted-mutual-info: agreement of the true labels and the predictions,
      ignoring permutations and adjusted for chance (1 is perfect agreement).
    * silhouette: compares the mean distance from a sample to the other
      points of its own cluster with the mean distance to the points of the
      nearest other cluster (-1 to 1; 0 indicates overlapping clusters,
      values near 1 indicate dense, well-separated clusters).
    """
    estimator.fit(data)

    report_format = (
        '%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
        'adjusted-mutual-info: %.3f \tsilhouette: %.3f'
    )

    inertia = estimator.inertia_
    predicted = estimator.labels_
    scores = (
        metrics.homogeneity_score(y, predicted),
        metrics.completeness_score(y, predicted),
        metrics.v_measure_score(y, predicted),
        metrics.adjusted_rand_score(y, predicted),
        metrics.adjusted_mutual_info_score(y, predicted),
        metrics.silhouette_score(data, predicted, metric='euclidean'),
    )
    print(report_format % ((name, inertia) + scores))
示例4
def v_measure_kmeans_scorer(self, min_similarity):
    """Delegate to ``self.kmeans_scorer`` with V-measure as the scoring function.

    ``metrics.v_measure_score`` and ``min_similarity`` are passed through
    unchanged; the return value is whatever ``self.kmeans_scorer`` returns.
    """
    score_fn = metrics.v_measure_score
    return self.kmeans_scorer(score_fn, min_similarity)
示例5
def v_measure_dbscan_scorer(self, min_similarity):
    """Delegate to ``self.dbscan_scorer`` with V-measure as the scoring function.

    ``metrics.v_measure_score`` and ``min_similarity`` are passed through
    unchanged; the return value is whatever ``self.dbscan_scorer`` returns.
    """
    score_fn = metrics.v_measure_score
    return self.dbscan_scorer(score_fn, min_similarity)
示例6
def _compute_vmeasure_score(labels, predictions):
    """Build an op that evaluates V-measure for ``labels`` vs ``predictions``.

    ``metrics.v_measure_score`` is wrapped with ``script_ops.py_func`` (output
    declared as ``dtypes.float64``), the result is converted with
    ``math_ops.to_float``, and the value is clamped from below at 0.0.
    """
    raw_score = script_ops.py_func(
        metrics.v_measure_score,
        [labels, predictions],
        [dtypes.float64],
        name='vmeasure')
    vmeasure_score = math_ops.to_float(raw_score)
    # Never report a negative score.
    return math_ops.maximum(0.0, vmeasure_score)
示例7
def nmi(labels):
    """
    Calculates normalized mutual information for all combinations of `labels`

    Uses :py:func:`sklearn.metrics.v_measure_score` for calculation; refer to
    that codebase for information on algorithm.

    Parameters
    ----------
    labels : m-length list of (N,) array_like
        List of label arrays

    Returns
    -------
    nmi : (m x m) np.ndarray
        NMI score for all combinations of `labels`

    Examples
    --------
    >>> import numpy as np
    >>> label1 = np.array([1, 1, 1, 2, 2, 2])
    >>> label2 = np.array([1, 1, 2, 2, 2, 2])

    >>> from snf import metrics
    >>> metrics.nmi([label1, label2])
    array([[1.        , 0.47870397],
           [0.47870397, 1.        ]])
    """
    n_labels = len(labels)
    scores = np.empty(shape=(n_labels, n_labels))

    # Only the upper triangle (diagonal included) is computed explicitly.
    rows, cols = np.triu_indices_from(scores)
    for row, col in zip(rows, cols):
        scores[row, col] = v_measure_score(labels[row], labels[col])

    # Mirror the strictly-upper part into the lower triangle; np.triu also
    # discards the uninitialised lower-triangle entries left by np.empty.
    upper = np.triu(scores)
    strictly_upper = np.triu(scores, k=1)
    return upper + strictly_upper.T
示例8
def rank_feature_by_nmi(inputs, W, *, K=20, mu=0.5, n_clusters=None):
    """
    Calculates NMI of each feature in `inputs` with `W`

    Parameters
    ----------
    inputs : list-of-tuple
        Each tuple should contain (1) an (N, M) data array, where N is samples
        M is features, and (2) a string indicating the metric to use to compute
        a distance matrix for the given data. This MUST be one of the options
        available in :py:func:`scipy.spatial.distance.cdist`
    W : (N, N) array_like
        Similarity array generated by :py:func:`snf.compute.snf`
    K : (0, N) int, optional
        Hyperparameter normalization factor for scaling. Default: 20
    mu : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 0.5
    n_clusters : int, optional
        Number of desired clusters. Default: determined by eigengap (see
        `snf.get_n_clusters()`)

    Returns
    -------
    nmi : list of (M,) np.ndarray
        Normalized mutual information scores for each feature of input arrays
    """
    if n_clusters is None:
        n_clusters = compute.get_n_clusters(W)[0]
    snf_labels = spectral_clustering(W, n_clusters)

    nmi = []
    # Single pass over `inputs`: the output buffer is allocated from the same
    # coerced array that is iterated, so array-likes without a `.shape`
    # attribute (e.g. nested lists) work, and a one-shot iterable is not
    # exhausted before the scoring loop runs.
    for data, metric in inputs:
        data = np.asarray(data)
        scores = np.empty(shape=(data.shape[-1]))
        for nfeature, feature in enumerate(data.T):
            # np.vstack turns the 1-D feature vector into a single-column
            # 2-D array before the affinity is computed.
            aff = compute.make_affinity(np.vstack(feature), K=K, mu=mu,
                                        metric=metric)
            aff_labels = spectral_clustering(aff, n_clusters)
            scores[nfeature] = v_measure_score(snf_labels, aff_labels)
        nmi.append(scores)

    return nmi
示例9
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print its fit time, inertia and scores.

    The ground-truth ``labels`` and the silhouette ``sample_size`` are read
    from free variables that must be defined in the enclosing module scope.
    Columns printed, in order: name, elapsed seconds, inertia, homogeneity,
    completeness, V-measure, adjusted Rand, adjusted mutual information,
    silhouette.
    """
    t0 = time()
    estimator.fit(data)
    # Stop the clock before any metric is evaluated.
    elapsed = time() - t0

    inertia = estimator.inertia_
    predicted = estimator.labels_
    homogeneity = metrics.homogeneity_score(labels, predicted)
    completeness = metrics.completeness_score(labels, predicted)
    v_measure = metrics.v_measure_score(labels, predicted)
    adjusted_rand = metrics.adjusted_rand_score(labels, predicted)
    adjusted_mutual_info = metrics.adjusted_mutual_info_score(labels, predicted)
    silhouette = metrics.silhouette_score(data, predicted,
                                          metric='euclidean',
                                          sample_size=sample_size)

    row = (name, elapsed, inertia, homogeneity, completeness, v_measure,
           adjusted_rand, adjusted_mutual_info, silhouette)
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f' % row)
示例10
def test_v_measure_score(self):
    """The ``df.metrics`` accessor must match ``metrics.v_measure_score(target, pred)``."""
    actual = self.df.metrics.v_measure_score()
    reference = metrics.v_measure_score(self.target, self.pred)
    self.assertEqual(actual, reference)
示例11
def test_KMeans_scores(self):
    """Compare ``ModelFrame`` clustering metrics with direct metric calls.

    A KMeans model is fitted on the scaled digits data both directly and
    through the ``ModelFrame`` accessor with identical parameters; every
    metric exposed via ``df.metrics`` must then equal the value computed by
    calling the corresponding function in ``m`` with the digits target and
    the directly fitted labels.
    """
    digits = datasets.load_digits()
    df = pdml.ModelFrame(digits)

    scaled = pp.scale(digits.data)
    df.data = df.data.pp.scale()
    self.assert_numpy_array_almost_equal(df.data.values, scaled)

    # Both estimators share one parameter set so they cannot drift apart.
    kmeans_params = dict(init='k-means++', n_clusters=10,
                         n_init=10, random_state=self.random_state)
    clf1 = cluster.KMeans(**kmeans_params)
    clf2 = df.cluster.KMeans(**kmeans_params)

    clf1.fit(scaled)
    df.fit_predict(clf2)

    # Each label-based metric is checked exactly once; the metric name is
    # attached to the assertion so a failure identifies the metric.
    # NOTE(review): adjusted_mutual_info_score could be added here if the
    # df.metrics accessor exposes it -- confirm before extending the list.
    label_metrics = ('homogeneity_score', 'completeness_score',
                     'v_measure_score', 'adjusted_rand_score')
    for metric_name in label_metrics:
        expected = getattr(m, metric_name)(digits.target, clf1.labels_)
        result = getattr(df.metrics, metric_name)()
        self.assertEqual(result, expected, msg=metric_name)

    # Silhouette uses sampling, so both sides get the same sampling settings
    # and are compared approximately.
    silhouette_params = dict(metric='euclidean', sample_size=300,
                             random_state=self.random_state)
    expected = m.silhouette_score(scaled, clf1.labels_, **silhouette_params)
    result = df.metrics.silhouette_score(**silhouette_params)
    self.assertAlmostEqual(result, expected)
示例12
def bench_k_means(estimator, name, data):
    """Time the fit of ``estimator`` on ``data`` and print a row of scores.

    Relies on the free variables ``labels`` (ground-truth labels) and
    ``sample_size`` (passed to the silhouette score), which must exist in the
    enclosing module scope. The printed row holds, in order: name, elapsed
    seconds, inertia, homogeneity, completeness, V-measure, adjusted Rand,
    adjusted mutual information and silhouette.
    """
    t0 = time()
    estimator.fit(data)
    # Measured right after fitting, before the metric computations start.
    fit_seconds = time() - t0

    inertia = estimator.inertia_
    assigned = estimator.labels_
    label_scores = (
        metrics.homogeneity_score(labels, assigned),
        metrics.completeness_score(labels, assigned),
        metrics.v_measure_score(labels, assigned),
        metrics.adjusted_rand_score(labels, assigned),
        metrics.adjusted_mutual_info_score(labels, assigned),
    )
    silhouette = metrics.silhouette_score(data, assigned,
                                          metric='euclidean',
                                          sample_size=sample_size)

    values = (name, fit_seconds, inertia) + label_scores + (silhouette,)
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f' % values)