Python源码示例:sklearn.metrics.homogeneity_score()
示例1
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one row of clustering scores.

    The row contains ``name``, the fitted ``estimator.inertia_`` and six
    metrics computed from ``estimator.labels_``. Five of them compare the
    labels against ``y``, which is not defined in this function and must be
    provided by the surrounding module (presumably the ground-truth labels;
    confirm against the caller).

    Quick guide to the printed scores:

    * homogeneity: each cluster contains only members of a single class
      (0 to 1).
    * completeness: all members of a given class are assigned to the same
      cluster (0 to 1).
    * v-measure: harmonic mean of homogeneity and completeness.
    * adjusted-rand: similarity between ``y`` and the predicted labels,
      ignoring label permutations and normalised for chance (1 is a perfect
      match, values near 0 correspond to random labelling).
    * adjusted-mutual-info: agreement between ``y`` and the predicted
      labels, ignoring permutations (1 is perfect agreement, values near 0
      correspond to chance agreement).
    * silhouette: combines the mean distance from a sample to the other
      points of its own cluster with the mean distance to the points of the
      nearest other cluster (-1 to 1; 1 stands for dense, well separated
      clusters, values near 0 indicate overlapping clusters and negative
      values point to samples placed in the wrong cluster).
    """
    estimator.fit(data)

    inertia = estimator.inertia_
    predicted = estimator.labels_

    # One fragment per printed column; the pieces concatenate to the single
    # format string used for the row.
    row_format = (
        '%-9s \t%i'
        ' \thomogeneity: %.3f'
        ' \tcompleteness: %.3f'
        ' \tv-measure: %.3f'
        ' \tadjusted-rand: %.3f'
        ' \tadjusted-mutual-info: %.3f'
        ' \tsilhouette: %.3f'
    )

    # Scores are computed in the same order in which they are printed.
    homogeneity = metrics.homogeneity_score(y, predicted)
    completeness = metrics.completeness_score(y, predicted)
    v_measure = metrics.v_measure_score(y, predicted)
    adjusted_rand = metrics.adjusted_rand_score(y, predicted)
    adjusted_mutual_info = metrics.adjusted_mutual_info_score(y, predicted)
    silhouette = metrics.silhouette_score(data, predicted, metric='euclidean')

    print(row_format % (
        name,
        inertia,
        homogeneity,
        completeness,
        v_measure,
        adjusted_rand,
        adjusted_mutual_info,
        silhouette,
    ))
示例2
def homogeneity_kmeans_scorer(self, min_similarity):
    """Delegate to ``self.kmeans_scorer`` using the homogeneity metric.

    ``min_similarity`` is forwarded unchanged; its meaning is defined by
    ``kmeans_scorer``, which is not visible here.

    Returns whatever ``self.kmeans_scorer`` returns.
    """
    score_func = metrics.homogeneity_score
    return self.kmeans_scorer(score_func, min_similarity)
示例3
def homogeneity_dbscan_scorer(self, min_similarity):
    """Delegate to ``self.dbscan_scorer`` using the homogeneity metric.

    ``min_similarity`` is forwarded unchanged; its meaning is defined by
    ``dbscan_scorer``, which is not visible here.

    Returns whatever ``self.dbscan_scorer`` returns.
    """
    score_func = metrics.homogeneity_score
    return self.dbscan_scorer(score_func, min_similarity)
示例4
def evaluate_on_digits():
    """Cluster the digits dataset with a GrowingNeuralGas and report results.

    Prints the number of connected components of the fitted network, then the
    homogeneity of the inferred clusters against the dataset's targets, and
    finally asks the model to plot its clusters.
    """
    digits = datasets.load_digits()
    samples, true_labels = digits.data, digits.target

    gng = GrowingNeuralGas(samples)
    gng.fit_network(
        e_b=0.05,
        e_n=0.006,
        a_max=8,
        l=100,
        a=0.5,
        d=0.995,
        passes=5,
        plot_evolution=False,
    )

    assignments = gng.cluster_data()
    component_count = nx.number_connected_components(gng.network)
    print('Found %d clusters.' % component_count)

    # Each item unpacks as (observation, cluster); only the cluster id is
    # needed for scoring.
    inferred_labels = [cluster for _observation, cluster in assignments]
    print(metrics.homogeneity_score(true_labels, inferred_labels))

    # cluster_data() is deliberately called a second time for the plot, as
    # in the original flow.
    gng.plot_clusters(gng.reduce_dimension(gng.cluster_data()))
示例5
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print a timing/score summary row.

    Columns, in order: ``name``, fit time in seconds, ``estimator.inertia_``,
    homogeneity, completeness, v-measure, adjusted Rand index, adjusted
    mutual information and silhouette.

    ``labels`` and ``sample_size`` are not defined in this function and must
    be provided by the surrounding module (``labels`` is presumably the
    ground truth; confirm against the caller).
    """
    started = time()
    estimator.fit(data)
    # Only the fit is timed: take the reading before any metric is computed.
    fit_seconds = time() - started

    inertia = estimator.inertia_
    predicted = estimator.labels_

    homogeneity = metrics.homogeneity_score(labels, predicted)
    completeness = metrics.completeness_score(labels, predicted)
    v_measure = metrics.v_measure_score(labels, predicted)
    adjusted_rand = metrics.adjusted_rand_score(labels, predicted)
    adjusted_mutual_info = metrics.adjusted_mutual_info_score(labels, predicted)
    silhouette = metrics.silhouette_score(
        data,
        predicted,
        metric='euclidean',
        sample_size=sample_size,
    )

    row = (
        name,
        fit_seconds,
        inertia,
        homogeneity,
        completeness,
        v_measure,
        adjusted_rand,
        adjusted_mutual_info,
        silhouette,
    )
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f' % row)
示例6
def test_homogeneity_score(self):
    """The frame's homogeneity score must equal the direct metrics call.

    The reference value is ``metrics.homogeneity_score`` applied to
    ``self.target`` and ``self.pred``.
    """
    actual = self.df.metrics.homogeneity_score()
    reference = metrics.homogeneity_score(self.target, self.pred)
    self.assertEqual(actual, reference)
示例7
def test_KMeans_scores(self):
    """Compare clustering scores from the ModelFrame accessors with sklearn's.

    Two KMeans estimators with identical settings are fitted on the scaled
    digits data, one directly and one through the frame. Each label-based
    score read from ``df.metrics`` must equal the one computed from the
    directly fitted estimator, and the silhouette scores must be almost
    equal.
    """
    digits = datasets.load_digits()
    df = pdml.ModelFrame(digits)

    scaled = pp.scale(digits.data)
    df.data = df.data.pp.scale()
    self.assert_numpy_array_almost_equal(df.data.values, scaled)

    # Both estimators share exactly the same configuration.
    kmeans_kwargs = dict(init='k-means++', n_clusters=10, n_init=10,
                         random_state=self.random_state)
    clf1 = cluster.KMeans(**kmeans_kwargs)
    clf2 = df.cluster.KMeans(**kmeans_kwargs)

    clf1.fit(scaled)
    df.fit_predict(clf2)

    # NOTE(review): homogeneity_score is listed twice, mirroring the original
    # sequence of checks; the last entry may have been meant to be a
    # different metric -- confirm before changing.
    label_metrics = (
        'homogeneity_score',
        'completeness_score',
        'v_measure_score',
        'adjusted_rand_score',
        'homogeneity_score',
    )
    for metric_name in label_metrics:
        expected = getattr(m, metric_name)(digits.target, clf1.labels_)
        self.assertEqual(getattr(df.metrics, metric_name)(), expected)

    # The silhouette is computed on a 300-sample subset, so both sides use
    # the same sampling settings.
    silhouette_kwargs = dict(metric='euclidean', sample_size=300,
                             random_state=self.random_state)
    expected = m.silhouette_score(scaled, clf1.labels_, **silhouette_kwargs)
    result = df.metrics.silhouette_score(**silhouette_kwargs)
    self.assertAlmostEqual(result, expected)
示例8
def bench_k_means(estimator, name, data):
    """Time the fit of ``estimator`` on ``data`` and print its scores.

    The printed row holds ``name``, the fit duration in seconds,
    ``estimator.inertia_``, five scores that compare ``estimator.labels_``
    with ``labels``, and the silhouette score of ``data``.

    ``labels`` and ``sample_size`` are not defined in this function and must
    be provided by the surrounding module (``labels`` is presumably the
    ground truth; confirm against the caller).
    """
    t_start = time()
    estimator.fit(data)
    # Stop the clock before scoring so that only the fit is measured.
    duration = time() - t_start

    row = [name, duration, estimator.inertia_]

    # Scorers that take (reference labels, predicted labels), in print order.
    label_scorers = (
        metrics.homogeneity_score,
        metrics.completeness_score,
        metrics.v_measure_score,
        metrics.adjusted_rand_score,
        metrics.adjusted_mutual_info_score,
    )
    for scorer in label_scorers:
        row.append(scorer(labels, estimator.labels_))

    row.append(
        metrics.silhouette_score(
            data,
            estimator.labels_,
            metric='euclidean',
            sample_size=sample_size,
        )
    )

    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f' % tuple(row))