Python源码示例:sklearn.metrics.mutual_info_score()
示例1
def ami(x, y=None, n_bins=10):
    """Compute the average mutual information between $x(t)$ and $y(t)$.

    Both series are binned into a joint 2-D histogram, and that histogram
    is used as the contingency table for the mutual information score.

    Parameters
    ----------
    x : array-like
    y : array-like, optional
        The series $x(t)$ and $y(t)$.
        If only `x` is passed, it must have two columns;
        the first column defines $x(t)$ and the second $y(t)$.
    n_bins : int
        The number of bins to use when computing the joint histogram.

    Returns
    -------
    scalar
        Average mutual information between $x(t)$ and $y(t)$, in nats
        (the natural-logarithm counterpart of bits).

    Raises
    ------
    ValueError
        If the two series do not have the same length.

    See Also
    --------
    lagged_ami

    References
    ----------
    Abarbanel, H. D. I. (1996). *Analysis of Observed Chaotic Data* (p. 28).
    New York: Springer.
    """
    series_x, series_y = _vector_pair(x, y)

    # Guard clause: a joint histogram only makes sense for paired samples.
    if series_x.shape[0] != series_y.shape[0]:
        raise ValueError('timeseries must have the same length')

    joint_counts, _, _ = np.histogram2d(series_x, series_y, bins=n_bins)
    return metrics.mutual_info_score(None, None, contingency=joint_counts)
示例2
def mutual_information(x, y, bins=8):
    """Mutual information score computed over a binned joint histogram.

    Convenience wrapper around `sklearn.metrics.mutual_info_score` that
    first builds a contingency table with `np.histogram2d`.

    Credit: `Warren Weckesser <https://stackoverflow.com/a/20505476/3996580>`_.

    Parameters
    ----------
    x : array-like, shape=[n_samples]
        Input data (feature 1)
    y : array-like, shape=[n_samples]
        Input data (feature 2)
    bins : int or array-like, (default: 8)
        Passed to np.histogram2d to calculate a contingency table.

    Returns
    -------
    mi : float
        Mutual information between x and y.

    Examples
    --------
    >>> import scprep
    >>> data = scprep.io.load_csv("my_data.csv")
    >>> mi = scprep.stats.mutual_information(data['GENE1'], data['GENE2'])
    """
    dense_x, dense_y = _vector_coerce_two_dense(x, y)
    # Only the count matrix is needed; the bin edges are discarded.
    contingency, _, _ = np.histogram2d(dense_x, dense_y, bins)
    return metrics.mutual_info_score(None, None, contingency=contingency)
示例3
def calc_MI(x, y, bins):
    """Return the mutual information of `x` and `y` from a 2-D histogram.

    `bins` is forwarded unchanged to `np.histogram2d`; the resulting count
    matrix is handed to `mutual_info_score` as its contingency table, so
    the two label arguments are passed as None.
    """
    joint_counts, _x_edges, _y_edges = np.histogram2d(x, y, bins)
    return mutual_info_score(None, None, contingency=joint_counts)
示例4
def test_mutual_info_score(self):
    """The frame's metrics accessor must agree with a direct metrics call."""
    self.assertEqual(
        self.df.metrics.mutual_info_score(),
        metrics.mutual_info_score(self.target, self.pred),
    )
示例5
def permutation_test_ct2(data, num_samples=10000):
    """
    Monte-Carlo permutation test for a 2-way contingency table.

    The table is expanded into one (row index, column index) label pair
    per counted observation. The row labels are then shuffled in place
    `num_samples` times, and the mutual information of each shuffled
    pairing is compared with the observed value.

    Parameters
    ----------
    data :
        the contingency table (a pandas DataFrame is converted to an array)
    num_samples :
        the number of random permutations to perform

    Returns
    -------
    pval :
        the p-value: the fraction of permutations whose mutual information
        is strictly greater than the observed one, never reported below
        ``1 / num_samples``

    References
    ----------
    https://en.wikipedia.org/wiki/Resampling_(statistics)
    """
    table = np.array(data) if isinstance(data, pd.DataFrame) else data
    shape = table.shape

    # Expand the cell counts into paired label lists.
    row_labels = []
    col_labels = []
    for i in range(shape[0]):
        for j in range(shape[1]):
            count = table[i, j]
            row_labels += [i] * count
            col_labels += [j] * count

    observed = metrics.mutual_info_score(row_labels, col_labels)

    exceed = 0
    for _ in range(num_samples):
        # Shuffling one side only destroys the pairing while keeping
        # both marginal distributions intact.
        np.random.shuffle(row_labels)
        exceed += observed < metrics.mutual_info_score(row_labels, col_labels)

    return max((1.0 * exceed) / num_samples, 1.0 / num_samples)
示例6
async def accuracy(self, sources: Sources) -> Accuracy:
    """Score the trained clusterer on the records provided by `sources`.

    Declared ``async`` because the body iterates `sources` with
    ``async for``, which is a SyntaxError inside a plain ``def``.

    When a ground-truth cluster feature is configured
    (``self.parent.config.tcluster``) the score is
    ``mutual_info_score(truth, labels)``; otherwise it is
    ``silhouette_score(xdata, labels)``. Labels come from
    ``self.clf.predict`` when the estimator has that method, and from
    ``self.clf.labels_`` otherwise.

    Parameters
    ----------
    sources : Sources
        Record source; only records carrying ``self.features`` are read.

    Returns
    -------
    Accuracy
        The computed score, which is also stored on ``self.confidence``.

    Raises
    ------
    ModelNotTrained
        If ``self._filepath`` is not an existing file.
    """
    if not self._filepath.is_file():
        raise ModelNotTrained("Train model before assessing for accuracy.")

    # A ground-truth feature is only considered for clusterers.
    target = []
    if self.clf._estimator_type == "clusterer":
        tcluster = self.parent.config.tcluster
        target = [] if tcluster is None else [tcluster.name]

    xdata = []
    ydata = []
    async for record in sources.with_features(self.features):
        xdata.append(list(record.features(self.features).values()))
        ydata.append(list(record.features(target).values()))
    xdata = self.np.array(xdata)
    self.logger.debug("Number of input records: {}".format(len(xdata)))

    if hasattr(self.clf, "predict"):
        # Inductive clusterer: xdata can be training data or unseen data.
        labels = self.clf.predict(xdata)
    else:
        # Transductive clusterer: labels_ only describes the training
        # data, so xdata must be that same data.
        self.logger.critical(
            "Accuracy found transductive clusterer, ensure data being passed is training data"
        )
        labels = self.clf.labels_

    if target:
        # Ground truth available: compare it against the assigned labels.
        ydata = self.np.array(ydata).flatten()
        self.confidence = mutual_info_score(ydata, labels)
    else:
        # No ground truth: fall back to an internal clustering metric.
        self.confidence = silhouette_score(xdata, labels)

    self.logger.debug("Model Accuracy: {}".format(self.confidence))
    return self.confidence
示例7
def one_way_mi(df, feature_list, group_column, y_var, bins):
    """
    Calculate one-way mutual information between each feature and both a
    group variable and a target variable (y).

    Parameters
    ----------
    df : pandas DataFrame
        df with features used to train model, plus a target variable
        and a group column.
    feature_list : list
        List of strings, feature names.
    group_column : string
        name of column for testing bias, should contain numeric categories
    y_var : string
        name of target variable column
    bins : tuple
        number of bins for each dimension

    Returns
    -------
    mi_table : pandas DataFrame
        data frame with mutual information values, with one row per feature
        in the feature_list, columns for group and y, plus a
        ``<name>_scaled`` column for each of the two.
    """
    group_values = df[group_column].values
    y_values = df[y_var].values

    def _mi_per_feature(other):
        # One MI score per feature, from the joint histogram of the
        # feature column and `other`.
        scores = []
        for feature in feature_list:
            counts, _edges = np.histogramdd(
                [np.array(df[feature]), other], bins=bins
            )
            scores.append(mutual_info_score(None, None, contingency=counts))
        return scores

    mi_table = pd.DataFrame({
        'feature': feature_list,
        group_column: _mi_per_feature(group_values),
        y_var: _mi_per_feature(y_values),
    })

    # NOTE: Each MI column is divided by its own maximum so the highest MI
    # becomes 1, which makes relative importance to bias and performance
    # easier to read.
    for column in (group_column, y_var):
        mi_table["{}_scaled".format(column)] = (
            mi_table[column] / mi_table[column].max()
        )
    return mi_table