Python源码示例:sklearn.datasets.fetch_california_housing()
示例1
def fetch(*args, **kwargs):
    """Call ``fetch_california_housing`` with downloading disabled.

    All positional and keyword arguments are forwarded unchanged, and
    ``download_if_missing=False`` is always added to the call. Because that
    keyword is supplied here, passing ``download_if_missing`` yourself
    results in a ``TypeError`` for a duplicated keyword argument.

    Returns whatever ``fetch_california_housing`` returns.
    """
    housing = fetch_california_housing(
        *args, download_if_missing=False, **kwargs
    )
    return housing
示例2
def _download_climate_model_crashes():
    """Download the climate-model-crashes table from UCI and parse it.

    Returns a dict with ``"data"`` (every column except the last, as floats)
    and ``"target"`` (the last column cast to integers).
    """
    from contextlib import closing

    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00252/pop_failures.dat"
    # ``closing`` guarantees the HTTP response is released even on errors.
    with closing(urlopen(url)) as response:
        # ``read()`` yields bytes on Python 3; decode before splitting lines.
        text = response.read().decode("utf-8")

    # Skip the first line (presumably a header row) and ignore blank lines,
    # e.g. the empty string left behind by a trailing newline.
    rows = [
        [float(value) for value in line.split()]
        for line in text.splitlines()[1:]
        if line.strip()
    ]
    samples = np.array(rows)
    return {
        "data": samples[:, :-1],
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from recent NumPy releases.
        "target": np.array(samples[:, -1], dtype=int),
    }


def load_data_target(name):
    """
    Loads data and target given the name of the dataset.

    Parameters
    ----------
    name : str
        One of ``"Boston"``, ``"Housing"``, ``"digits"`` or
        ``"Climate Model Crashes"``.

    Returns
    -------
    tuple
        ``(data, target)``, taken from the ``"data"`` and ``"target"``
        entries of the loaded dataset. For ``"Housing"`` only the first
        1000 samples are returned.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported dataset names.
    """
    if name == "Boston":
        data = load_boston()
    elif name == "Housing":
        data = fetch_california_housing()
        dataset_size = 1000  # this is necessary so that SVR does not slow down too much
        data["data"] = data["data"][:dataset_size]
        data["target"] = data["target"][:dataset_size]
    elif name == "digits":
        data = load_digits()
    elif name == "Climate Model Crashes":
        try:
            data = fetch_mldata("climate-model-simulation-crashes")
        except HTTPError:
            # Primary source unavailable: fall back to the raw UCI file.
            data = _download_climate_model_crashes()
    else:
        raise ValueError("dataset not supported.")
    return data["data"], data["target"]
示例3
def load_housing():
    """
    Build a shuffled 80/20 train/test split of the California housing data.

    The features are centred and scaled column-wise using the mean and
    standard deviation of the full dataset (computed before any rows are
    dropped). Samples whose target is not below 5 are discarded and the
    remaining targets are log-transformed. Rows are then shuffled after
    seeding NumPy's global random state with 12345.

    Returns
    -------
    dict
        ``'err'`` is the string ``'mse'``; ``'trn_X'`` / ``'tst_X'`` hold the
        first 80% / remaining rows of the features; ``'trn_Y'`` / ``'tst_Y'``
        hold the matching targets passed through ``np.atleast_2d(...).T``
        (a single column when the target is 1-D).
    """
    from sklearn.datasets import fetch_california_housing

    housing = fetch_california_housing()
    features = housing['data']
    prices = housing['target']

    # Standardise each feature column in place.
    features -= features.mean(axis=0)
    features /= features.std(axis=0)

    # Housing prices above 5 are all collapsed to 5, which makes the Y
    # distribution very strange. Drop those samples.
    below_cap = prices < 5
    features = features[below_cap]
    prices = np.log(prices[below_cap])

    # Fixed seed on the global RNG so the shuffle is reproducible.
    np.random.seed(12345)
    order = np.random.permutation(len(features))
    features = features[order]
    prices = prices[order]

    n_train = int(len(features) * 0.8)
    train_X, test_X = features[:n_train], features[n_train:]
    train_Y = np.atleast_2d(prices[:n_train]).T
    test_Y = np.atleast_2d(prices[n_train:]).T

    return {
        'err': 'mse',
        'trn_X': train_X,
        'trn_Y': train_Y,
        'tst_X': test_X,
        'tst_Y': test_Y,
    }