# -*- coding: utf-8 -*-
"""Split the California housing CSV into training/validation sets and plot them.

The script reads ``california_housing_train.csv`` from the working directory,
derives the feature and target frames, prints summary statistics for each
split, and draws a longitude/latitude scatter plot per split, coloured by the
normalised ``median_house_value``.
"""
from concurrent.futures import ThreadPoolExecutor
import time

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Raw input columns that are kept as model features.
_FEATURE_COLUMNS = [
    "latitude",
    "longitude",
    "housing_median_age",
    "total_rooms",
    "total_bedrooms",
    "population",
    "households",
    "median_income",
]

# Fixed axis limits shared by both scatter plots so they are directly comparable.
_LATITUDE_LIMITS = [32, 43]
_LONGITUDE_LIMITS = [-126, -112]

datatmsp = pd.read_csv('california_housing_train.csv')


def process_features(data):
    """Return the feature frame derived from the raw housing data.

    Args:
        data: DataFrame containing at least the columns in ``_FEATURE_COLUMNS``.

    Returns:
        A new DataFrame with the selected feature columns plus a synthetic
        ``rooms_per_person`` column (``total_rooms / population``). The input
        frame is not modified.
    """
    processing = data[_FEATURE_COLUMNS].copy()
    processing["rooms_per_person"] = (
        processing["total_rooms"] / processing["population"]
    )
    return processing


def process_targets(data):
    """Return the target frame derived from the raw housing data.

    Args:
        data: DataFrame containing a ``median_house_value`` column.

    Returns:
        A new single-column DataFrame holding ``median_house_value`` divided
        by 1000.0. The input frame is not modified.
    """
    # Select first, then copy: assigning into an un-copied column selection
    # can raise pandas' SettingWithCopyWarning, and copying the whole frame
    # before selecting one column is wasted work.
    processing = data[["median_house_value"]].copy()
    processing["median_house_value"] = processing["median_house_value"] / 1000.0
    return processing


def _plot_split(position, title, examples, targets):
    """Draw one longitude/latitude scatter panel in a 1x2 subplot grid.

    Points are coloured by ``median_house_value`` normalised by the maximum
    of that column within ``targets``.
    """
    ax = plt.subplot(1, 2, position)
    ax.set_title(title)
    # Autoscaling is disabled so the explicit limits below are kept.
    ax.set_autoscaley_on(False)
    ax.set_ylim(_LATITUDE_LIMITS)
    ax.set_autoscalex_on(False)
    ax.set_xlim(_LONGITUDE_LIMITS)
    house_values = targets["median_house_value"]
    plt.scatter(
        examples["longitude"],
        examples["latitude"],
        cmap="coolwarm",
        c=house_values / house_values.max(),
    )


# Process the full frame once and slice it, instead of re-processing per split.
_all_features = process_features(datatmsp)
_all_targets = process_targets(datatmsp)

# NOTE(review): head/tail splitting presumes the CSV rows are in random order;
# if the file is sorted by some column the two splits will not be comparable
# -- TODO confirm (the scatter plots below make this visible).
trainning_examples = _all_features.head(12000)
print(trainning_examples.describe())

trainning_targets = _all_targets.head(12000)
print(trainning_targets.describe())

valid_examples = _all_features.tail(5000)
print(valid_examples.describe())

valid_targets = _all_targets.tail(5000)
print(valid_targets.describe())

plt.figure(figsize=(13, 8))
_plot_split(1, "Validation Data", valid_examples, valid_targets)
_plot_split(2, "Training Data", trainning_examples, trainning_targets)
plt.show()
谷歌机器学习笔记之plt作图
最新推荐文章于 2022-06-25 15:30:28 发布