第15章 生成数据
mpl_squares.py
import matplotlib.pyplot as plt
# Plot the first five square numbers as a simple line chart.
input_value = list(range(1, 6))
squares = [value ** 2 for value in input_value]

# linewidth controls how thick the line drawn by plot() is.
plt.plot(input_value, squares, linewidth=5)

# Give the chart a title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Enlarge the tick labels on both axes.
plt.tick_params(axis="both", labelsize=14)

# Open the matplotlib viewer and display the chart.
plt.show()
scatter_squares.py
import matplotlib.pyplot as plt
# Scatter plot of the squares of 1..1000, shaded by the size of the square.
#
# Earlier experiments kept for reference:
#   plt.scatter(2, 4, s=200)                                  # a single point
#   plt.scatter([1, 2, 3, 4, 5], [1, 4, 9, 16, 25], s=100)    # five points
#   plt.scatter(x_values, y_values, c='black', edgecolors='None', s=20)
x_values = list(range(1, 1001))

# Derive every y value from its x value with a list comprehension.
y_values = [x ** 2 for x in x_values]

# c=y_values together with a colormap shades each point by its y value;
# edgecolors='none' removes the outline around each point.
plt.scatter(
    x_values,
    y_values,
    c=y_values,
    cmap=plt.cm.Blues,
    edgecolors='none',
    s=40,
)

# Give the chart a title and label both axes.
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Enlarge the major tick labels on both axes.
plt.tick_params(axis="both", which='major', labelsize=14)

# Fix the range of each axis: [x_min, x_max, y_min, y_max].
plt.axis([0, 1100, 0, 1100000])

plt.show()

# To save the chart to a file, use this in place of plt.show():
# plt.savefig('square_plot.png', bbox_inches='tight')
random_walk.py
from random import choice
class RandomWalk:
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Record the desired walk length and start the walk at (0, 0).

        num_points is the total number of points the finished walk holds,
        counting the starting point.
        """
        self.num_points = num_points

        # x_values[i] and y_values[i] together describe the i-th point.
        self.x_values = [0]
        self.y_values = [0]

    def _get_step(self):
        """Return one signed step: a random direction times a random distance."""
        direction = choice([-1, 1])
        distance = choice([0, 1, 2, 3, 4])
        return direction * distance

    def fill_walk(self):
        """Append random points until the walk contains num_points points."""
        while len(self.x_values) < self.num_points:
            # Draw the horizontal step first, then the vertical one.
            dx = self._get_step()
            dy = self._get_step()

            # A step that moves nowhere is discarded and drawn again.
            if not (dx or dy):
                continue

            # The new point is the previous point shifted by the step.
            self.x_values.append(self.x_values[-1] + dx)
            self.y_values.append(self.y_values[-1] + dy)
rw_visual.py
import matplotlib.pyplot as plt
from random_walk import RandomWalk
# Keep generating and displaying new random walks until the user answers 'n'.
while True:
    # Build a walk and compute all of its points.
    rw = RandomWalk(50000)
    rw.fill_walk()

    # Set the size of the plotting window.
    plt.figure(dpi=128, figsize=(10, 6))

    # Hide both axes. An earlier attempt placed before plt.figure(),
    #   plt.axes().get_xaxis().set_visible(False)
    #   plt.axes().get_yaxis().set_visible(False)
    # was noted as not working (it hid the walk instead of the axes).
    axes = plt.axes()
    for axis in (axes.xaxis, axes.yaxis):
        axis.set_visible(False)

    # Colour each point by its position in the walk via the Blues colormap.
    # Plain alternative:
    #   plt.scatter(rw.x_values, rw.y_values, s=5, c='black', edgecolors='None')
    point_numbers = list(range(rw.num_points))
    plt.scatter(
        rw.x_values,
        rw.y_values,
        s=1,
        c=point_numbers,
        cmap=plt.cm.Blues,
        edgecolors='None',
    )

    # Optional: emphasise the end point of the walk.
    #   plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='None', s=100)
    plt.show()

    # Any answer other than 'n' produces another walk.
    if input("Make another walk (y or n)") == 'n':
        break
die.py
from random import randint
class Die:
    """A single die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create a die; it has six sides unless num_sides says otherwise."""
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer from 1 to num_sides, both ends included."""
        lowest, highest = 1, self.num_sides
        return randint(lowest, highest)
die_visual.py
from die import Die
import pygal
# Roll a six-sided and a ten-sided die together many times and chart how
# often each total comes up.
die_1 = Die()
die_2 = Die(10)

# Roll both dice 1000 times and keep the sum of every roll.
results = [die_1.roll() + die_2.roll() for _ in range(1000)]

# Count how often each possible total occurred. The smallest total is 2
# (both dice show 1); the largest is the sum of the two side counts.
max_result = die_1.num_sides + die_2.num_sides
possible_totals = range(2, max_result + 1)
frequencies = [results.count(value) for value in possible_totals]
print(frequencies)

# Visualise the frequencies as a bar chart.
hist = pygal.Bar()
hist.title = "Results of rolling a D6 and a D10 1000 times"
# Derive the labels from the dice so they always match the frequencies.
hist.x_labels = [str(value) for value in possible_totals]
# x_title / y_title are the public chart options; the underscore-prefixed
# names used previously are internal to pygal.
hist.x_title = "result"
hist.y_title = "frequency of result"
hist.add('D6 + D10', frequencies)
hist.render_to_file('die_visual2.svg')
第16章 下载数据
highs.py
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Plot the daily high and low temperatures recorded in a weather CSV file.
#
# Other datasets that have been used with this script:
#   filename = 'csvDir/sitka_weather_07-2014.csv'
#   filename = 'csvDir/sitka_weather_2014.csv'
filename = 'csvDir/death_valley_2014.csv'

# The csv module documentation asks for files to be opened with newline=''.
with open(filename, newline='') as f:
    # Create a reader bound to the file.
    reader = csv.reader(f)

    # next() returns the next row; the first one is the header row.
    # To list every column with its index:
    #   for index, column_header in enumerate(header_row):
    #       print(index, column_header)
    header_row = next(reader)

    # Collect the date, high and low of every row that parses cleanly.
    # Column 0 is read as the date, column 1 as the high, column 3 as the low.
    dates, highs, lows = [], [], []
    for row in reader:
        # Reset per row so a failed date parse cannot report the previous
        # row's date (or hit an unbound name on the very first row).
        current_date = None
        try:
            current_date = datetime.strptime(row[0], "%Y-%m-%d")
            high = int(row[1])
            low = int(row[3])
        except ValueError:
            # Report the skipped day; fall back to the raw date text when
            # the date itself could not be parsed.
            skipped = current_date if current_date is not None else row[0]
            print(skipped, "missing data")
        else:
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Draw highs in red and lows in blue.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', linewidth=1)
plt.plot(dates, lows, c='blue', linewidth=1)

# Shade the region between the two lines.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Title and axis labels; the title names the dataset selected above.
plt.title("Daily high and low temperatures, Death Valley 2014", fontsize=24)
plt.xlabel('', fontsize=16)
plt.ylabel("Temperature(F)", fontsize=16)

# Draw the date labels slanted so they do not overlap.
fig.autofmt_xdate()
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()
america.py
# import pygal
#
# wm = pygal.Worldmap()
# 创建一个突出北美、中美和南美的简单地图
import pygal_maps_world.maps
# Highlight North, Central and South America on a world map, one series
# per region. Each region lists the two-letter codes of its countries.
regions = {
    'North America': ['ca', 'mx', 'us'],
    'Central America': ['bz', 'cr', 'gt', 'hn', 'ni', 'pa', 'sv'],
    'South America': [
        'ar', 'bo', 'br', 'cl', 'co', 'ec', 'gf',
        'gy', 'pe', 'py', 'sr', 'uy', 've',
    ],
}

wm = pygal_maps_world.maps.World()
wm.title = 'North,Central,and South America'
for region_name, region_codes in regions.items():
    wm.add(region_name, region_codes)
wm.render_to_file('americas.svg')
countries.py
# from pygal.i18n import COUNTRIES
# (that package is deprecated; pygal_maps_world.i18n is used instead)
from pygal_maps_world.i18n import COUNTRIES
# Print every two-letter country code next to its country name, in
# alphabetical order of the codes.
for country_code, country_name in sorted(COUNTRIES.items()):
    print(country_code, country_name)
country_codes.py
from pygal_maps_world.i18n import COUNTRIES
def get_country_code(country_name):
    """Return the two-letter Pygal country code for country_name.

    COUNTRIES maps codes to names, so it is scanned for the first entry
    whose name equals country_name exactly. Returns None when no entry
    matches.
    """
    matching_codes = (
        code for code, name in COUNTRIES.items() if name == country_name
    )
    return next(matching_codes, None)
# print(get_country_code('China'))
na_populations.py
# import pygal
import pygal_maps_world.maps
# Present numeric data on a world map: each country code maps to a
# population figure.
north_america_populations = {
    'ca': 34126000,
    'us': 309349000,
    'mx': 113423000,
}

wm = pygal_maps_world.maps.World()
wm.title = 'Populations of Countries in North America'
wm.add('North America', north_america_populations)
wm.render_to_file('na_populations.svg')
world_population.py
import json
import pygal
from country_codes import get_country_code
import pygal_maps_world.maps
# Load the whole population dataset into a list of records.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Build a {country code: population} dictionary from the 2010 records.
cc_populations = {}
for pop_dict in pop_data:
    # Records for any other year are ignored.
    if pop_dict['Year'] != '2010':
        continue

    country_name = pop_dict['Country Name']
    # Going through float() first avoids the error int() raises on a
    # string that contains a decimal point.
    population = int(float(pop_dict['Value']))

    # Look up the two-letter code; names without one are only reported.
    code = get_country_code(country_name)
    if not code:
        print('ERROR - ' + country_name)
        continue
    cc_populations[code] = population

# Draw the populations on a world map.
wm = pygal_maps_world.maps.World()
wm.title = "World Population in 2010, by Country"
wm.add('2010', cc_populations)
wm.render_to_file('world_population.svg')