文章目录
chap15画图
折线图plot
# Line chart
# Import pyplot under its conventional alias.
import matplotlib.pyplot as plt

# x and y data.  Because both are passed to plot(), the x axis uses
# input_values; only when x is omitted does it fall back to 0, 1, 2, ...
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

plt.plot(input_values, squares, linewidth=5)  # linewidth sets the stroke thickness
plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)
plt.tick_params(axis='both', which='major', labelsize=15)  # tick label size

# Display the figure.
plt.show()
散点图scatter
# Scatter plot: computed data, colours taken from a colormap.
import matplotlib.pyplot as plt

# list() is optional for plotting, but it makes the values print as a real
# list instead of "range(1, 1001)".
x_values = list(range(1, 1001))
y_values = [x**2 for x in x_values]

# With cmap, c must be a sequence of numbers (one per point) that is mapped
# onto the colormap.  Without cmap, c may be a single colour; an RGB/RGBA
# tuple should then be given as a 2-D array with a single row, otherwise
# matplotlib warns that it could be mistaken for values to map.
# The available colormaps are listed on matplotlib.org.
plt.scatter(x_values, y_values, c=y_values, edgecolors='none', s=40,
            cmap=plt.get_cmap('RdYlBu'))

plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)
plt.tick_params(axis='both', which='major', labelsize=15)  # tick label size
plt.axis([0, 1100, 0, 1100000])  # value ranges of the x and y axes

# Save before showing: once the show() window has been closed the figure is
# released, and a later savefig() would write an empty image.
plt.savefig('squares_plot.png', bbox_inches='tight')
plt.show()
柱状图 bar
#pygal 矢量图
import pygal
from random import randint
class Die():
    """A die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create a die; it has six sides unless num_sides says otherwise."""
        self.numsides = num_sides

    def roll(self, times):
        """Roll the die `times` times and return the list of outcomes.

        Each outcome is an integer between 1 and the number of sides,
        both ends included (randint is inclusive on both sides).
        """
        return [randint(1, self.numsides) for _ in range(times)]

    def cal(self, results):
        """Count how often each face occurs in `results`.

        Returns a list with one entry per face, ordered from face 1 up to
        the last face; entry i holds the number of occurrences of face i+1.
        """
        # range() excludes its upper bound, hence the +1.
        return [results.count(value) for value in range(1, self.numsides + 1)]
num_size = 6
times = 10000
die = Die(num_size)
results = die.roll(times)
frequency = die.cal(results)

# A single die gives a roughly flat distribution, while the sum of two dice
# peaks in the middle.  To chart the two-dice case instead, use:
# die2 = Die(num_size)
# results2 = die2.roll(times)
# results3 = [i + j for i, j in zip(results, results2)]
# frequency = [results3.count(value) for value in range(2, 13)]

# Visualise the result.
hist = pygal.Bar()
# Build the title from the actual settings so it cannot drift from them.
hist.title = "Results of rolling one D{} {} times".format(num_size, times)
hist.x_labels = list(range(1, num_size + 1))
hist.x_title = "Result"
hist.y_title = "Frequency of Result"
hist.add('D{}'.format(num_size), frequency)
hist.render_to_file('die_visual.svg')
# Adding two lists element by element
a = [1, 2, 3]
b = [2, 3, 4]
# "+" on lists concatenates them; it does NOT add the elements.
print(a + b)
# zip pairs up the elements at the same position, so each pair can be summed.
c = [i + j for i, j in zip(a, b)]
print(c)
随机漫步云图scatter
#随机漫步,模拟现实中很多问题
import matplotlib.pyplot as plt
from random import choice
class RandomWalk():
    """Generate the points of a random walk."""

    def __init__(self, num_points=5000):
        """Store the target number of points and start the walk at (0, 0)."""
        self.num_points = num_points
        # Every walk starts at the origin.
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append points until the walk holds num_points points.

        Each step moves 0 to 4 units left or right and 0 to 4 units up or
        down; a step that would not move at all is discarded and redrawn.
        """
        while len(self.x_values) < self.num_points:
            # Direction (+1 / -1) times distance (0..4) for each axis.
            x_step = choice([1, -1]) * choice([0, 1, 2, 3, 4])
            y_step = choice([1, -1]) * choice([0, 1, 2, 3, 4])

            # Reject steps that stay in place.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the previous point shifted by the step.
            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)
# Keep generating and saving walks until the user answers 'n'.
while True:
    rw = RandomWalk(50000)
    rw.fill_walk()

    # Size of the plotting window.
    plt.figure(figsize=(10, 6))

    # Colour each point by its position in the walk so the path can be followed.
    point_numbers = list(range(rw.num_points))
    plt.scatter(rw.x_values, rw.y_values, c=point_numbers,
                cmap=plt.get_cmap("RdYlBu"), edgecolors='none', s=1)

    # Hide both axes.  gca() returns the axes that already hold the scatter;
    # plt.axes() would add a fresh, empty Axes on current matplotlib.
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Emphasise the start point (green) and the end point (red).
    plt.scatter(0, 0, c='green', edgecolors='none', s=150)
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='none', s=150)

    # plt.show()
    plt.savefig('random walk.png', bbox_inches='tight')
    # Nothing displays the figure, so release it before the next iteration.
    plt.close()

    keep_running = input("Make Another Walk?(y/n): ")
    if keep_running == 'n':
        break
画图习题小结
1.pygal随机漫步——pygal.XY(stroke=False)
2.matplotlib柱状图——plt.bar(x,y)
图 | plt | pygal |
---|---|---|
柱状图 | plt.bar | pygal.Bar() |
散点图 | plt.scatter | pygal.XY() |
折线图 | plt.plot | pygal.Line() |
csv文件处理
3.3 Files and the Operating System 文件与操作系统:系统总结一下的参考资料吧
csv文件读取函数
csv | json | txt |
---|---|---|
reader = csv.reader(file) | dump(x,file) | read() |
row = next(reader) | load(file) | readlines() |
相当于有一个指针一行一行 | | readline() |
# Reading a csv file
import csv

# newline='' lets the csv module handle line endings itself, as its
# documentation recommends.
with open('zichuang.csv', newline='') as file:
    reader = csv.reader(file)

    # Each next() call consumes one line and returns it as a list of fields.
    header_row = next(reader)
    for index, column_header in enumerate(header_row):
        print('{} {}'.format(index, column_header))

    # The reader has moved past the header, so this collects the second
    # column from the second line onwards.  Rows without a second field
    # (for example blank lines) are skipped.
    column_2 = [row[1] for row in reader if len(row) > 1]

print(column_2)
date模块处理,
fillbetween填充两者中间
处理缺失值ValueError加try-except-else
# Plotting values against dates
import matplotlib.pyplot as plt
import csv
from datetime import datetime

with open("zichuang_date.csv", newline='') as file:
    reader = csv.reader(file)
    header_row = next(reader)

    dates, highs = [], []
    for row in reader:
        try:
            # %Y (upper case) is the four-digit year.
            date = datetime.strptime(row[0], "%m/%d/%Y")
            # Convert to a number; plotting the raw strings would make
            # matplotlib treat them as categories instead of values.
            high = float(row[1])
        except (ValueError, IndexError):
            # Missing or malformed field: report the row and skip it.
            print(row, 'missing data')
        else:
            dates.append(date)
            highs.append(high)

fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')
plt.title("xiadabi")
plt.xlabel("", fontsize=20)
plt.ylabel("Temperature(F)", fontsize=20)
plt.tick_params(axis='both', which='both', labelsize=20)
fig.autofmt_xdate()  # draw the date tick labels slanted so they do not overlap
plt.show()
- log10指代数量级上的增长,比如10 100 1000->1 2 3
zip和groupby
zip
#zip把多个列表按位置打包,元素为对应位置元素组成的元组,多余的去掉;Python 3中zip返回的是迭代器而不是列表,需要用[*zipped_2]或list(zipped_2)才能看到内容;zip(*zipped)可以解压,但迭代器只能消耗一次,所以list()之后再用*解压(或解压之后再解压)得到的是空结果
groupby
# Notes on groupby
from itertools import groupby

# groupby groups consecutive items that share the same key.
print("groupby的结果")
test = [('a', 5), ('a', 4), ('b', 1), ('a', 3), ('a', 2), ('b', 4), ('b', 3), ('c', 5)]
# sorted() orders the tuples so equal first elements become adjacent;
# groupby then groups on element 0.  The result is a lazy iterator of
# (key, group) pairs, not a list.
temp = groupby(sorted(test), lambda x: x[0])
print("1.list处理之前打印temp")
print(temp)
# Steps 2 and 3 stay disabled: list(temp) would consume the iterator and
# leave nothing for the loop in step 4.
# print("2.list处理的temp")
# print(list(temp))
# print("3.list处理过的temp")
# print(temp)
print("4.list未处理过的temp分组打印")
for key, group in temp:
    # group is itself an iterator (itertools._grouper); list() shows its content.
    members = list(group)
    print(members)

# Sample output (steps 2 and 3 appear only when those lines are enabled,
# presumably in a separate run):
#
# groupby的结果
# 1.list处理之前打印temp
# <itertools.groupby object at 0x106add138>
# 2.list处理的temp
# [('a', <itertools._grouper object at 0x106ad5588>), ('b', <itertools._grouper object at 0x106ad58d0>), ('c', <itertools._grouper object at 0x106ad5908>)]
# 3.list处理过的temp
# <itertools.groupby object at 0x106add138>
# 4.list未处理过的temp分组打印
# [('a', 2), ('a', 3), ('a', 4), ('a', 5)]
# [('b', 1), ('b', 3), ('b', 4)]
# [('c', 5)]
收盘价月均值
#收盘价均值
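# zip pairs up its arguments position by position and yields one tuple per position (an iterator, not a list)
# groupby groups consecutive items by a key and yields (key, group) pairs; each group is itself an iterator over the matching tuples (not a dict)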
import pygal
import json
import math
from itertools import groupby
#导入分组,月份,周数,周几再计算每组的平均值
#求某段为单位的平均数
def draw_line(x_data, y_data, title, y_legend):
    """Chart the mean of y_data for every distinct value in x_data.

    x_data and y_data are parallel sequences: y_data[i] belongs to the
    group named by x_data[i] (for example a month number and a price).

    Args:
        x_data: group label of each observation.
        y_data: numeric value of each observation.
        title: chart title; the chart is written to title + '.svg'.
        y_legend: legend text of the plotted series, e.g. '月平均值'.

    Returns:
        The pygal.Line chart that was rendered.
    """
    xy_map = []
    # Worked example with x_data=[1, 1, 2, 2] and y_data=[3, 2, 4, 6]:
    #   zip     -> (1, 3), (1, 2), (2, 4), (2, 6)
    #   sorted  -> (1, 2), (1, 3), (2, 4), (2, 6)   equal x become adjacent
    #   groupby -> 1: (1, 2), (1, 3)    2: (2, 4), (2, 6)
    for x, group in groupby(sorted(zip(x_data, y_data)), key=lambda pair: pair[0]):
        # Drop the label and keep only the values of this group: [2, 3], [4, 6].
        y_list = [v for _, v in group]
        # One [label, mean] entry per group: [[1, 2.5], [2, 5.0]].
        xy_map.append([x, sum(y_list) / len(y_list)])

    if xy_map:
        # Transpose the pairs: x_unique = (1, 2), y_mean = (2.5, 5.0).
        x_unique, y_mean = zip(*xy_map)
    else:
        # No observations: zip(*[]) yields nothing to unpack.
        x_unique, y_mean = (), ()

    line_chart = pygal.Line()
    line_chart.title = title
    line_chart.x_labels = x_unique
    line_chart.add(y_legend, y_mean)
    line_chart.render_to_file(title + '.svg')
    return line_chart
filename = 'btc_close_2017_request.json'
with open(filename, encoding='utf-8') as f:
    btc_data = json.load(f)

dates, months, weeks, weekdays, closes = [], [], [], [], []
for btc_dict in btc_data:
    date = btc_dict['date']
    # Convert numeric fields while loading so later sorting compares
    # numbers rather than strings.
    month = int(btc_dict['month'])
    week = int(btc_dict['week'])
    weekday = btc_dict['weekday']
    # The close price is a decimal string: go through float, then truncate.
    close = int(float(btc_dict['close']))
    print("{} is month {} week {},{}.The close price is {} RMB".format(date, month, week, weekday, close))

    dates.append(date)
    months.append(month)
    weeks.append(week)
    weekdays.append(weekday)
    closes.append(close)

# list.index returns the position of the first matching element; slicing up
# to it (exclusive) keeps everything before 2017-12-01.
idx_month = dates.index('2017-12-01')
# December is incomplete, so only January to November are charted.
line_chart_month = draw_line(months[:idx_month], closes[:idx_month], '收盘价月日均值', '月日均值')
收盘价周均值
import pygal
import json
import math
from itertools import groupby
#导入分组,月份,周数,周几再计算每组的平均值
#求某段为单位的平均数
def draw_line(x_data, y_data, title, y_legend):
    """Chart the mean of y_data for every distinct value in x_data.

    x_data and y_data are parallel sequences: y_data[i] belongs to the
    group named by x_data[i] (for example a week number and a price).

    Args:
        x_data: group label of each observation.
        y_data: numeric value of each observation.
        title: chart title; the chart is written to title + '.svg'.
        y_legend: legend text of the plotted series, e.g. '月平均值'.

    Returns:
        The pygal.Line chart that was rendered.
    """
    xy_map = []
    # Worked example with x_data=[1, 1, 2, 2] and y_data=[3, 2, 4, 6]:
    #   zip     -> (1, 3), (1, 2), (2, 4), (2, 6)
    #   sorted  -> (1, 2), (1, 3), (2, 4), (2, 6)   equal x become adjacent
    #   groupby -> 1: (1, 2), (1, 3)    2: (2, 4), (2, 6)
    for x, group in groupby(sorted(zip(x_data, y_data)), key=lambda pair: pair[0]):
        # Drop the label and keep only the values of this group: [2, 3], [4, 6].
        y_list = [v for _, v in group]
        # One [label, mean] entry per group: [[1, 2.5], [2, 5.0]].
        xy_map.append([x, sum(y_list) / len(y_list)])

    if xy_map:
        # Transpose the pairs: x_unique = (1, 2), y_mean = (2.5, 5.0).
        x_unique, y_mean = zip(*xy_map)
    else:
        # No observations: zip(*[]) yields nothing to unpack.
        x_unique, y_mean = (), ()

    line_chart = pygal.Line()
    line_chart.title = title
    line_chart.x_labels = x_unique
    line_chart.add(y_legend, y_mean)
    line_chart.render_to_file(title + '.svg')
    return line_chart
filename = 'btc_close_2017_request.json'
with open(filename, encoding='utf-8') as f:
    btc_data = json.load(f)

dates, months, weeks, weekdays, closes = [], [], [], [], []
for btc_dict in btc_data:
    dates.append(btc_dict['date'])
    months.append(int(btc_dict['month']))
    # weeks must be stored as int: as strings, sorting would put '11' before '2'.
    weeks.append(int(btc_dict['week']))
    weekdays.append(btc_dict['weekday'])
    # Convert to float first, then truncate to int.
    closes.append(int(float(btc_dict['close'])))

# list.index returns the position of the first matching element.  A slice
# excludes its end, so idx_month + 1 is needed to include 2017-12-10 itself,
# e.g. '123456'[:'123456'.index('3')] == '12'.
idx_month = dates.index('2017-12-10')
# Skip the first entry and stop after 2017-12-10 -- presumably so that only
# complete weeks are averaged; TODO confirm against the data file.
line_chart_month = draw_line(weeks[1:idx_month+1], closes[1:idx_month+1], '收盘价周日均值', '周日均值')
zip&groupby 小结
- 排序注意字符串和数,即从文件中取数的时候就判断一下该用什么类型
- 索引+切片 开闭区间
- zip+groupby 两个列表对应纵列操作
# Write a small HTML page that embeds the rendered SVG charts.
with open('收盘价Dashboard.html', 'w', encoding='utf-8') as html_file:
    html_file.write('<html><head><title>收盘价Dashboard</title><meta charset="utf-8"></head><body>\n')
    # File names must match what draw_line renders (title + '.svg').
    # NOTE(review): no call producing '收盘价星期均值.svg' is visible here --
    # confirm that chart is generated elsewhere.
    for svg in [
        '收盘价周日均值.svg', '收盘价星期均值.svg', '收盘价月日均值.svg'
    ]:
        html_file.write(' <object type="image/svg+xml" data="{0}" height=500></object>\n'.format(svg))
    html_file.write('</body></html>')