# 之前学的数据可视化
#数据可视化
'''=============================================================================================='''
'''plt.plot(x, y, fmt='xxx', linestyle=, marker=, color=, linewidth=, markersize=, label=, )
plot()函数:
(1)linestyle:此字段是线的样式,参数形式:字符串(-:实线, --:虚线)
(2)linewidth:此参数是线的粗细,粗细程度和所定数值大小有关,参数形式:数值
(3)marker:点的样式,参数形式:字符串
(4)markersize:点的大小,参数形式:数值
(5)color:调节线条和点的颜色,参数形式:字符串
'''
'''-----------------------------------------------------------------------------------------'''
'''import matplotlib.pyplot as plt

# plt.subplots() returns two objects: the Figure (the whole image) and the
# Axes (the coordinate axes and the sub-plot drawn on them). It is the
# equivalent of fig = plt.figure() followed by ax = fig.add_subplot().
figure, axes = plt.subplots()
# plot() draws the series of points and joins them with a line.
axes.plot([1, 4, 9, 16, 25])
plt.show()'''
'''--------------------------------------------------------------------------------------'''
#绘制折线图
'''import matplotlib.pyplot as plt

x_points = [1, 2, 3, 4, 5]
y_points = [1, 4, 9, 16, 25]
# plt.style.use('seaborn')  # plt.style.use() changes the chart style
figure, axes = plt.subplots()
# Pass both the inputs and the outputs (without inputs, x would start at 0);
# draw a dashed line of width 3.
axes.plot(x_points, y_points, linestyle='--', linewidth=3)
# Title and axis labels.
axes.set_title('number_squers', fontsize=24)
axes.set_xlabel('value', fontsize=14)
axes.set_ylabel('squers', fontsize=14)
plt.show()'''
'''----------------------------------------------------------------------------------------'''
'''plt.scatter()函数用于绘制散点图
plt.scatter(x, y, s, c, marker, cmap, norm, alpha, linewidths, edgecolors)
x: x轴数据
y: y轴数据
s: 散点大小
c: 散点颜色
marker: 散点形状
cmap: 指定特定颜色图,该参数一般不用,有默认值
alpha: 散点的透明度
linewidths: 散点边框的宽度
edgecolors: 设置散点边框的颜色
'''
'''# Scatter plot
import matplotlib.pyplot as plt

# values1 = [1, 2, 3, 4, 5]
# squers1 = [1, 4, 9, 16, 25]
values2 = list(range(100))
squers2 = [x ** 2 for x in values2]

fig, ax = plt.subplots()
# ax.scatter(2, 4, s=200)  # draw a single point
# ax.scatter(values1, squers1, s=100)  # pass one list of x values and one of y values
ax.scatter(values2, squers2, s=30, c='black')  # x/y lists generated above
ax.set_title('squers', fontsize=14)
ax.set_xlabel('value', fontsize=10)
ax.set_ylabel('squer', fontsize=10)
plt.show()
# plt.savefig('...', bbox_inches='tight') saves the chart instead: the first
# argument is the output file location, the second trims the surrounding
# whitespace (leave it out if trimming is not wanted).'''
'''-----------------------------------------------------------------------------------------------'''
'''import matplotlib.pyplot as plt

xs = range(1, 5001)
ys = [x ** 3 for x in xs]
figure, axes = plt.subplots()
# c is set to the list of y values so the colour follows y; cmap names the
# colormap used for that mapping.
axes.scatter(xs, ys, c=ys, cmap='Blues', s=10)
plt.show()'''
'''----------------------------------------------------------------------------------------------'''
#随机漫步的分布图
'''from random import choice
def get_step():
    """Return one signed step: a random direction (-1 or 1) times a random distance (0 to 4)."""
    n_direction = choice([-1, 1])
    n_distance = choice([0, 1, 2, 3, 4])
    return n_direction * n_distance
class Randonwalk:
    """Build the list of points visited by a random walk starting at (0, 0)."""

    def __init__(self, number):
        """Store how many moves are still to be made and the starting point."""
        self.number = number
        self.x_location = [0]  # the walk starts at (0, 0)
        self.y_location = [0]

    def fill_walk(self):
        """Append points until `number` real moves have been made.

        `self.number` itself is the countdown, so it is 0 once the walk is
        complete and a second call adds nothing.
        """
        # `> 0` instead of truthiness: a negative count must not loop forever.
        while self.number > 0:
            # Earlier inline version of the step computation, replaced by get_step():
            # x_direction = choice([-1, 1])
            # x_distance = choice([0, 1, 2, 3, 4])
            # x_displacement = x_direction * x_distance
            # y_direction = choice([-1, 1])
            # y_distance = choice([0, 1, 2, 3, 4])
            # y_displacement = y_direction * y_distance
            # x_locationed = self.x_location[-1] + x_displacement
            # y_locationed = self.y_location[-1] + y_displacement
            x_locationed = self.x_location[-1] + get_step()
            y_locationed = self.y_location[-1] + get_step()
            # Staying on the same point is not a move: retry without counting it.
            if x_locationed == self.x_location[-1] and y_locationed == self.y_location[-1]:
                continue
            self.x_location.append(x_locationed)
            self.y_location.append(y_locationed)
            self.number -= 1

    def return_x(self):
        """Return the list of x coordinates visited so far."""
        return self.x_location

    def return_y(self):
        """Return the list of y coordinates visited so far."""
        return self.y_location
num = 1000
x_locations = []
y_locations = []
randonwalk = Randonwalk(num)
randonwalk.fill_walk()
x_locations = randonwalk.return_x()
y_locations = randonwalk.return_y()
import matplotlib.pyplot as plt
''''''fig, ax = plt.subplots() #散点图
ax.scatter(x_locations, y_locations, s = 10, c = range(num+1), cmap = 'Blues')
ax.scatter(0, 0, s = 10, c = 'red')
ax.scatter(x_locations[-1], y_locations[-1], s = 10, c = 'red')
plt.show()''''''
fig, ax = plt.subplots() #折线图
ax.plot(x_locations, y_locations, linewidth = 5)
plt.show()'''
'''----------------------------------------------------------------------------------------'''
#条形图
'''
matplotlib.pyplot中的bar()函数来制作条形图
bar(x, height, width, *, align='center', **kwargs)
x:包含所有柱子的下标的列表
height:包含所有柱子的高度值的列表
width:每个柱子的宽度。可以指定一个固定值,那么所有的柱子都是一样的宽。或者设置一个列表,这样可以分别对每个柱子设定不同的宽度。
align:柱子对齐方式,有两个可选值:center和edge。center表示每根柱子是根据下标来对齐, edge则表示每根柱子全部以下标为起点,然后显示到下标的右边。如果不指定该参数,默认值是center。
'''
'''from random import randint
class Die:
    """A die with `num_side` faces that is rolled `num_roll` times."""

    def __init__(self, num_side, num_roll):
        """Store the face count and the number of rolls still to be made."""
        self.num_side = num_side
        self.answer = []  # every face value rolled so far
        self.num_roll = num_roll

    def roll(self):
        """Make all remaining rolls and return the list of rolled values.

        `self.num_roll` is counted down to 0, so a second call rolls nothing
        and returns the same list.
        """
        # `> 0` instead of truthiness: a negative count must not loop forever.
        while self.num_roll > 0:
            self.answer.append(randint(1, self.num_side))
            self.num_roll -= 1
        return self.answer
num_sides = 6
die = Die(num_sides, 100)
y_values = []
y_value = die.roll()
for value in range(1, num_sides+1):
y = y_value.count(value)
y_values.append(y)
x_values = [x for x in range(1, num_sides+1)]
import matplotlib.pyplot as plt
plt.title('die', fontsize = 20)
plt.xlabel('sides', fontsize = 10)
plt.ylabel('num', fontsize = 10)
plt.bar(x = x_values, height = y_values, width = 0.2)
plt.show()'''
'''-------------------------------------------------------------------------------------'''
#plotly创建的数据可视化
'''from plotly.graph_objs import Bar, Layout
import plotly.graph_objs as go

bases = [1, 2, 3]
squares = [1, 4, 9]
# Bar builds the bar-chart data from the x-axis and y-axis values.
bars = go.Bar(x=bases, y=squares)
# Layout returns an object describing the chart's layout and configuration;
# here it carries the title and the labels of both axes.
axis_labels = go.Layout(
    title='numbers',
    xaxis={'title': 'number'},
    yaxis={'title': 'number**2'},
)
# go.Figure() creates the chart from the data and the layout.
chart = go.Figure(data=bars, layout=axis_labels)
chart.show()  # display the chart'''
'''-------------------------------------------------------------------------------------'''
#plotly来对骰子的点数的数据可视化
'''from random import randint
class Die:
    """A die with `side` faces that tallies how often each face comes up."""

    def __init__(self, side, num):
        """Store the face count and the number of rolls still to be made."""
        self.side = side
        self.num = num  # rolls still to be made
        self.roll_number = []  # per-face tallies, filled in by roll()

    def roll(self):
        """Make all remaining rolls and return the tallies for faces 1..side.

        The result always has exactly `side` entries. `self.num` is counted
        down to 0, so calling roll() again adds no rolls and returns the same
        tallies instead of appending a second set of counts.
        """
        if len(self.roll_number) != self.side:
            self.roll_number = [0] * self.side
        # `> 0` instead of truthiness: a negative count must not loop forever.
        while self.num > 0:
            reason = randint(1, self.side)
            self.roll_number[reason - 1] += 1  # face k is stored at index k - 1
            self.num -= 1
        return self.roll_number
side = 6
die = Die(side, 100)
y_value = die.roll()
x_value = [x for x in range(1,side+1)]
import plotly.graph_objs as go #将plotly.graph_objs中引入用于绘图
from plotly.graph_objs import Bar #将plotly.graph_objs中的Bar引入用于绘画柱状图
data = Bar(x = x_value, y = y_value) #将x,y的数据传入Bar返回一个图标数据
title = 'the num of die num'
xl = {'title':'side'}
yl = {'title':'num'}
layout = go.Layout(title = title, xaxis = xl, yaxis = yl) #设置图片的布局,数据用字典的方式传入
fig = go.Figure(data = data, layout = layout) #传入数据和布局,绘画这个图
fig.show()'''
'''--------------------------------------------------------------------------------------'''
#实现两个骰子点数之和的统计
'''from random import randint
class Die:
    """A pair of `num_side`-faced dice rolled together `num_roll` times."""

    def __init__(self, num_side, num_roll):
        """Store the face count and the number of rolls still to be made."""
        self.num_side = num_side  # number of faces on each die
        self.num_roll = num_roll  # rolls still to be made
        self.all_answer = []  # the sum obtained on every roll
        self.each_p_answers = []  # how often each possible sum occurred

    def roll(self):
        """Make all remaining rolls and return the count of every sum 2..2*num_side.

        The counts are rebuilt from `all_answer` on every call, so calling
        roll() again returns the same counts instead of a list that keeps
        growing.
        """
        # `> 0` instead of truthiness: a negative count must not loop forever.
        while self.num_roll > 0:
            first_point = randint(1, self.num_side)  # random value from 1 to the face count
            second_point = randint(1, self.num_side)
            self.all_answer.append(first_point + second_point)
            self.num_roll -= 1
        self.each_p_answers = [
            self.all_answer.count(value)
            for value in range(2, 2 * self.num_side + 1)
        ]
        return self.each_p_answers
num_sides = 8
die = Die(num_sides, 1000)
y_values = die.roll()
x_values = [x for x in range(2, 2*num_sides+1)]
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.bar(x_values, y_values, width = 0.4) #用bar()进行条形图的打印
ax.set_title('die', fontsize = 15)
ax.set_xlabel('side', fontsize = 10)
ax.set_ylabel('num', fontsize = 10)
plt.show()'''
'''================================================================================'''
#对csv文件数据进行数据可视化
'''import csv

import matplotlib.pyplot as plt

file_name = 'sitka_weather_07-2018_simple.csv'
with open(file_name) as f:
    # csv.reader() splits every line on commas and yields each row as a list.
    reader = csv.reader(f)
    # next() fetches the next item of the iterator; called once, it consumes
    # only the first line of the file (the header).
    head = next(reader)
    print(head)
    # The header is already consumed, so this starts at the second line.
    # It has to run inside the with block because the reader reads the file lazily.
    tmaxs = [int(inf[-2]) for inf in reader]

fig, ax = plt.subplots()
ax.plot(tmaxs)
ax.set_title('weather', fontsize=15)
ax.set_xlabel('data', fontsize=10)
ax.set_ylabel('tmputuer', fontsize=10)
plt.show()'''
'''---------------------------------------------------------------------------------'''
#调用模块datetime中的方法strptime()来将字符型的时间转化为一个表示日期的对象
'''from datetime import datetime

date_text = '2022-01-20'
# datetime.strptime() converts a string into a datetime according to the
# given format; a time of day can be added with %H:%M:%S.
parsed_day = datetime.strptime(date_text, '%Y-%m-%d')
print(parsed_day)'''
'''---------------------------------------------------------------------------------'''
#时间日期一起的数据可视化
'''import csv
from datetime import datetime

import matplotlib.pyplot as plt

file = 'sitka_weather_07-2018_simple.csv'
tmax_infs = []
date_inf = []
with open(file) as w_inf:
    # Read two fields from the CSV file: the high temperature and the date.
    reader = csv.reader(w_inf)
    title_inf = next(reader)
    print(title_inf)
    for inf in reader:
        # Temperatures are stored as strings, so convert them to int first.
        tmax_infs.append(int(inf[-2]))
        date_inf.append(inf[2])

# datetime.strptime() converts each date string into a datetime object.
date_infs = [datetime.strptime(inf, '%Y-%m-%d') for inf in date_inf]

fig, ax = plt.subplots()
ax.plot(date_infs, tmax_infs)
plt.show()'''
'''-----------------------------------------------------------------------------'''
#绘制全年最高气温图
'''import csv
from datetime import datetime

import matplotlib.pyplot as plt

file = 'sitka_weather_2018_simple.csv'
dates = []
tmaxs = []
with open(file) as weather_inf:
    reader = csv.reader(weather_inf)  # csv.reader() reads the CSV file
    title = next(reader)  # consume the first line (the header)
    print(title)
    for inf in reader:
        dates.append(inf[2])
        # Values are stored as strings, so convert the temperature to int.
        tmaxs.append(int(inf[-2]))

# datetime.strptime() converts each date string into a datetime object.
date_infs = [datetime.strptime(date, '%Y-%m-%d') for date in dates]

fig, ax = plt.subplots()
ax.plot(date_infs, tmaxs, color='red')
ax.set_title('year weather', fontsize=15)
ax.set_xlabel('date', fontsize=10)
ax.set_ylabel('tmax', fontsize=10)
plt.show()'''
'''-------------------------------------------------------------------------------------'''
#同时展示最高温和最低温的图表
'''import csv
from datetime import datetime

import matplotlib.pyplot as plt

file = 'sitka_weather_2018_simple.csv'
dates = []
tmaxs = []
tmins = []
with open(file) as weather_inf:
    reader = csv.reader(weather_inf)  # wrap the file so it can be read row by row
    title = next(reader)
    print(title)
    for inf in reader:
        dates.append(datetime.strptime(inf[2], '%Y-%m-%d'))
        tmaxs.append(int(inf[-2]))
        tmins.append(int(inf[-1]))

fig, ax = plt.subplots()
ax.plot(dates, tmaxs, color='red')  # high temperatures
ax.plot(dates, tmins, color='blue')  # low temperatures
ax.set_title('weather', fontsize=20)
ax.set_xlabel('date', fontsize=15)
ax.set_ylabel('tempeture', fontsize=15)
# fill_between() takes one x series, two y series and a colour, and fills the
# area between the two curves.
ax.fill_between(dates, tmaxs, tmins, facecolor='yellow')
plt.show()'''
'''--------------------------------------------------------------------------------------------'''
#改进,对数据缺失引发的错误进行改进
'''import csv
from datetime import datetime

import matplotlib.pyplot as plt

file = 'death_valley_2018_simple.csv'
dates = []
tmaxs = []
tmins = []
with open(file) as weather_inf:
    reader = csv.reader(weather_inf)
    title = next(reader)
    print(title)
    for inf in reader:
        try:
            date = datetime.strptime(inf[2], '%Y-%m-%d')
            tmax = int(inf[-2])
            tmin = int(inf[-1])
        except ValueError:
            # Converting an empty string to int raises ValueError and would
            # stop the program; catching it lets the run continue and the
            # incomplete row is reported and skipped.
            print(f"{inf[2]} 数据缺失")
        else:
            dates.append(date)
            tmaxs.append(tmax)
            tmins.append(tmin)

fig, ax = plt.subplots()
ax.plot(dates, tmaxs, color='red')
ax.plot(dates, tmins, color='blue')
ax.set_title('weather in death valley', fontsize=15)
ax.set_xlabel('date', fontsize=10)
ax.set_ylabel('temputuer', fontsize=10)
ax.fill_between(dates, tmaxs, tmins, facecolor='green')  # fill the area between the curves
plt.show()'''
'''------------------------------------------------------------------------------------------'''
'''import json

filename = 'eq_data_1_day_m1.json'
with open(filename) as f:
    all_eq_data = json.load(f)

readable_file = 'readable_eq_data.json'
with open(readable_file, 'w') as f:
    # The indent argument sets the indentation width, which turns the data
    # into an easy-to-read format.
    json.dump(all_eq_data, f, indent=4)'''
'''------------------------------------------------------------------------------------'''
#地震数据的数据可视化
'''import json

import pandas as pd
import plotly.express as px  # plotly.express does the plotting

file = 'readable_eq_data.json'
with open(file) as f:
    all_eq_data = json.load(f)

all_eq = all_eq_data['features']  # all earthquake records are stored under 'features'
mags = []
lontitude = []
latitude = []
title = []
for inf in all_eq:
    mags.append(inf['properties']['mag'])  # magnitude
    title.append(inf['properties']['title'])  # earthquake title
    lontitude.append(inf['geometry']['coordinates'][0])  # longitude
    latitude.append(inf['geometry']['coordinates'][1])  # latitude
# print(mags[:10])
# print(lontitude[:5])
# print(latitude[:5])
# print(title[:5])

# Wrap the four lists in a pandas DataFrame, one named column per list.
data = pd.DataFrame(
    data=zip(lontitude, latitude, title, mags),
    columns=['经度', '纬度', '位置', '震级'],
)
data.head()
fig = px.scatter(  # variant that reads its data from the DataFrame columns
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    title='global earthquack',
    size='震级',  # marker size follows the magnitude column
    size_max=10,  # markers default to 20 pixels; scale them down to 10
)
# fig = px.scatter(  # simpler variant that passes the lists directly
#     x = lontitude,
#     y = latitude,
#     labels = {'x':'lontitude', 'y':'latitude'},
#     range_x = [-200, 200],
#     range_y = [-90, 90],
#     width = 800,
#     height = 800,
#     title = 'earthquakes'
# )
# fig.write_html('global_earthquakes.html')  # fig.write_html() stores the chart in a file
fig.show()'''
'''--------------------------------------------------------------------------------------'''
#地震图的数据可视化
'''import json

import pandas as pd
import plotly.express as px

file = 'eq_data_30_day_m1.json'
with open(file) as f:
    readable_file = json.load(f)

read_file = 'readable_eq_30_data'
with open(read_file, 'w') as r:
    json.dump(readable_file, r, indent=4)  # rewrite the data in an easy-to-read form

title = []
mags = []
lontitude = []  # longitude
latitude = []  # latitude
with open(read_file) as rf:
    readable_inf = json.load(rf)

for inf in readable_inf['features']:
    title.append(inf['properties']['title'])
    mags.append(inf['properties']['mag'])
    lontitude.append(inf['geometry']['coordinates'][0])
    latitude.append(inf['geometry']['coordinates'][1])

# Collect all related data in one DataFrame, one named column per list.
data = pd.DataFrame(
    data=zip(title, mags, lontitude, latitude),
    columns=['位置', '震级', '经度', '纬度'],
)
fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    title='全球地震图',
    range_x=[-200, 200],
    range_y=[-90, 90],
    width=800,
    height=800,
    size='震级',
    size_max=10,
    color='震级',
    hover_name='位置',
)
fig.show()'''
'''------------------------------------------------------------------------------------'''
#全球火灾的数据可视化
'''import csv
from datetime import datetime

import pandas as pd
import plotly.express as px

# Load the data.
latitude = []  # latitude
longitude = []  # longitude
dates = []
brightness = []
file = 'world_fires_1_day.csv'
with open(file) as fir_inf:
    reader = csv.reader(fir_inf)
    title = next(reader)
    for inf in reader:
        latitude.append(float(inf[0]))
        longitude.append(float(inf[1]))
        brightness.append(float(inf[2]))
        dates.append(datetime.strptime(inf[5], '%Y-%m-%d'))
print(title)

# Visualise the data. The third column holds the parsed dates, so it is
# labelled as a date column ('日期') rather than as a location.
data = pd.DataFrame(
    data=zip(longitude, latitude, dates, brightness),
    columns=['经度', '纬度', '日期', '火势'],
)
fig = px.scatter(
    data,
    x='经度',
    y='纬度',
    range_x=[-200, 200],
    range_y=[-90, 90],
    height=800,
    width=800,
    title='世界着火数据',
    color='火势',
    hover_name='日期',
)
fig.show()'''
# 下面是数据可视化要用到的数据(用百度网盘打开)
# 这也是《Python编程:从入门到实践》这本书的数据
# 链接:https://pan.baidu.com/s/1mSm73bU8833APDc0HwQ6BQ?pwd=LyWj
# 提取码:LyWj