数据可视化

生成数据

绘制简单折线图

安装matplotlib

import matplotlib.pyplot as plt

# Data to plot: the integers 1 through 5 and their cubes.
input_values = list(range(1, 6))
cubes = [value ** 3 for value in input_values]

plt.plot(input_values, cubes, linewidth=5)

# Chart title and axis labels; both axis labels share one font size.
axis_label_style = {'fontsize': 14}
plt.title('Cube Numbers', fontsize=24)
plt.xlabel('Value', **axis_label_style)
plt.ylabel('Cube of Value', **axis_label_style)

# Size of the tick labels on both axes.
plt.tick_params(axis='both', labelsize=14)

plt.show()

散点图

import matplotlib.pyplot as plt

# Plot the cubes of 1..5000 as a scatter chart and save it to a PNG file.
x_values = list(range(1, 5001))
y_values = [x**3 for x in x_values]

# Colour each point by its y value using the Blues colormap.
plt.scatter(x_values, y_values, c=y_values, cmap=plt.cm.Blues, s=40)

# Chart title and axis labels.
plt.title('Cube Numbers', fontsize=24)
plt.xlabel('Value', fontsize=14)
plt.ylabel('Cube of value', fontsize=14)

plt.tick_params(axis='both', which='major', labelsize=14)

# Axis value ranges: start both axes at 0 and leave a 2% margin past the
# largest data point. The lists are ascending, so the last item is the max.
# NOTE(review): the original statement here was garbled; the exact limits the
# author intended are unknown, so they are derived from the data instead.
plt.axis([0, x_values[-1] * 1.02, 0, y_values[-1] * 1.02])

# bbox_inches='tight' trims the extra whitespace around the saved figure.
plt.savefig('cubes_plot.png', bbox_inches='tight')

随机漫步

from random import choice

class RandomWalk:
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Store the target number of points and start the walk at (0, 0)."""
        self.num_points = num_points

        # Parallel coordinate lists; index i holds the i-th point of the walk.
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append points in place until the walk holds ``num_points`` points."""
        directions = [1, -1]
        distances = [0, 1, 2, 3, 4]

        while len(self.x_values) < self.num_points:
            # Each axis gets a signed step: a random direction times a
            # random distance.
            dx = choice(directions) * choice(distances)
            dy = choice(directions) * choice(distances)

            # Discard steps that would not move at all.
            if dx == 0 and dy == 0:
                continue

            # The new point is the previous point shifted by the step.
            self.x_values.append(self.x_values[-1] + dx)
            self.y_values.append(self.y_values[-1] + dy)
import matplotlib.pyplot as plt

from random_walk import RandomWalk

# Keep generating and displaying new random walks until the user answers 'n'.
while True:
    rw = RandomWalk(5000)
    rw.fill_walk()

    plt.figure(figsize=(10, 6))  # output size

    # Colour the points by their position in the walk so that the order of
    # the steps is visible as a gradient.
    point_numbers = list(range(rw.num_points))
    plt.scatter(rw.x_values, rw.y_values, c=point_numbers,
                cmap=plt.cm.Blues, s=15)
    plt.scatter(0, 0, c='green', s=100)  # start point
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', s=100)  # end point

    # Hide both axes. plt.gca() returns the Axes the scatter calls drew on;
    # calling plt.axes() with no arguments would add a new Axes instead.
    current_axes = plt.gca()
    current_axes.get_xaxis().set_visible(False)
    current_axes.get_yaxis().set_visible(False)

    plt.show()

    keep_running = input("make another walk?(y/n):")
    if keep_running == 'n':
        break

使用Pygal模拟掷骰子

安装Pygal

from random import randint

class Die:
    """A single die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Create a die; it has six sides unless ``num_sides`` is given."""
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer between 1 and ``num_sides``, inclusive."""
        lowest_face, highest_face = 1, self.num_sides
        return randint(lowest_face, highest_face)
from die import Die
import pygal

# Simulate rolling a six-sided and a ten-sided die together and render a bar
# chart of how often each sum occurred.
die_1 = Die()
die_2 = Die(10)
num_rolls = 50000

# Roll both dice and record the sum of each roll.
results = [die_1.roll() + die_2.roll() for roll_num in range(num_rolls)]

# Count how often each possible sum occurred. With two dice the smallest sum
# is 2, so counting starts there; starting at 1 would add a leading empty
# entry and shift every bar one label to the right.
max_result = die_1.num_sides + die_2.num_sides
possible_sums = range(2, max_result + 1)
frequencies = [results.count(value) for value in possible_sums]

hist = pygal.Bar()

# Build the title and labels from the actual dice and roll count so they
# cannot drift out of sync with the simulation above.
hist.title = "Results of rolling a D{} and a D{} {} times.".format(
    die_1.num_sides, die_2.num_sides, num_rolls)
hist.x_labels = [str(value) for value in possible_sums]
hist.x_title = "Result"
hist.y_title = "Frequency of Result"

hist.add('D6 + D10', frequencies)
hist.render_to_file('die_visual.svg')

下载数据

CSV文件格式

查看文件第一行(表头,即各列数据的描述)

# Print each column name of the CSV header together with its index.
import csv

filename = 'sitka_weather_2014.csv'
# newline='' lets the csv module handle line endings itself.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    # Nothing has been read yet, so next() returns the first row (the header).
    header_row = next(reader)
    for index, name in enumerate(header_row):
        print(index, name)
import csv
from datetime import datetime
from matplotlib import pyplot as plt

#从文件中获取日期 最高气温 最低气温
# Read dates, high temperatures and low temperatures from the file.
filename = 'death_valley_2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)  # skip the header row

    dates, highs, lows = [], [], []
    for row in reader:
        # Parse the date on its own so that a bad date is reported with the
        # raw text of this row rather than an unbound or stale current_date.
        try:
            current_date = datetime.strptime(row[0], "%Y-%m-%d")
        except ValueError:
            print(row[0], 'invalid date')
            continue

        # int('') raises ValueError, which is how an empty reading shows up.
        try:
            high = int(row[1])
            low = int(row[3])
        except ValueError:
            print(current_date, 'missing data')
        else:
            # Keep a row only when all three values parsed, so the three
            # lists stay the same length.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Plot the data.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
# Shade the region between the two lines.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the chart.
plt.title("Daily high and low temperatures,2014", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel('Temperature(F)', fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

制作世界人口地图:JSON格式

查看国家代码

from pygal_maps_world.i18n import COUNTRIES

# Print every country code with its country name, ordered by code.
for country_code, country_name in sorted(COUNTRIES.items()):
    print(country_code, country_name)

定义得到国家名称代码的函数

from pygal_maps_world.i18n import COUNTRIES

def get_country__code(country_name):
    """Return the COUNTRIES code whose name equals ``country_name``.

    Returns None when no entry in COUNTRIES has exactly that name.
    """
    matching_codes = (
        code for code, name in COUNTRIES.items() if name == country_name
    )
    # The first match wins; the default covers the "not found" case.
    return next(matching_codes, None)
	
#print(get_country__code('Andorra'))

绘制美洲地图

import pygal_maps_world.maps

# Render a world map that highlights the countries of the Americas,
# one series per region.
wm = pygal_maps_world.maps.World()
wm.title = 'North, Central, and South America'

# (series title, country codes) pairs, added in this order.
regions = [
    ('North America', ['ca', 'mx', 'us']),
    ('Central America', ['bz', 'cr', 'gt', 'hn', 'ni', 'pa', 'sv']),
    ('South America', ['ar', 'bo', 'br', 'cl', 'co', 'ec', 'gf',
                       'gy', 'pe', 'py', 'sr', 'uy', 've']),
]
for region_name, country_codes in regions:
    wm.add(region_name, country_codes)

wm.render_to_file('amerias.svg')

绘制北美地图

import pygal_maps_world.maps

# Render a world map that shades the North American countries by population.
wm = pygal_maps_world.maps.World()
wm.title = 'Populations of Countries in North America'
# Maps country code -> population. The value for 'ca' previously carried an
# extra digit (341260000), which made Canada appear more populous than the US.
wm.add('North America', {'ca': 34126000, 'us': 309349000, 'mx': 113423000})

wm.render_to_file('na_populations.svg')

绘制世界地图

import json
import pygal_maps_world.maps
from country_codes import get_country__code
from pygal.style import RotateStyle, LightColorizedStyle

# Load the data into a list.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Build a dictionary mapping country code -> 2010 population.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == '2010':
        country_name = pop_dict['Country Name']
        # Go through float() first so that values written with a decimal
        # point can still be converted to an int.
        population = int(float(pop_dict['Value']))
        code = get_country__code(country_name)
        # Entries whose name has no matching country code are skipped.
        if code:
            cc_populations[code] = population

# Split the countries into three groups by population.
cc_pops_1, cc_pops_2, cc_pops_3 = {}, {}, {}
for cc, pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop

#print(len(cc_pops_1),len(cc_pops_2),len(cc_pops_3))

wm_style = RotateStyle('#336699', base_style=LightColorizedStyle)
wm = pygal_maps_world.maps.World(style=wm_style)
wm.title = 'world Population in 2010,by Country'
# Legend labels follow the thresholds above: 10 million and 1 billion.
wm.add('0-10m', cc_pops_1)
wm.add('10m-1bn', cc_pops_2)
wm.add('>1bn', cc_pops_3)

wm.render_to_file('world_population.svg')

使用API

查看GitHub上使用Go语言编写的仓库

import requests
import pygal
from pygal.style import LightColorizedStyle as LCS,LightenStyle as LS

#请求发送到GitHub网站中响应api调用的部分,搜索仓库,查询语言为GO的仓库,以星级排列
# Call the GitHub search API for repositories whose language is Go, sorted
# by star count.
url = 'https://api.github.com/search/repositories?q=language:Go&sort=stars'
r = requests.get(url)
# Print the response status; 200 means the request succeeded.
print("Status code:", r.status_code)

# Store the decoded JSON response.
response_dict = r.json()
# Total number of matching repositories.
print("Total repositories:", response_dict['total_count'])
# The repositories actually returned in this response.
repo_dicts = response_dict['items']
print("Number of items:", len(repo_dicts))

# Collect the bar labels and one data dictionary per repository.
names, plots_dicts = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])

    plot_dict = {
        'value': repo_dict['stargazers_count'],
        # str() keeps the label a string whatever type the API returned.
        'label': str(repo_dict['description']),
        'xlink': repo_dict['html_url'],  # hyperlink to the repository
    }
    plots_dicts.append(plot_dict)

# Build the chart.
my_style = LS('#333366', base_style=LCS)

my_config = pygal.Config()
my_config.x_label_rotation = 45
my_config.show_legend = False
my_config.title_font_size = 24
my_config.label_font_size = 14
my_config.major_label_font_size = 18
my_config.truncate_label = 15
my_config.show_y_guides = False
my_config.width = 1000

chart = pygal.Bar(my_config, style=my_style)
# The query above asks for Go repositories, so the title names Go.
chart.title = 'Most-Starred Go Projects on GitHub'
chart.x_labels = names

chart.add('', plots_dicts)
chart.render_to_file('go_repos.svg')
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值