python实用教程（七）：数据可视化

Trisyp

已于 2024-08-20 14:23:48 修改

阅读量2.3k

点赞数

分类专栏：Python保姆级教程 | 文章标签：python

于 2017-04-24 16:05:17 首次发布

本文链接：https://blog.csdn.net/Trisyp/article/details/70598868

版权

Python保姆级教程专栏收录该内容

9 篇文章 0 订阅

订阅专栏

13. numpy和pandas简单使用

#例1

import numpy as np

import pandas as pd

# numpy: element-wise array arithmetic, boolean masks, 2-D slicing and
# basic statistics on randomly generated height/weight samples.

height = [1.73, 1.68, 1.72]
weight = [56, 62, 75]

npHeight = np.array(height)  # elements are converted to one common dtype
npWeight = np.array(weight)

bmi = npWeight / npHeight**2  # element-wise: weight divided by height squared
print(bmi)

bmi2 = bmi[bmi > 20]  # boolean mask keeps only the entries greater than 20

np2D = np.array([[1.73, 1.68, 1.72], [56, 62, 75]])
print(np2D.shape)  # (2, 3): 2 rows, 3 columns

# Columns 2 and 3 live at indices 1 and 2. The slice stop is exclusive, so
# it has to be 3; the previous 1:2 returned column 2 only.
cols_2_3 = np2D[:, 1:3]
print(cols_2_3)
print(np2D[1, :])  # second row

# 1000 normally distributed samples each, rounded to 2 decimals.
height2 = np.round(np.random.normal(1.75, 0.5, 1000), 2)
weight2 = np.round(np.random.normal(65, 20, 1000), 2)

npAll = np.column_stack((height2, weight2))  # shape (1000, 2): height, weight
print(npAll[1:10, :])  # rows at indices 1..9

print(np.corrcoef(npAll[:, 0], npAll[:, 1]))  # correlation matrix height/weight
print(np.std(npAll[:, 1]))  # standard deviation of the weights
print(np.mean(npAll[:, 0]))  # mean of the heights
print(np.median(npAll[:, 1]))  # median of the weights

# pandas: load a CSV into a DataFrame and select columns, rows and cells by label.

data = pd.read_csv("customers.csv", index_col=0) # index_col=0: use the first CSV column as the row labels (index) rather than as a data column

print(data["A"]) # select column A

data["E"] = [1, 2, 3, 4, 5, 6] # add a new column E; assumes customers.csv has exactly 6 data rows - TODO confirm

print(data["A"]/data["B"]) # element-wise division of column A by column B

print(data.loc["a"]) # select the row labelled "a"

print(data.loc["a", "A"]) # select the cell at row "a", column "A"

# Equivalent to: print(data["A"].loc["a"]) or print(data.loc["a"]["A"])

14. 数据可视化

#例1

import matplotlib

import matplotlib.pyplot as plt

# Line chart of the first five square numbers.
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

# The original statement ended with a stray line-continuation backslash,
# which silently glued it to whatever line followed; it has been removed.
plt.plot(input_values, squares, linewidth=5)

#plt.title("Square Numbers",fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the tick labels on both axes.
plt.tick_params(axis='both', which='major', labelsize=14)

plt.show()  # open the matplotlib viewer and display the figure

#例2

import matplotlib.pyplot as plt

# Scatter plot of n squared for n = 1..1000, coloured by the y value and
# written to an image file instead of being shown on screen.
x = [number for number in range(1, 1001)]
y = [value ** 2 for value in x]

plt.scatter(x, y, c=y, edgecolor='none', s=10)

# Axis limits in the order [xmin, xmax, ymin, ymax].
axis_limits = [0, 1100, 0, 1100000]
plt.axis(axis_limits)

#plt.show()

# Save the chart to a file; 'tight' trims the surrounding whitespace.
plt.savefig('sp.png', bbox_inches='tight')

#例3(随机漫步)

import matplotlib.pyplot as plt

from random_walk import RandomWalk

#############random_walk.py###############

from random import choice

class RandomWalk:
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Store the target point count and start the walk at the origin.

        Args:
            num_points: total number of points the walk should contain,
                including the starting point.
        """
        self.num_points = num_points
        # Parallel coordinate lists; every walk starts at (0, 0).
        self.x = [0]
        self.y = [0]

    @staticmethod
    def _get_step():
        """Return one signed step: a random direction times a distance of 0-4."""
        direction = choice([1, -1])
        distance = choice([0, 1, 2, 3, 4])
        return direction * distance

    def fill_walk(self):
        """Append points to self.x / self.y until num_points are stored."""
        while len(self.x) < self.num_points:
            # Draw the x step first, then the y step, so the sequence of
            # random choices is the same as in the unrolled original.
            x_step = self._get_step()
            y_step = self._get_step()

            # Reject moves that would stay in place.
            if x_step == 0 and y_step == 0:
                continue

            self.x.append(self.x[-1] + x_step)
            self.y.append(self.y[-1] + y_step)

###############################################

# Build one random walk and plot it as a scatter chart.
rw = RandomWalk()
rw.fill_walk()

# dpi sets the resolution; figsize sets the size of the plotting window.
plt.figure(dpi=128, figsize=(10, 6))

# Colour each point by its position in the walk so the path fades from
# light to dark blue.
point_numbers = list(range(rw.num_points))
plt.scatter(rw.x, rw.y, c=point_numbers, cmap=plt.cm.Blues, edgecolor='none', s=15)

plt.scatter(0, 0, c='green', edgecolor='none', s=100)  # start point
plt.scatter(rw.x[-1], rw.y[-1], c='red', edgecolor='none', s=100)  # end point

# Hide both axes on the Axes that actually holds the scatter plots.
# plt.gca() returns the current Axes, whereas calling plt.axes() adds a new,
# empty Axes on current matplotlib releases, so the wrong axes were hidden.
ax = plt.gca()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

plt.show()

#3.下载数据

#例1

import csv #导入csv模块

from matplotlib import pyplot as plt

# Read the daily high temperatures from the weather CSV and plot them.
filename = "sitka_weather_07-2014.csv"

with open(filename) as f:
    reader = csv.reader(f)  # reader object bound to the open file
    header_row = next(reader)  # first call returns the header line

    # enumerate() yields each header together with its column index.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

    # Column 1 is read as the integer high temperature of each row.
    highs = []
    for row in reader:
        high = int(row[1])
        highs.append(high)

    print(highs)

fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(highs, c='red')
plt.show()

#例2

import csv

from matplotlib import pyplot as plt

from datetime import datetime

# Plot daily highs and lows against their dates, skipping incomplete rows.

# strptime() turns a date string into a datetime object.
first_date = datetime.strptime('2014-7-1', '%Y-%m-%d')
print(first_date)

filename = "sitka_weather_07-2014.csv"

with open(filename) as f:
    reader = csv.reader(f)  # reader object bound to the open file
    header_row = next(reader)  # first call returns the header line

    dates, highs, lows = [], [], []
    for row in reader:
        # Parse the date on its own: if it fails there is no usable
        # current_date for this row, so report the raw text instead of an
        # unbound or stale variable left over from the previous row.
        try:
            current_date = datetime.strptime(row[0], '%Y-%m-%d')
        except ValueError:
            print(row[0], 'invalid date')
            continue

        try:
            high = int(row[1])
            low = int(row[2])
        except ValueError:
            print(current_date, 'missing data')
        else:
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)  # alpha sets the colour transparency
plt.plot(dates, lows, c='blue', alpha=0.5)

# facecolor is the colour of the area filled between the two series.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

fig.autofmt_xdate()
plt.show()

#例3

import json

#将数据加载到一个列表中

# Load the population records into a list.
filename = 'population_data.json'

with open(filename) as f:
    pop_data = json.load(f)

# Print the 2010 population of every country in the data set.
for pop_dict in pop_data:
    if pop_dict['Year'] == '2010':
        country_name = pop_dict['Country Name']
        # Some values are decimal strings, so go through float() before int().
        population = int(float(pop_dict['Value']))
        print(country_name + ": " + str(population))

#python -m pip install --user pygal==1.7 #安装pygal

from pygal.i18n import COUNTRIES

# Print every country code with its country name, ordered by code.
for country_code in sorted(COUNTRIES):
    print(country_code, COUNTRIES[country_code])

def get_country_code(country_name, countries=None):
    """Return the code whose name equals country_name, or None.

    Args:
        country_name: exact country name to look up.
        countries: optional mapping of code -> name to search. Defaults to
            the module-level COUNTRIES mapping.

    Returns:
        The first code whose name matches, or None when no name matches.
    """
    if countries is None:
        countries = COUNTRIES

    for code, name in countries.items():
        if name == country_name:
            return code

    # Only reached after every entry has been checked without a match.
    return None

print(get_country_code('Andorra')) # look up the code for the name 'Andorra' and print the result

#3.使用API

#浏览器输入：https://api.github.com/search/repositories?q=language:python&sort=stars

#获取主要语言为Python的仓库信息，并按照获得的星级排序(sort=stars)

#安装requests包：

#pip install --user requests

#例1

import requests

import pygal

from pygal.style import LightColorizedStyle as LCS,LightenStyle as LS

# Query the GitHub search API for Python repositories sorted by stars and
# print a summary of each returned repository.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
r = requests.get(url)
print("Status code:", r.status_code)

response_dict = r.json()
print(response_dict.keys())

repo_dicts = response_dict['items']

# Inspect the first repository to see which keys are available.
repo_dict = repo_dicts[0]
print('\nKeys:', len(repo_dict))
for key in sorted(repo_dict):
    print(key)

print("Selected information about each repository:")
for repo_dict in repo_dicts:
    print('\nName:', repo_dict['name'])
    print('Owner:', repo_dict['owner']['login'])
    print('Stars:', repo_dict['stargazers_count'])
    print('Repository:', repo_dict['html_url'])
    print('Description:', repo_dict['description'])

# Collect the bar labels and the per-bar data for the chart.
names, plot_dicts = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    # 'label' is the tooltip shown on hover and 'xlink' makes the bar a
    # clickable link. The keys must be string literals; the bare names used
    # before raised NameError.
    plot_dict = {
        'value': repo_dict['stargazers_count'],
        # A repository without a description yields None here; fall back to
        # an empty string so the label is always text.
        'label': repo_dict['description'] or '',
        'xlink': repo_dict['html_url'],
    }
    plot_dicts.append(plot_dict)

# Visualisation
my_style = LS('#333366', base_style=LCS)

# Bar chart with x labels rotated by 45 degrees and the legend hidden.
chart = pygal.Bar(style=my_style, x_label_rotation=45, show_legend=False)
chart.title = 'Most-Starred Python Projects on Github'
chart.x_labels = names

chart.add('', plot_dicts)  # the series label is left empty
chart.render_to_file('python_repos.svg')

#查看搜索API的速率限制：

#浏览器中输入：https://api.github.com/rate_limit

#例2

#浏览器中输入：https://hacker-news.firebaseio.com/v0/item/9884165.json

import requests

from operator import itemgetter

# Fetch the current top Hacker News stories and list the first 30 by
# number of comments.
# The list of story ids comes from the topstories endpoint. The item
# endpoint used before returns a single story as a dict, which cannot be
# sliced as a list of ids.
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print('Status code:', r.status_code)

submission_ids = r.json()
submission_dicts = []

for submission_id in submission_ids[:30]:
    # One extra request per story to fetch its details.
    url = ('https://hacker-news.firebaseio.com/v0/item/' + str(submission_id) + '.json')
    submission_r = requests.get(url)
    print(submission_r.status_code)
    response_dict = submission_r.json()

    submission_dict = {
        'title': response_dict['title'],
        'link': 'https://news.ycombinator.com/item?id=' + str(submission_id),
        # 'descendants' is absent on some items; count those as 0 comments.
        'comments': response_dict.get('descendants', 0),
    }
    submission_dicts.append(submission_dict)

# Most-discussed stories first.
submission_dicts = sorted(submission_dicts, key=itemgetter('comments'), reverse=True)

for submission_dict in submission_dicts:
    print('\nTitle:', submission_dict['title'])
    print('Discussion link:', submission_dict['link'])
    print('Comments', submission_dict['comments'])

Trisyp

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
打赏
1
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录