python实用教程(七):数据可视化

13. numpy和pandas简单使用

#1

import numpy as np

import pandas as pd

# numpy: element-wise arithmetic, boolean masks, slicing and basic statistics

height = [1.73, 1.68, 1.72]
weight = [56, 62, 75]

# Building an ndarray converts every element to one common dtype.
npHeight = np.array(height)
npWeight = np.array(weight)

# Array arithmetic is applied position by position.
bmi = npWeight / npHeight ** 2
print(bmi)

# Boolean mask: keep only the entries greater than 20.
bmi2 = bmi[bmi > 20]

np2D = np.array([
    [1.73, 1.68, 1.72],
    [56, 62, 75],
])
print(np2D.shape)    # (rows, columns): 2 rows, 3 columns
print(np2D[:, 1:2])  # every row, second column only (result stays 2-D)
print(np2D[1, :])    # the whole second row

# 1000 samples from each normal distribution, rounded to two decimals.
height2 = np.random.normal(1.75, 0.5, 1000).round(2)
weight2 = np.random.normal(65, 20, 1000).round(2)

# Put the two 1-D arrays side by side as the columns of one 2-D array.
npAll = np.stack((height2, weight2), axis=1)
print(npAll[1:10, :])  # rows 1 through 9

print(np.corrcoef(npAll[:, 0], npAll[:, 1]))  # correlation matrix of the two columns
print(npAll[:, 1].std())                      # standard deviation of the second column
print(npAll[:, 0].mean())                     # mean of the first column
print(np.median(npAll[:, 1]))                 # median of the second column

# pandas: load a CSV into a DataFrame, then select columns, rows and single cells

# index_col=0 uses the first CSV column as the row labels instead of as data.
data = pd.read_csv("customers.csv", index_col=0)

print(data.loc[:, "A"])  # column "A"

# Append a new column "E"; presumably customers.csv has six rows - confirm.
data["E"] = list(range(1, 7))

print(data["A"].div(data["B"]))  # column "A" divided element-wise by column "B"

row_a = data.loc["a"]
print(row_a)  # the row labelled "a"

print(data.loc["a", "A"])  # the cell at row "a", column "A"
# Equivalent: print(data["A"].loc["a"]) or print(data.loc["a"]["A"])

14. 数据可视化

#1

import matplotlib

import matplotlib.pyplot as plt

# Plot the first five square numbers as a thick line.
input_values = [1, 2, 3, 4, 5]
squares = [1, 4, 9, 16, 25]

plt.plot(input_values, squares, linewidth=5)

# plt.title("Square Numbers", fontsize=24)
plt.xlabel("Value", fontsize=14)
plt.ylabel("Square of Value", fontsize=14)

# Set the size of the major tick labels on both axes.
plt.tick_params(axis='both', which='major', labelsize=14)

# Open the matplotlib viewer and display the figure.
plt.show()

#2

import matplotlib.pyplot as plt

# Scatter plot of n squared for n = 1..1000, saved to an image file.
x = list(range(1, 1001))
y = [value ** 2 for value in x]

# c=y colours each point by its y value; edgecolor='none' drops the outlines.
plt.scatter(x, y, c=y, s=10, edgecolor='none')

# Value range of each axis: [xmin, xmax, ymin, ymax].
plt.axis([0, 1100, 0, 1_100_000])

# plt.show()
# Save the chart to a file; bbox_inches='tight' trims surrounding whitespace.
plt.savefig('sp.png', bbox_inches='tight')

#3(随机漫步)

import matplotlib.pyplot as plt

from random_walk import RandomWalk

#############random_walk.py###############

from random import choice

class RandomWalk():
    """Generate the points of a two-dimensional random walk."""

    def __init__(self, num_points=5000):
        """Store the target number of points and start the walk at (0, 0).

        num_points: total number of points the finished walk should hold,
        including the starting point.
        """
        self.num_points = num_points
        # Every walk starts from the origin.
        self.x = [0]
        self.y = [0]

    def fill_walk(self):
        """Append random steps to self.x / self.y until num_points is reached."""
        while len(self.x) < self.num_points:
            # Pick a direction (+1 / -1) and a distance (0..4) for each axis.
            x_direction = choice([1, -1])
            x_distance = choice([0, 1, 2, 3, 4])
            x_step = x_direction * x_distance

            y_direction = choice([1, -1])
            y_distance = choice([0, 1, 2, 3, 4])
            y_step = y_direction * y_distance

            # Reject a step that would not move at all.
            if x_step == 0 and y_step == 0:
                continue

            # The next point is the last point plus the step.
            self.x.append(self.x[-1] + x_step)
            self.y.append(self.y[-1] + y_step)

###############################################

# Build one random walk and draw it as a scatter plot.
rw = RandomWalk()
rw.fill_walk()

# dpi is the resolution; figsize is the size of the plotting window.
plt.figure(dpi=128, figsize=(10, 6))

# Colour the points by their position in the walk (light to dark blue).
point_numbers = list(range(rw.num_points))
plt.scatter(rw.x, rw.y, c=point_numbers, cmap=plt.cm.Blues, edgecolor='none', s=15)

plt.scatter(0, 0, c='green', edgecolor='none', s=100)              # start point
plt.scatter(rw.x[-1], rw.y[-1], c='red', edgecolor='none', s=100)  # end point

# Hide both axes on the Axes that already holds the scatter plots.
# plt.gca() returns the current Axes, whereas plt.axes() adds a new Axes
# to the figure (see the matplotlib pyplot documentation).
current_axes = plt.gca()
current_axes.get_xaxis().set_visible(False)
current_axes.get_yaxis().set_visible(False)

plt.show()

#3.下载数据

#1

import csv #导入csv模块

from matplotlib import pyplot as plt

# Read the second column of every data row from the weather CSV and plot it.
filename = "sitka_weather_07-2014.csv"

# The reader pulls lines lazily from f, so all reading must happen while the
# file is still open, i.e. inside the with block.
with open(filename) as f:
    reader = csv.reader(f)     # reader object bound to the open file
    header_row = next(reader)  # first call returns the first line (the header)

    # enumerate() yields each header's index together with its text.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

    # Column 1 of each remaining row, converted to int.
    highs = [int(row[1]) for row in reader]

print(highs)

fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(highs, c='red')
plt.show()

#2

import csv

from matplotlib import pyplot as plt

from datetime import datetime

# Demo: strptime() turns a date string into a datetime object.
first_date = datetime.strptime('2014-7-1', '%Y-%m-%d')
print(first_date)

filename = "sitka_weather_07-2014.csv"

# All reading happens inside the with block, while the file is still open.
with open(filename) as f:
    reader = csv.reader(f)     # reader object bound to the open file
    header_row = next(reader)  # first call returns the first line (the header)

    dates, highs, lows = [], [], []
    for row in reader:
        try:
            current_date = datetime.strptime(row[0], '%Y-%m-%d')
            high = int(row[1])
            low = int(row[2])
        except ValueError:
            # A field could not be converted; report the row and skip it.
            print(current_date, 'missing data')
        else:
            # Only rows that converted cleanly are recorded, one entry per row.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

fig = plt.figure(dpi=128, figsize=(10, 6))

# alpha sets the transparency of the line colour.
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)

# facecolor is the colour of the filled region between the two series.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

fig.autofmt_xdate()
plt.show()

#3

import json

#将数据加载到一个列表中

# Load the whole JSON document into a list.
filename = 'population_data.json'
with open(filename) as f:
    pop_data = json.load(f)

# Print the 2010 population figure of every entry.
for pop_dict in pop_data:
    if pop_dict['Year'] != '2010':
        continue
    country_name = pop_dict['Country Name']
    # Some values contain a decimal point, so go through float before int.
    population = int(float(pop_dict['Value']))
    print(f"{country_name}: {population}")

#python -m  pip install --user pygal==1.7 #安装pygal

from pygal.i18n import COUNTRIES

# Print every country code in COUNTRIES, in sorted order, next to its name.
for country_code in sorted(COUNTRIES):
    print(country_code, COUNTRIES[country_code])

def get_country_code(country_name):
    """Return the code in COUNTRIES whose name equals country_name.

    Returns None when no entry has exactly that name.
    """
    matching_codes = (
        code for code, name in COUNTRIES.items() if name == country_name
    )
    return next(matching_codes, None)


print(get_country_code('Andorra'))

#3.使用API

#浏览器输入:https://api.github.com/search/repositories?q=language:python&sort=stars

#获取主要语言为Python的仓库信息,并按照获得的星级排序(sort=stars)

#安装requests包:

#pip install --user requests

#1

import requests

import pygal

from pygal.style import LightColorizedStyle as LCS,LightenStyle as LS

# Query the GitHub search API for Python repositories, sorted by stars.
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
r = requests.get(url)
print("Status code:", r.status_code)

# Decode the JSON body and show its top-level keys.
response_dict = r.json()
print(response_dict.keys())

repo_dicts = response_dict['items']

# Inspect the first repository: number of keys, then each key in sorted order.
repo_dict = repo_dicts[0]
print('\nKeys:', len(repo_dict))
for key in sorted(repo_dict):
    print(key)

print("Selected information about each repository:")
for repo_dict in repo_dicts:
    print('\nName:', repo_dict['name'])
    print('Owner:', repo_dict['owner']['login'])
    print('Stars:', repo_dict['stargazers_count'])
    print('Repository:', repo_dict['html_url'])
    print('Description:', repo_dict['description'])

# Collect the bar labels and one tooltip dictionary per repository.
names, plot_dicts = [], []
for repo_dict in repo_dicts:
    names.append(repo_dict['name'])
    # 'value' is the bar height, 'label' the hover tooltip text and 'xlink'
    # the link opened when the bar is clicked.
    plot_dict = {
        'value': repo_dict['stargazers_count'],
        # The API returns null for a repository without a description; fall
        # back to an empty string so the label is always a str.
        'label': repo_dict['description'] or '',
        'xlink': repo_dict['html_url'],
    }
    plot_dicts.append(plot_dict)

# Visualization: render the collected repositories as a bar chart.
my_style = LS('#333366', base_style=LCS)

# Bar chart with x labels rotated 45 degrees and the legend hidden.
chart_options = {
    'style': my_style,
    'x_label_rotation': 45,
    'show_legend': False,
}
chart = pygal.Bar(**chart_options)
chart.title = 'Most-Starred Python Projects on Github'
chart.x_labels = names

# The series name is an empty string.
chart.add('', plot_dicts)
chart.render_to_file('python_repos.svg')

#查看搜索API的速率限制:

#浏览器中输入:https://api.github.com/rate_limit

#2

#浏览器中输入:https://hacker-news.firebaseio.com/v0/item/9884165.json

import requests

from operator import itemgetter

# The top-stories endpoint returns a JSON list of submission ids. The single
# item endpoint returns one dictionary, which cannot be sliced as a list.
url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
r = requests.get(url)
print('Status code:', r.status_code)

submission_ids = r.json()
submission_dicts = []

# Fetch the details of the first 30 submissions, one API call each.
for submission_id in submission_ids[:30]:
    url = ('https://hacker-news.firebaseio.com/v0/item/' + str(submission_id) + '.json')
    submission_r = requests.get(url)
    print(submission_r.status_code)
    response_dict = submission_r.json()

    submission_dict = {
        'title': response_dict['title'],
        'link': 'https://news.ycombinator.com/item?id=' + str(submission_id),
        # 'descendants' is treated as the comment count; default to 0 when
        # the key is absent.
        'comments': response_dict.get('descendants', 0),
    }
    submission_dicts.append(submission_dict)

# Most-commented submissions first.
submission_dicts = sorted(submission_dicts, key=itemgetter('comments'), reverse=True)

for submission_dict in submission_dicts:
    print('\nTitle:', submission_dict['title'])
    print('Discussion link:', submission_dict['link'])
    print('Comments', submission_dict['comments'])

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Trisyp

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值