python下载数据的学习

学习来源

CSV文件格式

将数据作为一系列以逗号分隔的值写入文件,这样的文件称为CSV文件。

  • 分析CSV文件头
    模块csv中的函数reader()返回一个阅读器对象,该对象实现了__next__()方法(在Python 2中名为next())。调用内置函数next()并将阅读器对象作为参数传递给它时,将调用该方法,从而返回文件中的下一行。
import csv

# Print the first line of the CSV file, which holds the column names.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as csv_file:
    # next() pulls a single line from the reader; on a fresh reader that is
    # the header row.
    header_row = next(csv.reader(csv_file))
    print(header_row)

输出结果

['AKDT', 'Max TemperatureF', 'Mean TemperatureF', 'Min TemperatureF', 'Max Dew PointF', 'MeanDew PointF', 'Min DewpointF', 'Max Humidity', ' Mean Humidity', ' Min Humidity', ' Max Sea Level PressureIn', ' Mean Sea Level PressureIn', ' Min Sea Level PressureIn', ' Max VisibilityMiles', ' Mean VisibilityMiles', ' Min VisibilityMiles', ' Max Wind SpeedMPH', ' Mean Wind SpeedMPH', ' Max Gust SpeedMPH', 'PrecipitationIn', ' CloudCover', ' Events', ' WindDirDegrees']
  • 打印文件头以及位置
    调用enumerate()来获取每个元素的索引以及其值。
import csv

# List every column name of the CSV file next to its index.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as csv_file:
    reader = csv.reader(csv_file)
    header_row = next(reader)

    # enumerate() pairs each header with its position, which is the index
    # used later to pick a column out of a data row.
    for position, title in enumerate(header_row):
        print(position, title)

输出结果

0 AKDT
1 Max TemperatureF
2 Mean TemperatureF
3 Min TemperatureF
4 Max Dew PointF
5 MeanDew PointF
6 Min DewpointF
7 Max Humidity
8  Mean Humidity
9  Min Humidity
10  Max Sea Level PressureIn
11  Mean Sea Level PressureIn
12  Min Sea Level PressureIn
13  Max VisibilityMiles
14  Mean VisibilityMiles
15  Min VisibilityMiles
16  Max Wind SpeedMPH
17  Mean Wind SpeedMPH
18  Max Gust SpeedMPH
19 PrecipitationIn
20  CloudCover
21  Events
22  WindDirDegrees
  • 提取并读取文件
    阅读器对象从其停留的地方继续往下读取CSV文件,每次都自动返回当前位置的下一行。下面的代码读取每行中索引1处的数据(即第二列,最高气温)。
import csv

# Collect the value at index 1 of every data row and print the list.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as csv_file:
    reader = csv.reader(csv_file)
    header_row = next(reader)  # consume the header so only data rows remain

    # The csv module yields text, so the values are kept as strings here.
    highs = [row[1] for row in reader]
    print(highs)

输出结果

['64', '71', '64', '59', '69', '62', '61', '55', '57', '61', '57', '59', '57', '61', '64', '61', '59', '63', '60', '57', '69', '63', '62', '59', '57', '57', '61', '59', '61', '61', '66']
  • 绘制气温图表
import csv
from matplotlib import pyplot as plt

# Plot the daily high temperatures read from the CSV file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)  # skip the header line

    # The csv module yields text. Convert to int so matplotlib plots real
    # numbers; string values would be treated as categories and placed on
    # the y-axis in order of first appearance instead of by magnitude.
    highs = [int(row[1]) for row in reader]

# The file is no longer needed once the rows are read, so the chart is drawn
# after the with-block instead of keeping the file open while the window
# is shown.
print(highs)

# Draw the chart.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(highs, c='red')
plt.title("Daily high temperatures,July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()

输出结果
在这里插入图片描述

  • 模块datetime
    模块datetime中的方法strptime()接受两个实参:要解析的日期字符串,以及说明该字符串格式的格式字符串。格式字符串中常用的实参如下。
实参:含义
%A:星期的名称,如Monday
%B:月份名,如January
%m:用数字表示的月份(01~12)
%d:用数字表示的月份中的一天(01~31)
%Y:四位数的年份,如2021
%y:两位数的年份,如21
%H:24小时制的小时数(00~23)
%I:12小时制的小时数(01~12)
%p:am或pm
%M:分钟数(00~59)
%S:秒数(00~61)
  • 图表中添加日期
import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Plot daily highs against their dates.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as csv_file:
    reader = csv.reader(csv_file)
    header_row = next(reader)

    # Column 0 is parsed as a YYYY-MM-DD date, column 1 as an integer high.
    dates, highs = [], []
    for row in reader:
        dates.append(datetime.strptime(row[0], "%Y-%m-%d"))
        highs.append(int(row[1]))
    print(highs)

    # Draw the chart.
    fig = plt.figure(dpi=128, figsize=(10, 6))
    plt.plot(dates, highs, c='red')
    plt.title("Daily high temperatures,July 2014", fontsize=24)
    plt.xlabel('', fontsize=16)
    fig.autofmt_xdate()  # rotate the date labels on the x-axis
    plt.ylabel("Temperature(F)", fontsize=16)
    plt.tick_params(axis='both', which='major', labelsize=16)
    plt.show()

输出结果
在这里插入图片描述

  • 绘制更多数据
import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Plot the daily high and low temperatures for the whole file.
filename = 'sitka_weather_2014.csv'
with open(filename) as csv_file:
    reader = csv.reader(csv_file)
    header_row = next(reader)

    # Per row: date from column 0, high from column 1, low from column 3.
    dates, highs, lows = [], [], []
    for row in reader:
        dates.append(datetime.strptime(row[0], "%Y-%m-%d"))
        highs.append(int(row[1]))
        lows.append(int(row[3]))
    print(highs)

    # Draw both series on one figure: highs in red, lows in blue.
    fig = plt.figure(dpi=128, figsize=(10, 6))
    plt.plot(dates, highs, c='red')
    plt.plot(dates, lows, c='blue')
    plt.title("Daily high and low temperatures, 2014", fontsize=24)
    plt.xlabel('', fontsize=16)
    fig.autofmt_xdate()  # rotate the date labels on the x-axis
    plt.ylabel("Temperature(F)", fontsize=16)
    plt.tick_params(axis='both', which='major', labelsize=16)
    plt.show()

输出结果
在这里插入图片描述

  • 区域着色
    使用fill_between()填充两个y值之间的空间。alpha指定颜色的透明度。
import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Plot daily highs and lows and shade the band between them.
filename = 'sitka_weather_2014.csv'
with open(filename) as csv_file:
    reader = csv.reader(csv_file)
    header_row = next(reader)

    # Per row: date from column 0, high from column 1, low from column 3.
    dates, highs, lows = [], [], []
    for row in reader:
        dates.append(datetime.strptime(row[0], "%Y-%m-%d"))
        highs.append(int(row[1]))
        lows.append(int(row[3]))
    print(highs)

    # Draw the chart.
    fig = plt.figure(dpi=128, figsize=(10, 6))
    # Both lines are half transparent so the shaded band stays visible.
    for series, colour in ((highs, 'red'), (lows, 'blue')):
        plt.plot(dates, series, c=colour, alpha=0.5)
    # Fill the area between the two series with a faint blue.
    plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)
    plt.title("Daily high and low temperatures, 2014", fontsize=24)
    plt.xlabel('', fontsize=16)
    fig.autofmt_xdate()  # rotate the date labels on the x-axis
    plt.ylabel("Temperature(F)", fontsize=16)
    plt.tick_params(axis='both', which='major', labelsize=16)
    plt.show()

输出结果在这里插入图片描述

  • 错误检查
    当有数据缺失时,应该使用try-except-else进行异常处理。
    文件数据缺失报错
Traceback (most recent call last):
  File "highs_lows.py", line 14, in <module>
    high=int(row[1])
ValueError: invalid literal for int() with base 10: ''

处理方法

import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Plot daily highs and lows, skipping rows whose values cannot be parsed.
filename = 'death_valley_2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)  # skip the header line

    # Per row: date from column 0, high from column 1, low from column 3.
    dates, highs, lows = [], [], []
    for row in reader:
        try:
            current_date = datetime.strptime(row[0], "%Y-%m-%d")
            high = int(row[1])
            low = int(row[3])
        except ValueError:
            # Report the raw date cell rather than current_date: if strptime
            # itself failed, current_date is unbound on the first row or
            # still holds the previous row's date.
            print(row[0], 'missing data')
        else:
            # Only rows where all three values parsed are plotted, which
            # keeps the three lists the same length.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# The file is no longer needed once the rows are read, so the chart is drawn
# after the with-block instead of keeping the file open while the window
# is shown.
print(highs)

# Draw the chart.
fig = plt.figure(dpi=128, figsize=(10, 6))
# Both lines are half transparent so the shaded band stays visible.
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
# Fill the area between the two series with a faint blue.
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)
plt.title("Daily high and low temperatures, 2014", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()  # rotate the date labels on the x-axis
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()

在这里插入图片描述

JSON格式

  • 使用urlopen下载数据
from __future__ import absolute_import, division, print_function

import json
from contextlib import closing

try:
    # Python 2
    from urllib2 import urlopen
except ImportError:
    # Python 3
    from urllib.request import urlopen

# Download the JSON document, save the raw bytes to disk, then parse and
# print it.
json_url = 'https://raw.githubusercontent.com/muxuezi/btc/master/btc_close_2017.json'

# closing() guarantees the connection is released once the body is read, and
# works whether or not the response object is itself a context manager.
with closing(urlopen(json_url)) as response:
    req = response.read()

# Write in binary mode because read() returned the undecoded payload.
with open('btc_close_2017_urllib.json', 'wb') as f:
    f.write(req)

file_urllib = json.loads(req)
print(file_urllib)
  • 提取数据
import json

# Load the list of daily records and print one summary line per record.
filename = 'btc_close_2017.json'
with open(filename) as json_file:
    btc_data = json.load(json_file)

# The keys are listed in the order the template consumes them.
line_template = "{}is moth {} week{},{}, the close price is {}RMB"
fields = ('date', 'month', 'week', 'weekday', 'close')
for btc_dict in btc_data:
    print(line_template.format(*(btc_dict[field] for field in fields)))

输出结果

2017-01-01is moth 01 week52,Sunday, the close price is 6928.6492RMB
2017-01-02is moth 01 week1,Monday, the close price is 7070.2554RMB
2017-01-03is moth 01 week1,Tuesday, the close price is 7175.1082RMB
2017-01-04is moth 01 week1,Wednesday, the close price is 7835.7615RMB
2017-01-05is moth 01 week1,Thursday, the close price is 6928.7578RMB
2017-01-06is moth 01 week1,Friday, the close price is 6196.6928RMB
2017-01-07is moth 01 week1,Saturday, the close price is 6262.1471RMB
2017-01-08is moth 01 week1,Sunday, the close price is 6319.9404RMB
2017-01-09is moth 01 week2,Monday, the close price is 6239.1506RMB
2017-01-10is moth 01 week2,Tuesday, the close price is 6263.1548RMB
2017-01-11is moth 01 week2,Wednesday, the close price is 5383.0598RMB
2017-01-12is moth 01 week2,Thursday, the close price is 5566.7345RMB
2017-01-13is moth 01 week2,Friday, the close price is 5700.0716RMB
2017-01-14is moth 01 week2,Saturday, the close price is 5648.6897RMB
2017-01-15is moth 01 week2,Sunday, the close price is 5674.7977RMB
2017-01-16is moth 01 week3,Monday, the close price is 5730.0658RMB
2017-01-17is moth 01 week3,Tuesday, the close price is 6202.9704RMB
2017-01-18is moth 01 week3,Wednesday, the close price is 6047.6601RMB
2017-01-19is moth 01 week3,Thursday, the close price is 6170.8433RMB
2017-01-20is moth 01 week3,Friday, the close price is 6131.2511RMB
2017-01-21is moth 01 week3,Saturday, the close price is 6326.3657RMB
2017-01-22is moth 01 week3,Sunday, the close price is 6362.9482RMB
2017-01-23is moth 01 week4,Monday, the close price is 6255.5602RMB
2017-01-24is moth 01 week4,Tuesday, the close price is 6074.8333RMB
2017-01-25is moth 01 week4,Wednesday, the close price is 6154.6958RMB
2017-01-26is moth 01 week4,Thursday, the close price is 6295.3388RMB
2017-01-27is moth 01 week4,Friday, the close price is 6320.7206RMB
2017-01-28is moth 01 week4,Saturday, the close price is 6332.5389RMB
2017-01-29is moth 01 week4,Sunday, the close price is 6289.1698RMB
  • 绘制折线图
import json
import pygal

# Load the daily records and render the closing price as an SVG line chart.
filename = 'btc_close_2017.json'
with open(filename) as json_file:
    btc_data = json.load(json_file)

dates, months, weeks, weekdays, close = [], [], [], [], []
# Each text field is copied as-is into its own list, in this key order.
text_columns = (
    ('date', dates),
    ('month', months),
    ('week', weeks),
    ('weekday', weekdays),
)
for btc_dict in btc_data:
    for key, column in text_columns:
        column.append(btc_dict[key])
    # float() first, then int(), so a value with a fractional part is
    # accepted and truncated to a whole number.
    close.append(int(float(btc_dict['close'])))

# Draw the line chart; only every N-th date gets a visible x-axis label.
N = 20
line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
line_chart.title = '收盘价(¥)'
line_chart.x_labels = dates
line_chart.x_labels_major = dates[::N]
line_chart.add('收盘价', close)
line_chart.render_to_file('收盘价曲线图(¥).svg')

在这里插入图片描述

  • 预测趋势
import json
import pygal
import math

# Load the daily records and render the base-10 logarithm of the closing
# price as an SVG line chart.
filename = 'btc_close_2017.json'
with open(filename) as f:
    btc_data = json.load(f)

dates = []
months = []
weeks = []
weekdays = []
close = []
for btc_dict in btc_data:
    dates.append(btc_dict['date'])
    months.append(btc_dict['month'])
    weeks.append(btc_dict['week'])
    weekdays.append(btc_dict['weekday'])
    # float() first, then int(), so a value with a fractional part is
    # accepted and truncated to a whole number.
    close.append(int(float(btc_dict['close'])))

# Draw the line chart; only every N-th date gets a visible x-axis label.
line_chart = pygal.Line(x_label_rotation=20, show_minor_x_labels=False)
# The title, legend entry and file name say "log transform" because the
# plotted values are logarithms, not raw prices. The distinct file name also
# keeps this chart from overwriting the raw-price chart's SVG.
line_chart.title = '收盘价对数变换(¥)'
line_chart.x_labels = dates
N = 20
line_chart.x_labels_major = dates[::N]
close_log = [math.log10(price) for price in close]
line_chart.add('log收盘价', close_log)
line_chart.render_to_file('收盘价对数变换折线图(¥).svg')

在这里插入图片描述
代码和所用文件下载

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值