AQI Crawler Program
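This script drives a headless browser (PhantomJS via Selenium) to load the monthly daily-AQI pages on aqistudy.cn, from 2015-07 through 2016-11, for every city listed in city.txt. Because the tables are rendered by JavaScript, the page source is taken from the browser and parsed with pandas.read_html, and each city's rows are written to its own CSV. For example, for 北京 and month 2015-07 the request URL is https://www.aqistudy.cn/historydata/daydata.php?city=%E5%8C%97%E4%BA%AC&month=2015-07.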

# encoding=utf-8
import time
from urllib.parse import quote  # stdlib replacement for django.utils.http.urlquote (removed in Django 4.0)

import pandas as pd
from selenium import webdriver

# PhantomJS is deprecated in recent Selenium releases; a headless Chrome/Firefox driver works the same way.
driver = webdriver.PhantomJS()

base_url = 'https://www.aqistudy.cn/historydata/daydata.php?city='
str_city = '北京'  # example single city; the loop below iterates over every city in city.txt


def get_month_set():
    """Build the list of months to crawl: 2015-07 through 2016-11."""
    month_set = list()
    for i in range(7, 10):
        month_set.append('2015-0%s' % i)
    for i in range(10, 13):
        month_set.append('2015-%s' % i)
    for i in range(1, 10):
        month_set.append('2016-0%s' % i)
    month_set.append('2016-%s' % 10)
    month_set.append('2016-%s' % 11)
    return month_set


def get_city_set():
    """Read the city list from city.txt (one UTF-8 city name per line)."""
    str_file = r'city.txt'
    city_set = list()
    with open(str_file, 'rb') as fp:
        for line in fp.readlines():
            city_set.append(str(line.strip(), encoding='utf-8'))
    return city_set


month_set = get_month_set()
city_set = get_city_set()

for city in city_set:
    file_name = city + '.csv'
    time.sleep(10)  # pause between cities to avoid hammering the site
    for i in range(len(month_set)):
        str_month = month_set[i]
        utf8_city = quote(city)  # percent-encode the Chinese city name for the URL
        weburl = '%s%s&month=%s' % (base_url, utf8_city, str_month)

        driver.get(weburl)
        # The daily data is rendered into an HTML table by JavaScript; grab the first table on the page.
        dfs = pd.read_html(driver.page_source, header=0)[0]

        # Append each month to the city's CSV, writing the header only for the first month.
        dfs.to_csv(file_name, mode='a', header=(i == 0), index=False)
        print('%d---%s,%s---DONE' % (city_set.index(city), city, str_month))
        time.sleep(10)

driver.quit()  # quit only after all requests; quitting inside the loop would kill later page loads
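get_city_set() expects a plain-text file named city.txt next to the script, with one city name per line in UTF-8. A minimal sketch of its layout (the city names below are illustrative):

北京
上海
广州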
