#encoding=utf-8
import time
import pandas
as pd
import requests
from bs4
import BeautifulSoup
from django.utils.http
import urlquote
from selenium
import webdriver
# One PhantomJS browser session, created at import time and used for the page
# fetches performed by this script.
# NOTE(review): PhantomJS support was deprecated in Selenium 3.8 and removed in
# Selenium 4 -- confirm the installed Selenium still provides webdriver.PhantomJS.
driver = webdriver.PhantomJS()
# URL prefix of the daily-data page; the script appends the percent-encoded
# city name followed by '&month=<YYYY-MM>' to it.
base_url = 'https://www.aqistudy.cn/historydata/daydata.php?city='
# A single city name (Beijing).
# NOTE(review): not referenced anywhere else in the visible code -- possibly a
# leftover from testing; confirm before removing.
str_city = '北京'
def get_month_set(start=(2015, 7), end=(2016, 11)):
    """Return the 'YYYY-MM' label of every month from *start* to *end*, inclusive.

    Args:
        start: ``(year, month)`` of the first month to include (month is 1-12).
        end: ``(year, month)`` of the last month to include (month is 1-12).

    Returns:
        A list of ``'YYYY-MM'`` strings with zero-padded months, in
        chronological order. With the default arguments the list runs from
        ``'2015-07'`` to ``'2016-11'`` (17 entries). The list is empty when
        *end* lies before *start*.

    Raises:
        ValueError: if either month is outside 1-12.
    """
    first_year, first_month = start
    last_year, last_month = end
    for month in (first_month, last_month):
        if not 1 <= month <= 12:
            raise ValueError('month must be in 1..12, got %r' % (month,))

    # Number the months consecutively (year * 12 + zero-based month) so that
    # rolling over a year boundary is plain integer arithmetic.
    first_index = first_year * 12 + (first_month - 1)
    last_index = last_year * 12 + (last_month - 1)
    return [
        '%d-%02d' % (index // 12, index % 12 + 1)
        for index in range(first_index, last_index + 1)
    ]
def get_city_set(str_file='city.txt'):
    """Read city names, one per line, from a UTF-8 encoded file.

    Args:
        str_file: Path of the file to read. Defaults to ``'city.txt'``,
            resolved against the current working directory.

    Returns:
        A list of ``str`` city names in file order. Surrounding ASCII
        whitespace (including the line terminator) is stripped from every
        line, and lines that are empty after stripping are skipped so that no
        empty city name is produced.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if a line is not valid UTF-8.
    """
    # Read bytes and decode explicitly so the result does not depend on the
    # platform's default text encoding; ``with`` guarantees the handle is
    # closed even if decoding fails.
    with open(str_file, 'rb') as fp:
        stripped_lines = (raw_line.strip() for raw_line in fp)
        return [name.decode('utf-8') for name in stripped_lines if name]
# Script body: for every city, fetch the daily-data page of every month with
# the shared browser session and store the scraped tables in '<city>.csv'.
month_set = get_month_set()
city_set = get_city_set()

try:
    # enumerate() supplies the progress index directly; looking it up with
    # city_set.index(city) is a linear scan and reports the wrong position
    # when a city name occurs more than once.
    for city_index, city in enumerate(city_set):
        file_name = city + '.csv'
        # The percent-encoded city name is the same for every month of a city.
        utf8_city = urlquote(city)
        # Tables fetched so far for this city, one DataFrame per month.
        monthly_tables = []
        time.sleep(10)  # pause before each city (presumably request throttling)
        for str_month in month_set:
            weburl = '%s%s&month=%s' % (base_url, utf8_city, str_month)
            driver.get(weburl)
            # header=0 takes the column names from the first table row;
            # [0] selects the first table found in the rendered page.
            monthly_tables.append(
                pd.read_html(driver.page_source, header=0)[0])
            # Rewrite the city's file with every month collected so far, so
            # earlier months are not overwritten by later ones and the data
            # is on disk before the month is reported as done.
            pd.concat(monthly_tables, ignore_index=True).to_csv(file_name)
            print('%d---%s,%s---DONE' % (city_index, city, str_month))
            time.sleep(10)  # pause after each page
finally:
    # Shut the browser down exactly once, after the last page (or on error);
    # quitting inside the loop would make every later driver.get() fail.
    driver.quit()