from lxml import etree
from fake_useragent import UserAgent
import time
import requests
import csv
import random
# Scrape daily historical weather for one city from lishi.tianqi.com and
# append it to a CSV file, one row per day.

# CSV header; every data row is written in exactly this column order
# (year, month, day, low temp, high temp, weather, wind direction, wind level).
names = ["年", "月", "日", "最低温(℃)", "最高温(℃)", "天气", "风向", "级数"]
month = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
year = ["11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21"]

# City path segment of the history URL. Other slugs previously used with this
# script: qinhuangdao, fangchenggang, tianjin, jingtanggang, ningbo,
# guangzhou, caofeidian.
CITY_SLUG = 'qingyuan3'
URL_TEMPLATE = 'https://lishi.tianqi.com/{}/20{}{}.html'
OUTPUT_PATH = '清远.csv'
# Absolute XPath of the per-day <li> items; tied to the site's page layout.
DAY_ROW_XPATH = '/html/body/div[7]/div[1]/div[4]/ul/li'
REQUEST_TIMEOUT = 10  # seconds; keeps a stalled connection from hanging the crawl


def parse_day_row(texts):
    """Convert the text cells of one day entry into a CSV row.

    Args:
        texts: Sequence of strings, the ``./div/text()`` nodes of one day
            item: date, two temperatures, weather, and "wind level".

    Returns:
        A list ``[year, month, day, low, high, weather, wind, level]`` that
        lines up with ``names``, or ``None`` when the entry has fewer than
        five cells or no date token.
    """
    if len(texts) < 5:
        return None
    date_tokens = texts[0].split()
    if not date_tokens:
        return None
    row = date_tokens[0].split("-")

    # NOTE(review): assumes the page lists the daily high before the low (the
    # original locals were named ma/mi in that order) -- confirm against the
    # live page. The header declares low before high, so swap when writing.
    high = texts[1].replace("℃", "")
    low = texts[2].replace("℃", "")

    # The wind cell is "<direction> <N>级"; tolerate a missing level instead
    # of raising IndexError.
    wind_tokens = texts[4].split()
    wind = wind_tokens[0] if wind_tokens else ""
    level = wind_tokens[1].replace("级", "") if len(wind_tokens) > 1 else ""

    row.extend([low, high, texts[3], wind, level])
    return row


def fetch_month_rows(url, headers):
    """Download one monthly history page and return its parsed day rows.

    Args:
        url: Full URL of the monthly page.
        headers: HTTP headers to send with the request.

    Returns:
        A list of rows as produced by ``parse_day_row``. The list is empty
        when the request fails, the server answers with an error status, or
        no day items are found.
    """
    try:
        response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report and move on so one bad month does not abort
        # the whole crawl.
        print('skipping {}: {}'.format(url, exc))
        return []

    tree = etree.HTML(response.text)
    if tree is None:  # defensive: nothing parseable came back
        return []

    rows = []
    for item in tree.xpath(DAY_ROW_XPATH):
        row = parse_day_row(item.xpath('./div/text()'))
        if row is not None:
            rows.append(row)
    return rows


def main():
    """Crawl every configured year/month and append the rows to the CSV."""
    headers = {'User-Agent': UserAgent().random}
    # Open the output once for the whole crawl instead of once per row.
    with open(OUTPUT_PATH, 'a', newline='', encoding='utf-8-sig') as fp:
        writer = csv.writer(fp)
        # In append mode the stream starts at the end of the file, so a zero
        # position means the file is new/empty: write the header only then,
        # so re-runs do not insert duplicate header rows.
        if fp.tell() == 0:
            writer.writerow(names)
        for y in year:
            for m in month:
                url = URL_TEMPLATE.format(CITY_SLUG, y, m)
                print(url)
                writer.writerows(fetch_month_rows(url, headers))
                fp.flush()  # keep finished months on disk if a later one fails
                # Random 0-2 s pause between page requests.
                time.sleep(round(random.uniform(0, 2), 2))


if __name__ == "__main__":
    main()
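# Article title: 爬虫实战(天气) -- "Web scraping in practice (weather)"
# Page footer: 最新推荐文章于 2024-06-14 10:31:03 发布 -- "latest recommended article published 2024-06-14 10:31:03"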