import pandas as pd
import lxml
import requests
import csv
from lxml import html
# Page on weather.com.cn that the script downloads and parses.
# NOTE(review): 101280101 is presumably a city/station code - confirm.
weatherUrl='http://www.weather.com.cn/weather/101280101.shtml'
# XPath template matching <li> elements whose class attribute is exactly
# "sky skyid lv<N> <suffix>"; the two placeholders are filled via str.format.
link2='//li[@class="sky skyid lv{} {}"]'
# XPath template matching <li> elements whose class attribute is exactly
# "sky skyid lv<N>"; the single placeholder is filled via str.format.
link1='//li[@class="sky skyid lv{}"]'
# Fetch the target web page.
def getSource(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The undecoded response body as ``bytes`` (the page source).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the
    # HTML parser, which would silently produce no matches.
    response.raise_for_status()
    # ``response.encoding`` only influences ``response.text``; since the raw
    # bytes are returned, assigning it here had no effect and was dropped.
    return response.content
# Parse the downloaded page into an lxml document so it can be queried
# with XPath.
selector=lxml.html.document_fromstring(getSource(weatherUrl))
Weather=[]
WeatherLink=[]
WeatherList=[]
# Build the XPath expressions for lv1 through lv3: for each level, first the
# plain class form (link1) and then the form with the extra "on" class token
# (link2).
for i in range(1,4):
    weatherLink1=link1.format(i)
    weatherLink2=link2.format(i,'on')
    WeatherLink.append(weatherLink1)
    WeatherLink.append(weatherLink2)
# Evaluate each XPath exactly once and collect every matching <li> element,
# preserving expression order and document order within each expression.
# (Previously the same XPath was re-run for every matched element.)
for weatherXpath in WeatherLink:
    WeatherList.extend(selector.xpath(weatherXpath))
for each in We