# 获取数据并写入xls文件 (fetch the weather data and write it to an .xls file)
import csv import random import requests from lxml import etree # 城市列表如下: # http://hebei.weather.com.cn/m2/j/hebei/public/city.min.js # 目前支持北京、天津、重庆三个城市7天天气预报 # 支持河南天气更新 # 18点后获取天气预报将get_text()方法中的0改为1 headers = [ "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 " "Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 " "Safari/537.75.14", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)", 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', 'Opera/9.25 (Windows NT 5.1; U; en)', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12', 'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9', "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 " "Chrome/16.0.912.77 Safari/535.7", "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 "] def get_province(): url = 'http://www.weather.com.cn/province/' r = requests.get(url, headers={'User-Agent': headers[random.randint(1, 11)]}) # encode解码,将ISO-8859-1解码成unicode html = r.text.encode("ISO-8859-1") # decode编码,将unicode编码成utf-8 html = html.decode("utf-8") html1 = etree.HTML(html) data = html1.xpath('/html/body/div[2]/div[2]/ul/li/a') list_province = [] for i in data: item = {'省辖市': i.text, '链接': i.get('href')} list_province.append(item) return list_province def get_city_link(ul, ulink, list_weather): ul = ul ulink = ulink if ul in list_weather: 
url = ulink r = requests.get(url, headers={'User-Agent': headers[random.randint(1, 11)]}) # encode解码,将ISO-8859-1解码成unicode html = r.text.encode("ISO-8859-1") # decode编码,将unicode编码成utf-8 html = html.decode("utf-8") html1 = etree.HTML(html) return html1 else: pass def get_special(ulink): url = ulink r = requests.get(url, headers={'User-Agent': headers[random.randint(1, 11)]}) # encode解码,将ISO-8859-1解码成unicode html = r.text.encode("ISO-8859-1") # decode编码,将unicode编码成utf-8 html = html.decode("utf-8") html1 = etree.HTM