import pandas as pd
import numpy as np
import re
import os
import json
#import js2py
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlencode
import time  # used to pause between requests
import random
Next, pull out the city names we want to crawl:
file = pd.read_csv(r'C:\Users\信息明细表.csv').loc[:, ['service_date', 'city', 'service_result']]
city_name = list(file.groupby('city').city.count().index)  # unique city names (the groupby index is de-duplicated and sorted)
city_name
output:
['三亚市',
 '三门峡市',
 '上海市',
 '上饶市',
 '东莞市',
 '东营市',
 …]
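The same list of unique cities can also be obtained directly with pandas' unique(); a minimal alternative sketch, assuming the file DataFrame loaded above:
# Equivalent shortcut: unique, sorted city names (groupby also drops missing values)
city_name = sorted(file['city'].dropna().unique())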
It turns out that the city names on 2345 Weather do not carry the "市" (city) suffix, so strip it:
city = []
for i in city_name:
    r = re.sub('市', '', i)
    city.append(r)
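Note that re.sub('市', '', i) removes every occurrence of the character, not just a trailing one. A slightly safer variant (a sketch reusing the city_name list built above) strips only the suffix:
# Strip only a trailing "市" so characters inside the name are left untouched
city = [c[:-1] if c.endswith('市') else c for c in city_name]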
Next, find the area ID that corresponds to each city:
def get_areaid(city):
    url = 'http://tianqi.2345.com/tqpcimg/tianqiimg/theme4/js/citySelectData2.js'
    r = requests.get(url)
    print(r.encoding, r.apparent_encoding)
    r.encoding = r.apparent_encoding
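citySelectData2.js maps city names to area IDs, but its exact layout is not reproduced here, so the helper below is only a sketch: the regular expression assumes entries of the form "ID-城市名" and should be adjusted to match the real file.
def parse_city_ids(js_text):
    # Assumed entry format such as "54511-北京": numeric area ID, a dash, then a Chinese name
    pairs = re.findall(r'(\d+)-([\u4e00-\u9fa5]+)', js_text)
    return {name: area_id for area_id, name in pairs}

# Usage sketch: area_ids = parse_city_ids(r.text); area_ids.get('三亚')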