- 然而,在 Python 3 下照原样输入程序运行会报错:
- provinces=content1.split(',')
- TypeError: a bytes-like object is required, not 'str'
- 原因是 Python 3 中 urlopen(...).read() 返回的是 bytes,而 split(',') 传入的分隔符是 str,两者类型不一致。于是,加一句把 bytes 按 utf-8 解码为字符串就OK啦:content1=str(urllib.request.urlopen(url1).read(),encoding='utf-8')
- 这样 content1.split(',') 得到的是一个 list,其中每一项形如“编号|名称”
- 同理,再依次抓取各省份下的城市信息、各城市下的地区信息
-
"""Scrape district weather codes from m.weather.com.cn into ``getcity.py``.

The site exposes a four-level tree of ``code|name`` listings
(provinces -> cities -> districts -> final weather code).  This script
walks the tree and writes a Python source file containing a ``city``
dict literal that maps each district name to its final weather code.
"""
import urllib.request

# '%s' is replaced by a province/city/district code; the empty string
# yields 'city.xml', which is the top-level province listing.
BASE_URL = 'http://m.weather.com.cn/data3/city%s.xml'
OUTPUT_PATH = 'getcity.py'


def fetch_text(code=''):
    """Download the listing for *code* and return it as text.

    ``urlopen(...).read()`` returns bytes in Python 3, so the payload is
    decoded as UTF-8 before any ``str`` operation such as ``split(',')``.
    The response is closed as soon as it has been read.
    """
    with urllib.request.urlopen(BASE_URL % code) as response:
        return response.read().decode('utf-8')


def parse_entries(text):
    """Split ``'code|name,code|name'`` into a list of ``(code, name)`` pairs.

    Entries that are empty or lack the ``|`` separator are skipped instead
    of raising ``IndexError``; an empty code is skipped as well because it
    would make the next request fetch the top-level listing again.
    """
    entries = []
    for item in text.split(','):
        fields = item.split('|')
        if len(fields) < 2 or not fields[0]:
            continue
        entries.append((fields[0], fields[1]))
    return entries


def iter_district_codes(fetch=fetch_text):
    """Yield ``(district_name, weather_code)`` for every district.

    *fetch* is a callable taking a code string and returning the listing
    text; it defaults to the network fetcher and can be replaced by any
    other callable with the same contract.
    """
    # Province listing (fetched once).
    for province_code, _ in parse_entries(fetch('')):
        # City listing of the province.
        for city_code, _ in parse_entries(fetch(province_code)):
            # District listing of the city.
            for district_code, name in parse_entries(fetch(city_code)):
                # The last level is a single 'district_code|weather_code'.
                fields = fetch(district_code).split('|')
                if len(fields) < 2:
                    continue
                yield name, fields[1]


def render_city_dict(pairs):
    """Return the source text of the ``city`` dict for *pairs*."""
    lines = [" '%s': '%s',\n" % (name, code) for name, code in pairs]
    return 'city={\n' + ''.join(lines) + '}'


def main():
    """Scrape all districts and write the result to ``OUTPUT_PATH``."""
    result = render_city_dict(iter_district_codes())
    # District names are Chinese: write UTF-8 explicitly so the output does
    # not depend on the platform's default encoding and stays importable
    # as Python 3 source.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.write(result)


if __name__ == '__main__':
    main()
结果:海淀:101010200
朝阳:101010300
顺义:101010400
怀柔:101010500
通州:101010600
昌平:101010700
延庆:101010800
丰台:101010900等