报错的信息如下:
UnicodeEncodeError: 'gbk' codec can't encode character '\xa0' in position 21: illegal multibyte sequence
代码信息如下:
import urllib.request
import re
import csv
import datetime
import time
import requests
# Timestamp taken before downloading starts (not read in this part of the script).
starttime = datetime.datetime.now()
# The value typed here is used verbatim as the subdomain of the listing URL.
city = input('请输入城市的首字母,例如:北京-BJ(不区分大小写):')
print('下载中...')

# Pattern with five capture groups, later written to the CSV as:
# group 1 -> job link, 2 -> job title, 3 -> job description,
# 4 -> publish time, 5 -> company name.
# Compiled once because it is identical for every page.
# NOTE(review): the pattern text looks as if HTML tags were stripped out of
# it when the script was pasted. As written, the un-anchored "(.*?).*?"
# groups (2 and 4) can only ever capture empty strings. Restore the original
# tags from the live page markup before relying on those columns.
pattern = re.compile(
    r'\n.*?job-info.*?href="(.*?)"'
    r'.*? (.*?).*?clearfix" title="(.*?)"'
    r'.*?(.*?).*?company-name.*?"公司(.*?)"',
    re.S,
)

# One entry per parsed page; each entry is the list of 5-tuples that
# re.findall returned for that page.
results = []
for number in range(100):
    url = 'http://' + city + '.liepin.com/zhaopin/pn' + str(number) + '/'
    try:
        response = requests.get(url, timeout=3)
    except requests.RequestException as exc:
        # A timeout or connection error on one page must not throw away
        # the pages that were already collected.
        print('第' + str(number + 1) + '页下载失败,已跳过:' + str(exc))
        continue

    # Only responses declared as UTF-8 are parsed. The comparison is
    # case-insensitive and tolerates a missing encoding (None).
    if (response.encoding or '').upper() != 'UTF-8':
        continue

    html = response.text
    items = pattern.findall(html)
    results.append(items)
    print('当前下载第:' + str(number + 1) + '页!')
# Write every collected match to "<city><MMDD>.csv".
#
# The encoding is given explicitly: without it open() uses the locale's
# preferred encoding (GBK on Chinese-locale Windows), which cannot represent
# characters such as '\xa0' and raises UnicodeEncodeError while writing.
# 'utf-8-sig' writes a BOM so spreadsheet tools can detect UTF-8.
# newline='' is what the csv module requires to control line endings itself.
with open(
    city + time.strftime('%m%d') + '.csv',
    'w',
    newline='',
    encoding='utf-8-sig',
) as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['公司名称', '职位名称', '职位链接', '职位说明', '发布时间'])
    # results holds one list of 5-tuples per page; the tuple fields are
    # reordered here to match the header columns above.
    for page_items in results:
        for item in page_items:
            writer.writerow([item[4], item[1], item[0], item[2], item[3]])