from lxml import etree
from pyquery import PyQuery as pq
from fake_useragent import UserAgent
import time
import json
import requests
import csv
# Page whose table rows are scraped and printed.
url = 'http://www.stats.gov.cn/tjsj/pcsj/rkpc/6rp/html/B0203.htm'

# Text node (non-breaking space + space) that triggers merging of the
# first and third text nodes of a row.
LABEL_SPACER = '\xa0 '


def merge_split_label(cells):
    """Return the text cells of one table row with a split first cell re-joined.

    When the second text node equals ``LABEL_SPACER``, the spacer is dropped
    and the first and third nodes are concatenated into a single leading
    cell (presumably a row label that the page splits around a spacer --
    confirm against the live page). Otherwise the cells are returned as-is.

    NOTE(review): the original script's indentation was lost; this assumes
    the spacer removal, the concatenation and the follow-up deletion all
    belonged to the ``if`` body.

    Args:
        cells: Sequence of text-node strings extracted from one ``<tr>``.

    Returns:
        A new list of cells, or ``None`` when the row has too few text nodes
        to be processed (such rows were silently skipped by the original
        ``except: pass``). The input sequence is never mutated.
    """
    if len(cells) < 2:
        # The original indexed cells[1] and swallowed the IndexError.
        return None
    if cells[1] != LABEL_SPACER:
        return list(cells)
    if len(cells) < 3:
        # Spacer present but nothing after it to merge with: skip the row.
        return None
    return [cells[0] + cells[2], *cells[3:]]


def main():
    """Download the page and print the text cells of every table row.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    headers = {'User-Agent': UserAgent().random}
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    # The default decoding produced garbled text; let Requests detect the
    # charset from the body instead of guessing between GBK/gb2312/utf-8.
    response.encoding = response.apparent_encoding
    tree = etree.HTML(response.text)
    for row in tree.xpath('/html/body/table//tr'):
        cells = merge_split_label(row.xpath('./td//text()'))
        if cells is not None:
            print(cells)


if __name__ == '__main__':
    main()
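# Note: if the printed text looks garbled (mojibake), the response was decoded
# with the wrong charset; try setting response.encoding to 'GBK' or 'utf-8'
# before reading response.text.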