代码如下。之前抓取到的内容一直是乱码,经过多番尝试,
最终使用了 .encode('ISO-8859-1').decode('utf-8') 这个方法解决。
from bs4 import BeautifulSoup
import requests
if __name__ == '__main__':
    # Page to fetch; the `.book-mulu > ul > li` entries are selected from it.
    url = 'https://m.shicimingju.com/book/sanguoyanyi.html'
    # Sent with every request so the site sees a mobile Chrome User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Mobile Safari/537.36'
    }
    page_text = requests.get(url=url, headers=headers)
    # Decode the raw response bytes as UTF-8 directly.  The earlier
    # `.text.encode('ISO-8859-1').decode('utf-8')` round trip gives the same
    # string only while requests falls back to ISO-8859-1 when building
    # `.text`; if the response ever carries a different charset, the encode
    # step raises UnicodeEncodeError on the non-Latin-1 characters.
    soup = BeautifulSoup(page_text.content.decode('utf-8'), 'lxml')
    # One element per <li> directly under the `.book-mulu` list.
    li = soup.select('.book-mulu > ul > li')
    # Output file, opened for writing as UTF-8.
    # NOTE(review): left open here; presumably written to and closed by the
    # code that follows this block -- confirm.
    f = open('sanguo.txt', 'w', encoding='utf-8')
for list in li:
title=list.a.string
#t=title.encode('utf-8').decode('gbk')
href='https://m.shicimingju.com'+list.a['href']
detail_text=requests.get(href,headers=headers)
#detail_text.encoding=