爬取脚本之家的 Python 文章列表时,发现输出的中文标题是乱码:
import requests
from bs4 import BeautifulSoup
# Fetch the first page of the jb51.net Python article list and print the
# absolute URL and title of every article found on it.
url = 'https://www.jb51.net/list/list_97_1.htm'
base_url = 'https://www.jb51.net'

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from parsing an HTTP error page as a listing.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Fix for the garbled Chinese text: when the Content-Type header is text/*
# without an explicit charset, requests decodes `response.text` as
# ISO-8859-1. The mojibake in the output suggests the page is really
# GBK/GB2312, so let requests detect the encoding from the body bytes before
# `.text` is read.
response.encoding = response.apparent_encoding
html = response.text

soup = BeautifulSoup(html, 'lxml')
# Each article entry is a <dt> inside div.artlist > dl.
txt = soup.select('div.artlist dl dt')
print(txt)

# Absolute article URLs, in page order.
href_list = []
for i in txt:
    link = i.select_one('a')
    # Skip entries without a usable link instead of raising
    # IndexError/KeyError.
    if link is None or not link.get('href'):
        continue
    # Prefer the `title` attribute; fall back to the visible link text.
    title = link.get('title') or link.get_text(strip=True)
    href = base_url + link['href']
    href_list.append(href)
    print(href, title)
-------------------------------------------------------------------------------------------------------------
https://www.jb51.net/article/147721.htm pythonʹÓöà½ø³ÌµÄʵÀýÏê½â
https://www.jb51.net/article/147712.htm Anaconda2 5.2.0°²×°Ê¹ÓÃͼÎĽ̳Ì
https://www.jb51.net/article/147710.htm win10ϵͳÏÂAnaconda3°²×°ÅäÖ÷½·¨Í¼ÎĽ̳Ì
https://www.jb51.net/article/147707.htm Window 64λÏÂpython3.6.2»·¾³´î½¨Í¼ÎĽ̳Ì
https://ww