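# Problem: Chinese text comes out garbled (mojibake) when reading an HTML page.
# A concise way to fetch and process the HTML document.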
import bs4
import requests
import logging,sys
# Diagnostic: show the filesystem encoding reported by the interpreter.
print(sys.getfilesystemencoding())
logging.basicConfig(level=logging.DEBUG, format='%(message)s')

# NOTE(review): `url` is not assigned anywhere in the visible code; it must be
# defined before this point or the next line raises NameError.
# The timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(url, timeout=10)
res.raise_for_status()
# Tell requests how to decode the body. This line was added to fix the
# garbled Chinese text in res.text.
res.encoding = 'utf-8'

# Name the parser explicitly so the parse result does not depend on which
# optional parsers happen to be installed (and to avoid bs4's warning).
myBS4 = bs4.BeautifulSoup(res.text, 'html.parser')
logging.debug(type(myBS4))

elems = myBS4.select('input[type="button"]')
if elems:
    # Key attribute: `attrs` is the dictionary of the element's attributes.
    # A single attribute can be read with e.g. elems[0]['value'].
    print(elems[0].attrs)
else:
    # Previously an empty result crashed with IndexError on elems[0].
    logging.warning('No input[type="button"] element found in the page')