使用 Python 时遇到 UnicodeDecodeError: 'gb2312' codec can't decode byte 0x88 in position 164111: illegal multibyte sequence
# Fund scraper
from urllib import request

import chardet

page1_url = "http://fund.eastmoney.com/fund.html"


def getHtml(pageUrl):
    """Download ``pageUrl``, decode it with the chardet-guessed codec, and print it.

    The decode is strict: a byte sequence that is illegal in the guessed
    codec raises ``UnicodeDecodeError``.
    """
    reply = request.urlopen(pageUrl)
    body_bytes = reply.read()
    # chardet.detect() returns a dict; its 'encoding' entry names the guessed codec.
    detection = chardet.detect(body_bytes)
    codec_name = detection['encoding']
    text = body_bytes.decode(codec_name)
    print(text)


getHtml(page1_url)
怎么办?
报错的大概意思是:网页中含有该编码无法解码的非法字符,解码时需要加上 'ignore' 参数来忽略它们:
# Fund scraper
from urllib import request

import chardet

page1_url = "http://fund.eastmoney.com/fund.html"

# chardet may report the narrow 'gb2312' label for pages that actually contain
# GBK/GB18030 characters (e.g. lead byte 0x88, which GB2312 does not define).
# GB18030 is a superset of both, so decoding with it keeps those characters
# instead of dropping them.
_CODEC_UPGRADES = {"gb2312": "gb18030", "gbk": "gb18030"}


def getHtml(pageUrl):
    """Download a page, decode it to text, print it, and return it.

    Args:
        pageUrl: URL of the page to fetch.

    Returns:
        The decoded page text (the same string that is printed).

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        LookupError: if chardet reports a codec name Python does not know.
    """
    # The context manager closes the HTTP response even if read() fails.
    with request.urlopen(pageUrl) as response:
        raw_html = response.read()

    # chardet returns {'encoding': None} when it cannot guess (e.g. an empty
    # body); fall back to UTF-8 rather than passing None to decode().
    guessed = chardet.detect(raw_html)['encoding'] or 'utf-8'
    encoding = _CODEC_UPGRADES.get(guessed.lower(), guessed)

    # 'ignore' still drops any bytes that are illegal even in the wider codec,
    # so a malformed page never raises UnicodeDecodeError.
    src = raw_html.decode(encoding, 'ignore')
    print(src)
    return src


getHtml(page1_url)