#-*-coding:utf8-*-
import requests
import re
import sys
# Page to scrape and the encodings used on the way in and on the way out.
PAGE_URL = 'http://jp.tingroom.com/yuedu/yd300p/'
PAGE_ENCODING = 'utf-8'
OUTPUT_ENCODING = 'gb18030'
REQUEST_TIMEOUT = 10  # seconds; requests applies no timeout unless told to

# re.S lets "." match newlines, so one match may span several source lines.
TITLE_PATTERN = re.compile(r'color:#666666;">(.*?)</span>', re.S)
CHINESE_PATTERN = re.compile(r'color: #039;">(.*?)</a>', re.S)


def fetch_page(url=PAGE_URL):
    """Download *url* and return its body decoded as PAGE_ENCODING."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Force the decoding instead of trusting whatever requests guessed.
    response.encoding = PAGE_ENCODING
    return response.text


def encode_line(text):
    """Return *text* plus a trailing newline as OUTPUT_ENCODING bytes.

    Encoding explicitly here (instead of relying on the console codec or on
    sys.setdefaultencoding) is what prevents the "'gbk' codec can't encode
    character" UnicodeEncodeError when the text is printed.
    """
    # "replace" is only a safety net so that output never raises.
    return text.encode(OUTPUT_ENCODING, "replace") + b"\n"


def emit(text, stream=None):
    """Write *text* as one encoded line to *stream* (default: sys.stdout).

    On Python 3 the bytes go to the stream's underlying binary ``buffer``;
    on Python 2, where the stream accepts byte strings, they are written
    to the stream itself.
    """
    if stream is None:
        stream = sys.stdout
    sink = getattr(stream, "buffer", stream)
    sink.write(encode_line(text))


def main():
    """Print the raw page, then every title match, then every link match."""
    page = fetch_page()
    emit(page)
    for title in TITLE_PATTERN.findall(page):
        emit(title)
    for chinese in CHINESE_PATTERN.findall(page):
        emit(chinese)


if __name__ == "__main__":
    main()
编程中遇到的问题及解决方案:
问题1:字符编码格式不匹配
D:\Python27\python.exe D:/pycharm/class2/test.py
Traceback (most recent call last):
  File "D:/pycharm/class2/test.py", line 12, in <module>
    print html.text
UnicodeEncodeError: 'gbk' codec can't encode character u'\xa9' in position 28478: illegal multibyte sequence
Process finished with exit code 1