下面是一段 Python 2 代码,在 Python 3 中运行会报错:
#coding=utf-8
import urllib,time
import StringIO
import pycurl
def get_baidu_html(url):
    """Download the soso.com search page for ``url`` (Python 2 version).

    The response body is written by pycurl into an in-memory
    ``StringIO.StringIO`` buffer through the write callback and then read
    back out of it. The function does not return anything.

    Args:
        url: Search keyword substituted into the ``word`` query parameter.
    """
    page_buffer = StringIO.StringIO()
    curl = pycurl.Curl()
    target = "http://www.soso.com/s?word=%s" % url

    # Apply the transfer options in the same order as a sequence of
    # individual setopt() calls would.
    options = (
        (pycurl.URL, target),
        (pycurl.WRITEFUNCTION, page_buffer.write),
        (pycurl.FOLLOWLOCATION, 1),
        (pycurl.MAXREDIRS, 5),
        (pycurl.CONNECTTIMEOUT, 2000),
        (pycurl.TIMEOUT, 2000),
        (pycurl.USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)"),
    )
    for option, value in options:
        curl.setopt(option, value)

    curl.perform()
    page = page_buffer.getvalue()
import StringIO  # Python 3 中没有 StringIO 模块,需改为 import io 或 from io import StringIO
在 Python 3 中运行时报错:pycurl.error: (23, 'Failed writing body (0 != 46)')
问题在于 Python 3 下 pycurl 不能再像以前那样配合 StringIO 使用:pycurl 传给写回调函数的是 bytes,而 io.StringIO 的 write 只接受 str,于是写入失败。解决方法是用 io.BytesIO 代替:先把响应内容写入字节缓冲区,再将其解码为字符串。
让 pycurl 使用 BytesIO 而不是 StringIO:
# Give pycurl a BytesIO buffer (instead of StringIO) as the target of its
# write callback.
html= io.BytesIO()
c.setopt(pycurl.WRITEFUNCTION, html.write)
从BytesIO对象解码字节信息:
htmlString = html.getvalue().decode('UTF-8')  # decode the buffered bytes into a str using the given encoding
修改后的Python3 代码:
import urllib,time
import io
from io import BytesIO
import pycurl
from io import StringIO
def get_baidu_html(url):
    """Fetch the soso.com search results page for ``url`` and return it as text.

    Args:
        url: Search keyword substituted into the ``word`` query parameter.
            It is inserted as-is (no URL quoting is applied here).

    Returns:
        The response body decoded as UTF-8.

    Raises:
        pycurl.error: If the transfer fails.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Under Python 3 the body must be collected in a BytesIO; using a StringIO
    # here makes the transfer abort with pycurl error 23
    # ("Failed writing body").
    html = io.BytesIO()
    c = pycurl.Curl()
    try:
        myurl = "http://www.soso.com/s?word=%s" % url
        c.setopt(pycurl.URL, myurl)
        c.setopt(pycurl.WRITEFUNCTION, html.write)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.MAXREDIRS, 5)
        # NOTE(review): libcurl documents these two options in seconds, so
        # 2000 is roughly 33 minutes -- confirm that this is intended.
        c.setopt(pycurl.CONNECTTIMEOUT, 2000)
        c.setopt(pycurl.TIMEOUT, 2000)
        c.setopt(pycurl.USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)")
        c.perform()
    finally:
        # Always release the curl handle, even when the transfer fails.
        c.close()
    # Decode the collected bytes into a str using the given encoding.
    return html.getvalue().decode('UTF-8')
pycurl 的安装:如果执行 pip install pycurl 报错:
可以去 http://www.lfd.uci.edu/~gohlke/pythonlibs/ 下载 pycurl:在页面中用 Ctrl+F 搜索关键字 pycurl,然后点击下载与你的 Python 版本和系统位数对应的 .whl 文件,再用下面的命令从本地安装:
执行 pip install d:\pycurl-你的版本-win_amd64.whl