# Purpose: to simulate a browser.
# -*- coding: utf-8 -*-
"""
使用pycurl发起HTTP访问
"""
import pycurl, urllib
import StringIO
def _proxy_type_code(proxy_type):
    """Return the integer ``pycurl.PROXYTYPE_*`` value for ``proxy_type``.

    Integers are returned unchanged. Anything else is treated as the part of
    a ``pycurl.PROXYTYPE_*`` constant name after the prefix (for example
    ``"SOCKS5"`` or ``"http"``) and looked up case-insensitively.

    Raises:
        ValueError: No pycurl constant matches the given name.
    """
    if isinstance(proxy_type, int):
        return proxy_type
    constant_name = "PROXYTYPE_" + str(proxy_type).upper()
    try:
        return getattr(pycurl, constant_name)
    except AttributeError:
        raise ValueError("unknown proxy type: %r" % (proxy_type,))


def curl(url, method="GET", proxy=(), verbose=0, cookie_file="", **kwargs):
    """Perform an HTTP request with pycurl and return the response body.

    Args:
        url: Target URL.
        method: HTTP method name, compared case-insensitively. ``"GET"``
            appends the encoded ``kwargs`` to the URL as a query string;
            any other value sends them through ``pycurl.POSTFIELDS``.
        proxy: Empty for no proxy, otherwise a 3-tuple
            ``(host_port, proxy_type, user_pass)``, e.g.
            ``("www.test.com:8080", "SOCKS5", "user:pass")``.
            ``proxy_type`` may be a ``pycurl.PROXYTYPE_*`` integer or the
            constant's name without the ``PROXYTYPE_`` prefix. A falsy
            ``user_pass`` leaves proxy credentials unset.
        verbose: Value passed to ``pycurl.VERBOSE``.
        cookie_file: When non-empty, passed as ``pycurl.COOKIEJAR``.
            ``pycurl.COOKIEFILE`` is not set by this function.
        **kwargs: Request parameters, encoded with ``urllib.urlencode``.
            The key ``agent`` is removed first and used as the User-Agent
            (default ``"Mozilla/4.0"``).

    Returns:
        The data the transfer wrote to the in-memory body buffer.

    Raises:
        ValueError: ``proxy_type`` names no known pycurl proxy type.
        Errors raised by pycurl while configuring or performing the
        transfer propagate to the caller.
    """
    handle = pycurl.Curl()
    body = StringIO.StringIO()
    try:
        handle.setopt(pycurl.FOLLOWLOCATION, 1)
        handle.setopt(pycurl.MAXREDIRS, 5)
        handle.setopt(pycurl.CONNECTTIMEOUT, 60)
        handle.setopt(pycurl.TIMEOUT, 600)
        handle.setopt(pycurl.HTTPPROXYTUNNEL, 1)
        handle.setopt(pycurl.VERBOSE, verbose)

        # "agent" is a control option, not a request parameter, so it must
        # be removed before the remaining kwargs are URL-encoded.
        agent = kwargs.pop("agent", "Mozilla/4.0")
        handle.setopt(pycurl.USERAGENT, agent)

        if cookie_file:
            # NOTE(review): only COOKIEJAR is set; confirm whether cookies
            # should also be loaded from this file via COOKIEFILE.
            handle.setopt(pycurl.COOKIEJAR, cookie_file)

        handle.setopt(pycurl.WRITEFUNCTION, body.write)

        data = urllib.urlencode(kwargs)
        if method.upper() == "GET":
            if data:
                # Do not add a second "?" when the URL already carries a
                # query string.
                if "?" not in url:
                    separator = "?"
                elif url.endswith(("?", "&")):
                    separator = ""
                else:
                    separator = "&"
                url = url + separator + data
            handle.setopt(pycurl.URL, url)
        else:
            handle.setopt(pycurl.URL, url)
            handle.setopt(pycurl.POSTFIELDS, data)

        if proxy:
            host_port, proxy_type, user_pass = proxy
            handle.setopt(pycurl.PROXY, host_port)
            # PROXYTYPE is an integer option; translate names such as
            # "SOCKS5" into the matching pycurl constant.
            handle.setopt(pycurl.PROXYTYPE, _proxy_type_code(proxy_type))
            if user_pass:
                handle.setopt(pycurl.PROXYUSERPWD, user_pass)

        handle.perform()
    finally:
        # Release the handle even when the transfer fails.
        handle.close()
    return body.getvalue()