爬虫抓取的方式有两种:
1.简单
import urllib2
# Interactive exploration: list the names exposed by the urllib2 module.
dir(urllib2)

# Bind the response object so it can be read and then closed.
responce = urllib2.urlopen(url)
try:
    html = responce.read()
finally:
    # Release the underlying connection even if read() raises.
    responce.close()
2.复杂
import cookielib
import urllib2
def make_opener(headers=None):
    """Build a urllib2 opener that handles cookies and sends custom headers.

    Args:
        headers: Optional request headers, given either as a list of
            ``(name, value)`` tuples or as a dict. When omitted or empty,
            a browser-like ``User-Agent``/``Accept`` pair is used instead.

    Returns:
        The opener created by ``urllib2.build_opener``, with its
        ``addheaders`` attribute set to the chosen headers and an
        ``HTTPCookieProcessor`` backed by a new ``CookieJar`` installed.
    """
    cookie = cookielib.CookieJar()
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(cookie),
        urllib2.HTTPHandler,
    )
    if headers:
        # ``addheaders`` must be a list of (name, value) tuples, so a dict is
        # converted and any other iterable is copied into a new list.
        if isinstance(headers, dict):
            opener.addheaders = list(headers.items())
        else:
            opener.addheaders = list(headers)
    else:
        opener.addheaders = [
            ('User-Agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; QQDownload 718)'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ]
    return opener
# Fetch the page through the cookie-aware opener, giving up after 10 seconds.
opener = make_opener()
responce = opener.open(url, timeout=10)
try:
    html = responce.read()
finally:
    # Release the underlying connection even if read() raises.
    responce.close()