封装自己的简易爬虫框架
1.框架封装
import urllib2
class my_crawler(object):
    """Minimal single-page crawler: download one URL and save it to a file.

    The constructor stores two values:
      _url  -- address of the resource to download.
      _path -- local file path the downloaded bytes are written to.
    """

    # Characters that must stay untouched when percent-encoding a complete URL:
    # the URL delimiters plus '%' so already-encoded sequences are not
    # encoded a second time.
    _URL_SAFE_CHARS = "%/:=&?~#+!$,;'@()*[]"

    def __init__(self,url,path):
        """Remember the source URL and the destination file path."""
        self._url=url
        self._path=path

    def read_resource(self):
        """Download ``self._url`` and return the response body as raw bytes.

        Characters that are not legal in a URL (spaces, non-ASCII bytes) are
        percent-encoded first; the URL structure itself is left intact.
        The request uses a 5 second timeout. Any error raised by
        ``urllib2.urlopen`` or by reading the response propagates to the caller.
        """
        # quote() followed by unquote() would give back the original string,
        # so encode once and keep the delimiters via ``safe`` instead.
        url=urllib2.quote(self._url,safe=self._URL_SAFE_CHARS)
        response=urllib2.urlopen(url,timeout=5)
        try:
            return response.read()
        finally:
            # Always release the connection, even when read() fails.
            response.close()

    def write_resource(self):
        """Download the resource and write it to ``self._path``.

        Best-effort: every failure is caught and reported on stdout instead
        of being raised. Returns None.
        """
        try:
            # Fetch before opening the destination so that a failed download
            # does not truncate a file that already exists.
            content=self.read_resource()
            with open(self._path,'wb') as f:
                f.write(content)
            print (self._url+"信息成功爬取并写入"+self._path)
        except Exception as e:
            # str() is required: concatenating the exception object itself
            # raises TypeError and would hide the real error.
            print ("出现异常"+str(e))
2.框架使用
from my_crawler_framework import my_crawler
# Example run: download one Baidu search-result page and store it as a local
# HTML file. write_resource() prints the outcome to stdout.
target_url="http://www.baidu.com/s?wd=肥猫下楼吃面包"
output_file="d:/img/crawlers/zp.html"
my_crawler(target_url,output_file).write_resource()