#!/usr/bin/python
# Can be extended into a simple scraping tool that fetches on a schedule.
import time
from email.utils import formatdate

try:
    import urllib2  # Python 2
except ImportError:  # Python 3: urllib.request exposes the same opener/handler names
    import urllib.request as urllib2
class ErrorHandler(urllib2.HTTPDefaultErrorHandler):
    """Opener handler that returns HTTP error responses instead of raising.

    Installing this handler lets the caller inspect the status code of an
    error response (for example 304) on the object returned by ``open()``.
    """

    def http_error_default(self, req, fp, code, msg, headers):
        """Wrap an HTTP error response in an ``HTTPError`` and return it.

        Args:
            req: The request object; only ``get_full_url()`` is used.
            fp: File-like object holding the response body (may be None).
            code: Numeric HTTP status code.
            msg: Reason phrase accompanying the status code.
            headers: Response headers.

        Returns:
            The ``HTTPError`` instance, whose ``code`` and ``status`` both
            equal ``code``.
        """
        result = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
        # Older interpreters have no ``status`` attribute on HTTPError, so it
        # is added here.  Python 3.9+ already exposes ``status`` as a
        # read-only property mirroring ``code``; assigning to it would raise
        # AttributeError, so the assignment is skipped when it already agrees.
        if getattr(result, 'status', None) != code:
            result.status = code
        return result
# Conditional-GET poller: fetch URL every 10 seconds, sending the most recent
# Last-Modified value back as If-Modified-Since so that an unchanged resource
# can be answered with 304 instead of a full body.
URL = 'http://www.ibm.com/developerworks/js/ajax1.js'
NOT_MODIFIED_BANNER = '''
==============================
NOT MODIFIED
==============================
'''

req = urllib2.Request(URL)
mgr = urllib2.build_opener(ErrorHandler())
# Validator to send with the next request; None until the first response.
modified = None

while True:
    ns = mgr.open(req)
    try:
        # Header lookup is case-insensitive.  Keep the previous validator when
        # this response carries no Last-Modified header.
        modified = ns.headers.get('last-modified') or modified
        if ns.code == 304:
            print(NOT_MODIFIED_BANNER)
        elif ns.code == 200:
            body = ns.read()
            # Python 3 yields bytes here; decode so the text itself is printed.
            # NOTE(review): UTF-8 is assumed -- confirm against Content-Type.
            if not isinstance(body, str):
                body = body.decode('utf-8', 'replace')
            print(body)
        else:
            print(' there is an error ')
    finally:
        # Release the connection before sleeping; the loop never exits.
        ns.close()
    if modified is None:
        # The server supplied no Last-Modified: fall back to "now", formatted
        # as an HTTP-date (a raw time.time() float is not a valid value).
        modified = formatdate(usegmt=True)
    req.add_header('If-Modified-Since', modified)
    time.sleep(10)
import urllib2,time;
# NOTE(review): this is a second, identical definition of ErrorHandler in this
# file; it rebinds the same name.
class ErrorHandler(urllib2.HTTPDefaultErrorHandler):
    """Opener handler that returns HTTP error responses instead of raising.

    Installing this handler lets the caller inspect the status code of an
    error response (for example 304) on the object returned by ``open()``.
    """

    def http_error_default(self, req, fp, code, msg, headers):
        """Wrap an HTTP error response in an ``HTTPError`` and return it.

        Args:
            req: The request object; only ``get_full_url()`` is used.
            fp: File-like object holding the response body (may be None).
            code: Numeric HTTP status code.
            msg: Reason phrase accompanying the status code.
            headers: Response headers.

        Returns:
            The ``HTTPError`` instance, whose ``code`` and ``status`` both
            equal ``code``.
        """
        error = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
        # Older interpreters have no ``status`` attribute on HTTPError, so it
        # is added here.  Python 3.9+ already exposes ``status`` as a
        # read-only property mirroring ``code``; assigning to it would raise
        # AttributeError, so the assignment is skipped when it already agrees.
        if getattr(error, 'status', None) != code:
            error.status = code
        return error
# NOTE(review): this is a second copy of the polling script that appears
# earlier in this file.  The earlier ``while True`` loop has no exit, so this
# copy is only reached if that loop is removed.
#
# Conditional-GET poller: fetch URL every 10 seconds, sending the most recent
# Last-Modified value back as If-Modified-Since so that an unchanged resource
# can be answered with 304 instead of a full body.
URL = 'http://www.ibm.com/developerworks/js/ajax1.js'
NOT_MODIFIED_BANNER = '''
==============================
NOT MODIFIED
==============================
'''

req = urllib2.Request(URL)
mgr = urllib2.build_opener(ErrorHandler())
# Validator to send with the next request; None until the first response.
modified = None

while True:
    ns = mgr.open(req)
    try:
        # Header lookup is case-insensitive.  Keep the previous validator when
        # this response carries no Last-Modified header.
        modified = ns.headers.get('last-modified') or modified
        if ns.code == 304:
            print(NOT_MODIFIED_BANNER)
        elif ns.code == 200:
            body = ns.read()
            # Python 3 yields bytes here; decode so the text itself is printed.
            # NOTE(review): UTF-8 is assumed -- confirm against Content-Type.
            if not isinstance(body, str):
                body = body.decode('utf-8', 'replace')
            print(body)
        else:
            print(' there is an error ')
    finally:
        # Release the connection before sleeping; the loop never exits.
        ns.close()
    if modified is None:
        # The server supplied no Last-Modified: fall back to "now", formatted
        # as an HTTP-date (a raw time.time() float is not a valid value).
        modified = formatdate(usegmt=True)
    req.add_header('If-Modified-Since', modified)
    time.sleep(10)