圣诞节快乐(づ ̄ 3 ̄)づ~~~
在这个半放假的日子里,人也变得慵懒起来,在MOOC上学习了静态页面的简单爬虫(传送门:http://www.imooc.com/learn/563),干货满满啊~~
所以爬了一个芈月传么么哒~~~
# coding=utf-8
import urllib2
class UrlManager(object):
def __init__(self):
self.new_urls=set()
self.old_urls=set()
def add_new_url(self,url):
if url is None:
return
if url not in self.new_urls and url not in self.old_urls:
self.new_urls.add(url)
def add_new_urls(self,urls):
if urls is None or len(urls)==0:
return
for url in urls:
self.add_new_url(url)
def has_new_url(self):
return len(self.new_urls)!=