#coding=utf-8
import urllib
import urllib2
import threading
import HTMLParser
import Queue
import os
import StringIO
import gzip
import re
import time
class GetUrllist(HTMLParser.HTMLParser):
    """HTML parser that collects the ``href`` of every ``<a>`` start tag.

    Each link target found while markup is fed to the parser is echoed to
    stdout and pushed onto ``self.Urlqueue`` in document order.
    """

    def __init__(self):
        # Explicit base-class call: on Python 2 HTMLParser is an old-style
        # class, so super() cannot be used here.
        HTMLParser.HTMLParser.__init__(self)
        # A non-positive maxsize makes the queue unbounded.
        self.Urlqueue = Queue.Queue(-1)

    def handle_starttag(self, tag, attrs):
        """Queue the ``href`` values of an ``<a>`` tag; ignore other tags.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by the
        parser; ``value`` is ``None`` for an attribute written without a
        value (``<a href>``).
        """
        if tag != 'a':
            return
        for key, value in attrs:
            # A bare ``href`` carries no URL, so there is nothing to queue.
            if key != 'href' or value is None:
                continue
            # Single-argument call form prints the same on Python 2 and 3.
            print(value)
            self.Urlqueue.put(value)
class spider(threading.Thread):
def __init__(self,myname,parser,path='F:\\uuuuu'):
threading.Thread.__init__(self)
self.count=0
self.name=myna
import urllib
import urllib2
import threading
import HTMLParser
import Queue
import os
import StringIO
import gzip
import re
import time
class GetUrllist(HTMLParser.HTMLParser):
    """HTML parser that collects the ``href`` of every ``<a>`` start tag.

    Each link target found while markup is fed to the parser is echoed to
    stdout and pushed onto ``self.Urlqueue`` in document order.
    """

    def __init__(self):
        # Explicit base-class call: on Python 2 HTMLParser is an old-style
        # class, so super() cannot be used here.
        HTMLParser.HTMLParser.__init__(self)
        # A non-positive maxsize makes the queue unbounded.
        self.Urlqueue = Queue.Queue(-1)

    def handle_starttag(self, tag, attrs):
        """Queue the ``href`` values of an ``<a>`` tag; ignore other tags.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by the
        parser; ``value`` is ``None`` for an attribute written without a
        value (``<a href>``).
        """
        if tag != 'a':
            return
        for key, value in attrs:
            # A bare ``href`` carries no URL, so there is nothing to queue.
            if key != 'href' or value is None:
                continue
            # Single-argument call form prints the same on Python 2 and 3.
            print(value)
            self.Urlqueue.put(value)
class spider(threading.Thread):
def __init__(self,myname,parser,path='F:\\uuuuu'):
threading.Thread.__init__(self)
self.count=0
self.name=myna