import urllib2
import socket
import threading
import fcntl
#import portalocker
# Serialises file access between the worker threads of this process.
# flock() locks belong to the open file description, so threads that share
# one file object never block each other on it; this lock provides the
# actual mutual exclusion.  The flock() calls are kept so that separate
# processes using the same files still cooperate.
_URL_IO_LOCK = threading.Lock()


def _locked_readline(fileobj):
    """Return the next line of *fileobj* ('' at EOF) while holding the I/O locks."""
    with _URL_IO_LOCK:
        fcntl.flock(fileobj, fcntl.LOCK_EX)
        try:
            return fileobj.readline()
        finally:
            fcntl.flock(fileobj, fcntl.LOCK_UN)


def _locked_write(fileobj, text):
    """Write *text* to *fileobj* while holding the I/O locks; always release them."""
    with _URL_IO_LOCK:
        fcntl.flock(fileobj, fcntl.LOCK_EX)
        try:
            fileobj.write(text)
        finally:
            fcntl.flock(fileobj, fcntl.LOCK_UN)


def _has_expiry(headers):
    """Return True if *headers* has an Expires header or a Cache-Control max-age."""
    if 'Expires' in headers:
        return True
    cache_control = headers.get('cache-control')
    # Substring test instead of comparing find()'s int result with the
    # string '-1', which was always unequal.  Directive names are
    # case-insensitive, hence lower().
    return bool(cache_control) and 'max-age' in cache_control.lower()


def test_urls(f1, f2, f3, f4, threadnum):
    """Classify every URL read from *f1* by its caching headers.

    Lines are consumed from *f1* until end of file, so several threads may
    call this concurrently with the same file objects and share the work.

    Args:
        f1: readable file with one URL per line; blank lines are skipped.
        f2: writable file receiving URLs whose response has an ``Expires``
            header or a ``Cache-Control`` header containing ``max-age``.
        f3: writable file receiving URLs whose response has neither.
        f4: writable file receiving URLs that could not be opened
            (``IOError``, which covers urllib2 errors, or ``ValueError``
            for a malformed URL).
        threadnum: value printed after each processed URL as progress output.

    Every URL is written followed by exactly one newline.  All four file
    objects must support ``fileno()`` because they are passed to ``flock``.
    """
    while True:
        line = _locked_readline(f1)
        if not line:
            break  # end of the URL list
        url = line.strip()
        if not url:
            continue  # a blank line would make Request() raise ValueError
        record = url + '\n'  # normalised so a final unterminated line is safe

        response = None
        try:
            response = urllib2.build_opener().open(urllib2.Request(url))
            if response is not None and _has_expiry(response.headers):
                target = f2
            elif response is not None:
                target = f3
            else:
                target = f4
        except (IOError, ValueError):
            target = f4
        finally:
            # Release the connection instead of leaking one per URL.
            if response is not None:
                response.close()

        _locked_write(target, record)
        print(threadnum)


# Despite its name this is a worker function, not a unit test; keep
# pytest/nose from trying to collect it.
test_urls.__test__ = False
# Cap every socket operation at 10 seconds so that one unresponsive server
# cannot stall a worker thread indefinitely.
timeout = 10
socket.setdefaulttimeout(timeout)

# The output file names (including the "Exprires" spelling) are kept exactly
# as they were so existing consumers of these files keep working.
# The with-blocks guarantee all four files are closed even if creating,
# starting or joining a thread raises.
with open("urls.txt", "r") as f1:
    with open('haveExprires.txt', 'w') as f2:
        with open('noExprires.txt', 'w') as f3:
            with open('cantBeOpen_urls.txt', 'w') as f4:
                # Ten workers share the same four file objects and pull
                # URLs from f1 until it is exhausted.
                threads = []
                num = 10
                for x in range(0, num):
                    threads.append(threading.Thread(
                        target=test_urls, args=(f1, f2, f3, f4, x,)))
                for t in threads:
                    t.start()
                # Wait for every worker before the files are closed.
                for t in threads:
                    t.join()
import socket
import threading
import fcntl
#import portalocker
# Serialises file access between the worker threads of this process.
# flock() locks belong to the open file description, so threads that share
# one file object never block each other on it; this lock provides the
# actual mutual exclusion.  The flock() calls are kept so that separate
# processes using the same files still cooperate.
_URL_IO_LOCK = threading.Lock()


def _locked_readline(fileobj):
    """Return the next line of *fileobj* ('' at EOF) while holding the I/O locks."""
    with _URL_IO_LOCK:
        fcntl.flock(fileobj, fcntl.LOCK_EX)
        try:
            return fileobj.readline()
        finally:
            fcntl.flock(fileobj, fcntl.LOCK_UN)


def _locked_write(fileobj, text):
    """Write *text* to *fileobj* while holding the I/O locks; always release them."""
    with _URL_IO_LOCK:
        fcntl.flock(fileobj, fcntl.LOCK_EX)
        try:
            fileobj.write(text)
        finally:
            fcntl.flock(fileobj, fcntl.LOCK_UN)


def _has_expiry(headers):
    """Return True if *headers* has an Expires header or a Cache-Control max-age."""
    if 'Expires' in headers:
        return True
    cache_control = headers.get('cache-control')
    # Substring test instead of comparing find()'s int result with the
    # string '-1', which was always unequal.  Directive names are
    # case-insensitive, hence lower().
    return bool(cache_control) and 'max-age' in cache_control.lower()


def test_urls(f1, f2, f3, f4, threadnum):
    """Classify every URL read from *f1* by its caching headers.

    Lines are consumed from *f1* until end of file, so several threads may
    call this concurrently with the same file objects and share the work.

    Args:
        f1: readable file with one URL per line; blank lines are skipped.
        f2: writable file receiving URLs whose response has an ``Expires``
            header or a ``Cache-Control`` header containing ``max-age``.
        f3: writable file receiving URLs whose response has neither.
        f4: writable file receiving URLs that could not be opened
            (``IOError``, which covers urllib2 errors, or ``ValueError``
            for a malformed URL).
        threadnum: value printed after each processed URL as progress output.

    Every URL is written followed by exactly one newline.  All four file
    objects must support ``fileno()`` because they are passed to ``flock``.
    """
    while True:
        line = _locked_readline(f1)
        if not line:
            break  # end of the URL list
        url = line.strip()
        if not url:
            continue  # a blank line would make Request() raise ValueError
        record = url + '\n'  # normalised so a final unterminated line is safe

        response = None
        try:
            response = urllib2.build_opener().open(urllib2.Request(url))
            if response is not None and _has_expiry(response.headers):
                target = f2
            elif response is not None:
                target = f3
            else:
                target = f4
        except (IOError, ValueError):
            target = f4
        finally:
            # Release the connection instead of leaking one per URL.
            if response is not None:
                response.close()

        _locked_write(target, record)
        print(threadnum)


# Despite its name this is a worker function, not a unit test; keep
# pytest/nose from trying to collect it.
test_urls.__test__ = False
# Cap every socket operation at 10 seconds so that one unresponsive server
# cannot stall a worker thread indefinitely.
timeout = 10
socket.setdefaulttimeout(timeout)

# The output file names (including the "Exprires" spelling) are kept exactly
# as they were so existing consumers of these files keep working.
# The with-blocks guarantee all four files are closed even if creating,
# starting or joining a thread raises.
with open("urls.txt", "r") as f1:
    with open('haveExprires.txt', 'w') as f2:
        with open('noExprires.txt', 'w') as f3:
            with open('cantBeOpen_urls.txt', 'w') as f4:
                # Ten workers share the same four file objects and pull
                # URLs from f1 until it is exhausted.
                threads = []
                num = 10
                for x in range(0, num):
                    threads.append(threading.Thread(
                        target=test_urls, args=(f1, f2, f3, f4, x,)))
                for t in threads:
                    t.start()
                # Wait for every worker before the files are closed.
                for t in threads:
                    t.join()