# -*- coding: utf-8 -*-
import re
import requests
import time
from bs4 import BeautifulSoup as asp
import random
# Browser-like User-Agent header sent with every request.
headeraa = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',}

# Matches hrefs that contain a literal "asp?" (an ASP page with a query
# string).  The "?" must be escaped: unescaped it only makes the "p"
# optional, so the pattern would match any href containing "as".
ASP_LINK_RE = re.compile(r'asp\?')

REQUEST_DELAY_SECONDS = 3    # pause before each request
REQUEST_TIMEOUT_SECONDS = 10
SAMPLE_SIZE = 3              # links picked at random per page


def read_url_list(path):
    """Return the non-blank lines of *path* with surrounding whitespace removed."""
    with open(path, 'r') as url_file:
        stripped = (line.strip() for line in url_file)
        return [line for line in stripped if line]


def collect_asp_links(page_url):
    """Fetch *page_url* and return the href of every tag matching ASP_LINK_RE.

    The status code, the raw body and each matching href are printed as
    they are encountered.  Raises requests.RequestException when the
    request fails.
    """
    han = requests.get(url=page_url, headers=headeraa,
                       timeout=REQUEST_TIMEOUT_SECONDS)
    print(han.status_code)
    print(han.content)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = asp(han.content, 'html.parser')
    links = []
    for tag in soup.find_all(href=ASP_LINK_RE):
        href = tag.get('href')
        print('=========================')
        print(href)
        links.append(href)
    return links


def sample_links(links, count=SAMPLE_SIZE):
    """Return up to *count* distinct random entries from *links*.

    random.sample raises ValueError when asked for more items than the
    population holds, so the request is capped at len(links).
    """
    return random.sample(links, min(count, len(links)))


def main(url_list_path='urllist.txt'):
    """Crawl every URL listed in *url_list_path* and print a random link sample."""
    hanssb = read_url_list(url_list_path)
    print(hanssb)
    for urllistx in hanssb:
        print(urllistx)
        time.sleep(REQUEST_DELAY_SECONDS)
        try:
            sss5 = collect_asp_links(urllistx)
        except requests.RequestException:
            # Covers timeouts, connection errors and malformed URLs; other
            # exceptions (e.g. KeyboardInterrupt) are allowed to propagate.
            print('connect time out')
            sss5 = []
        print(sss5)
        sss4 = sample_links(sss5)
        print('---------------')
        print(sss4)
        print('---------------')


if __name__ == '__main__':
    main()