import requests
import random
import ast
import re
import socketserver
class MyTCPHandler(socketserver.BaseRequestHandler):
    """Look up Wayback Machine snapshot titles for domains sent by a client.

    Protocol: the client sends a domain name as raw bytes (at most 1024 per
    message). The handler asks the web.archive.org CDX index for that
    domain's snapshots, downloads each snapshot through a randomly chosen
    proxy, extracts its ``<title>`` and replies with ``str()`` of a list:

    * ``(index, title)`` for a snapshot whose title was found,
    * ``(index, "url:<snapshot url>", "没有title")`` when it has no title,
    * the single string ``"false"`` when the index returned no rows.
    """

    # One proxy per line; re-read on every request so edits apply live.
    PROXY_FILE = 'ip.txt'

    # Seconds to wait on each HTTP request. requests never times out by
    # default, which would pin this handler thread on a dead proxy.
    REQUEST_TIMEOUT = 30

    CDX_URL = 'http://web.archive.org/cdx/search/cdx'
    SNAPSHOT_URL_PREFIX = 'http://web.archive.org/web/'

    # Pool of User-Agent strings; one is picked at random per lookup.
    USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
        'Opera/9.25 (Windows NT 5.1; U; en)',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
        'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
        'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
        "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
        "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
    )

    # Text between "title>" and the next "</title"; compiled once, and
    # written as a raw string so the backslashes are regex escapes.
    _TITLE_RE = re.compile(r'(?<=title\>)[\s\S]*?(?=</title)')

    def handle(self):
        """Serve lookups on this connection until the client disconnects.

        Any error during a lookup (network failure, unreadable proxy file,
        malformed index response, undecodable request) is logged and ends
        the connection; the socket is always closed on the way out.
        """
        try:
            while True:
                self.data = self.request.recv(1024)
                print("{} send:".format(self.client_address), self.data)
                if not self.data:
                    # recv() returning b"" means the peer closed the socket.
                    print("connection lost")
                    break
                # Drop surrounding whitespace (e.g. a trailing newline).
                domain_name = self.data.decode().strip()
                res_data = self._lookup(domain_name)
                print(res_data)
                self.request.sendall(str(res_data).encode())
        except Exception as exc:
            # Top-level boundary of the handler thread: report the cause
            # instead of silently dropping it, then fall through to close.
            print(self.client_address, "连接断开", repr(exc))
        finally:
            self.request.close()

    def _lookup(self, domain_name):
        """Return the list of result entries for ``domain_name``.

        Raises:
            RuntimeError: the proxy file contains no usable entry.
            OSError: the proxy file cannot be read.
            ValueError: the CDX response body is not valid JSON.
            requests.RequestException: an HTTP request failed or timed out.
        """
        proxies = self._load_proxies(self.PROXY_FILE)
        if not proxies:
            raise RuntimeError(
                "no usable proxy found in {}".format(self.PROXY_FILE))
        headers = self._build_headers()

        # Passing the query as ``params`` lets requests URL-encode the
        # client-supplied domain, so it cannot inject extra parameters.
        query = {
            'url': domain_name,
            'fl': 'timestamp,original',
            'collapse': 'timestamp',
            'limit': '1000',
            'output': 'json',
        }
        response = requests.get(
            self.CDX_URL, params=query, proxies=random.choice(proxies),
            headers=headers, timeout=self.REQUEST_TIMEOUT)
        # The body is JSON (output=json), so parse it as JSON rather than
        # evaluating it as a Python literal. A blank body means no rows.
        data = response.json() if response.text.strip() else []

        res_data = []
        if isinstance(data, list) and data:
            rows = data[1:]  # the first row is the column header
        else:
            res_data.append("false")
            rows = []

        for index, row in enumerate(rows, 1):
            url = self.SNAPSHOT_URL_PREFIX + row[0] + '/' + row[1]
            page = requests.get(
                url, proxies=random.choice(proxies), headers=headers,
                timeout=self.REQUEST_TIMEOUT).text
            title = self._extract_title(page)
            if title is None:
                entry = index, "url:" + url, "没有title"
            else:
                entry = index, title
            print(entry)
            res_data.append(entry)
        return res_data

    @classmethod
    def _load_proxies(cls, path):
        """Read ``path`` and return its proxies as requests proxy dicts."""
        with open(path) as proxy_file:
            return cls._parse_proxy_lines(proxy_file)

    @staticmethod
    def _parse_proxy_lines(lines):
        """Turn an iterable of text lines into requests ``proxies`` dicts.

        Each line is stripped of surrounding whitespace (including the
        newline) and blank lines are skipped.
        """
        cleaned = (line.strip() for line in lines)
        return [{'http': entry, 'https': entry} for entry in cleaned if entry]

    @classmethod
    def _build_headers(cls):
        """Return the HTTP request headers with a random User-Agent."""
        return {
            'User-Agent': random.choice(cls.USER_AGENTS),
            'Host': 'web.archive.org',
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Pragma': 'no-cache',
            'Proxy-Connection': 'keep-alive',
            'Cookie': 'donation-identifier=4af65d2d27ee080495eb36df2e926b80; PHPSESSID=njkc4n971skbk4v38o4tf4uu92'
        }

    @classmethod
    def _extract_title(cls, html):
        """Return the page title without CR/LF, or None if none is found."""
        match = cls._TITLE_RE.search(html)
        if match is None:
            return None
        return match.group().replace('\n', '').replace('\r', '')

    def setup(self):
        """Log the new connection; runs before handle()."""
        print("before handle,连接建立:", self.client_address)

    def finish(self):
        """Log the end of the connection; runs after handle()."""
        print("finish run after handle")
# Address the lookup server listens on.
HOST = "localhost"
PORT = 9999

# Threaded TCP server using MyTCPHandler for each connection.
# serve_forever() keeps handling requests until the server is shut down.
server = socketserver.ThreadingTCPServer((HOST, PORT), MyTCPHandler)
server.serve_forever()
import socket

# Interactive client: reads a line from the user, sends it to the server on
# localhost:9999 and prints the reply, until the user types "quit".
# NOTE(review): in this file these statements come after
# server.serve_forever(), which does not return while the server runs;
# presumably this section is meant to live in a separate client script.
# The ``with`` block closes the socket even if input() or the network
# calls raise (Ctrl-C, EOF, connection reset).
with socket.socket() as client:
    client.connect(('localhost', 9999))
    while True:
        cmd = input("(quit退出)>>").strip()
        if not cmd:
            continue
        if cmd == "quit":
            break
        # sendall() retries until the whole command has been written.
        client.sendall(cmd.encode())
        # NOTE: the protocol has no framing, so only the first 10240 bytes
        # of a longer reply are read here.
        cmd_res = client.recv(10240)
        if not cmd_res:
            # An empty read means the server closed the connection.
            print("connection closed by server")
            break
        # A reply cut at the buffer limit may end mid-character; replace
        # the broken bytes instead of crashing on decode.
        print(cmd_res.decode(errors="replace"))