使用python将whois信息分片,存入数据库
whois服务器返回的whois字典信息往往很长,有的域名的whois信息超过2000个字节。因此程序先对返回的whois信息进行分片处理:按长度将其均分为9片,再分别存入数据库中。
也可以进行关键字匹配进行存储,但是得到的信息会不够全面, 两种方法各有优劣。
以下是分片处理的python源代码(Python 2 语法):
'''
--------------------------
ver : 2.0
date : 2017/11/22
auth : wud
--------------------------
'''
import DNS
import MySQLdb
import whois
import sys
from time import sleep
from time import ctime
import time
def getwhois(url):
    """Look up the WHOIS record for ``url``.

    Calls ``whois.whois(url)`` and hands back whatever that call returns.
    Progress and failure messages are printed to stdout.

    Returns ``None`` when the lookup raises, so callers must be prepared for
    a missing record.
    """
    print("finding whois information...")
    try:
        data = whois.whois(url)
    except Exception as exc:
        # A bare ``except:`` would also trap KeyboardInterrupt/SystemExit and
        # make a long batch impossible to interrupt.  Only real errors are
        # swallowed here, and the reason is reported instead of discarded.
        print("whois query fail!")
        print("whois error for %s: %r" % (url, exc))
        return None
    print("whois query successfully!")
    return data
def getip(url):
    """Resolve ``url`` with a DNS A-record query.

    Discovers the system name servers, issues an A query for ``url`` and
    returns the ``data`` field of the first answer formatted as a string.

    Returns ``None`` when the response carries no answers or when the lookup
    raises.  In the failure case ``url`` is additionally appended to
    ``fail.txt`` so it can be retried later.
    """
    try:
        DNS.DiscoverNameServers()
        reqobj = DNS.Request(url)
        # The query name must be the host being resolved.  Passing
        # ``sys.argv[0]`` here made every lookup resolve the script's own
        # file name instead of ``url``.
        answerobj = reqobj.req(name=url, qtype=DNS.Type.A)
        if not answerobj.answers:
            return None
        # NOTE(review): the answer section may contain records other than A
        # (e.g. CNAME) -- confirm whether filtering by record type is needed.
        first = answerobj.answers[0]
        ip = "%s" % (first['data'],)
        print("IP is:  %s" % ip)
        return ip
    except Exception as exc:
        # Narrowed from a bare ``except:`` so Ctrl-C still stops the batch.
        print("TIME OUT")
        print("DNS error for %s: %r" % (url, exc))
        # Append mode: 'r+' raised when fail.txt did not exist yet and wrote
        # from offset 0, clobbering earlier failures.
        with open("fail.txt", 'a') as failed:
            failed.write("%s\n" % url)
        return None
def _split_whois(info, parts=9):
    """Cut ``info`` into ``parts`` consecutive slices of near-equal length."""
    size = len(info)
    # Boundary i sits at floor(size * i / parts); the last boundary is
    # ``size`` itself, so the final slice absorbs any remainder.
    bounds = [size * i // parts for i in range(parts + 1)]
    return [info[bounds[i]:bounds[i + 1]] for i in range(parts)]


def whoisoperation(ip, url, data):
    """Store one WHOIS record in the ``whois_info2`` table, split in 9 parts.

    ``data`` is converted with ``str()`` and cut into nine consecutive slices
    that go into columns ``info_1`` .. ``info_9`` next to ``url``, ``ip`` and
    the current ``ctime()`` timestamp.  When ``data`` is ``None`` (a failed
    lookup) the nine columns are stored empty rather than holding fragments
    of the text "None".

    Database errors are reported on stdout and not re-raised; the function
    always returns ``None``.
    """
    info = "" if data is None else str(data)
    chunks = _split_whois(info, 9)

    sql = ('INSERT INTO whois_info2 '
           '(url,ip,info_1,info_2,info_3,info_4,info_5,info_6,info_7,info_8,info_9,insert_time)'
           'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
    params = [url, ip] + chunks + [ctime()]

    db = None
    try:
        print("databaes connecting")
        db = MySQLdb.connect("***.***.***.***", "*****", "********", "***", charset="utf8")
        print("connected!")
        cursor = db.cursor()
        # Values are passed separately so the driver does the quoting.
        cursor.execute(sql, params)
        db.commit()
        print("insert successfully!")
    except Exception as exc:
        # Still best-effort (a failed insert must not stop the batch), but
        # the cause is shown and Ctrl-C is no longer swallowed.
        print("error")
        print("database error for %s: %r" % (url, exc))
    finally:
        # One connection is opened per call; without this close every
        # processed URL leaked a connection.
        if db is not None:
            db.close()
def main(limit=92, delay=10):
    """Process the host names listed in ``whois_test.txt``, one per line.

    For each non-blank line among the first ``limit`` lines the host is
    resolved with ``getip``, looked up with ``getwhois``, stored with
    ``whoisoperation`` and written (url, ip, record -- one per line) to
    ``whois.txt``.  The elapsed time per host is printed and the loop then
    sleeps ``delay`` seconds before the next host.

    ``limit`` and ``delay`` default to the values that used to be
    hard-coded (92 lines, 10 seconds).  Processing stops at end of file even
    if fewer than ``limit`` lines were read.
    """
    with open("whois_test.txt", 'r') as source:
        # 'w' instead of 'r+': 'r+' failed when whois.txt did not exist and
        # overwrote from offset 0 without truncating, leaving a stale tail.
        with open("whois.txt", 'w') as report:
            for index, line in enumerate(source, 1):
                if index > limit:
                    break
                # strip() copes with "\n", "\r\n" and a missing final
                # newline; slicing off two characters ate part of the host
                # name for anything but exactly two terminator characters.
                url = line.strip()
                if not url:
                    continue
                started = time.time()
                print(index)
                print(url)
                ip = getip(url)
                data = getwhois(url)
                whoisoperation(ip, url, data)
                report.write("%s\n%s\n%s\n" % (url, ip, data))
                # Keep the report current if the run is interrupted.
                report.flush()
                print("using time : %s" % (time.time() - started))
                sleep(delay)


# Run the batch only when executed as a script, not when imported.
if __name__ == '__main__':
    main()