1、所需python包:
from qqwry import QQwry #pip install qqwry-py3
from IPy import IP
import socket
import threading
import csv
import time
import eventlet
from urllib.parse import urlparse
# Load the QQwry database file once at module import; the resulting `q`
# object is the one ip_csv() calls q.lookup(ip) on for each resolved address.
q = QQwry()
q.load_file('qqwry.dat')
2、多线程处理函数:
threads_ip(thread_num, txt_path):
输入:线程数(thread_num),要处理的csv文件路径(txt_path,文件只有一列netloc)
输出:含ip结果的csv行(netloc, ip, address),结果以追加方式写入csv文件
3、部分代码示例:
def ip_csv(batch_list, csv_path):
    """Resolve each row's host and append ``[netloc, ip, address]`` rows to a CSV.

    For every dict in ``batch_list`` the value stored under ``'netloc'`` is
    read. When that value is a full URL, only its host part is resolved;
    otherwise the value itself is resolved. The resulting IP is looked up in
    the module-level QQwry object ``q``. A row is written for every input
    entry: when the host cannot be resolved (or the lookup finds nothing) the
    ``ip`` column is empty and/or ``address`` is an empty list.

    :param batch_list: list of dicts, each holding a ``'netloc'`` key.
    :param csv_path: path of the CSV file the rows are appended to.
    :return: None
    """
    # newline="" is what the csv module asks for (avoids blank rows on
    # Windows); utf-8 matches the encoding threads_ip uses to read its input.
    with open(csv_path, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        for url in batch_list:
            url_value = url['netloc']
            ip = ''
            address = []
            try:
                # Use the URL's host when there is one; a bare host name
                # (even one that happens to start with "http") has no
                # netloc after parsing, so fall back to the raw value.
                host = urlparse(url_value).hostname or url_value
                # NOTE(review): eventlet.Timeout presumably only interrupts
                # cooperative (green / monkey-patched) code -- confirm that
                # it really bounds this blocking DNS call in plain threads.
                with eventlet.Timeout(5, False):
                    ip = socket.gethostbyname(host)
            except (OSError, ValueError):
                # Unresolvable or malformed host: best effort, keep the row
                # and leave the ip/address columns blank.
                ip = ''
            if ip:
                found = q.lookup(ip)
                # A lookup miss yields nothing to convert; keep the empty list.
                if found:
                    address = list(found)
            writer.writerow([url_value, ip, address])
def threads_ip(thread_num, txt_path, out_path=None):
    """Split the rows of a CSV across worker threads that each run ``ip_csv``.

    The input file is read completely with ``csv.DictReader`` before any
    worker starts. The rows are then divided into ``thread_num`` contiguous
    slices whose sizes differ by at most one, so no row is left out when the
    row count is not a multiple of ``thread_num``. Empty slices (more threads
    requested than rows available) do not get a thread.

    :param thread_num: number of slices / worker threads; expected to be a
        positive integer.
    :param txt_path: path of the UTF-8 CSV file to read (header row required
        by ``csv.DictReader``).
    :param out_path: CSV file the workers append their results to. Defaults
        to ``txt_path``, which keeps the earlier behaviour of appending the
        results to the input file.
    :return: None; the call blocks until every worker has finished.
    """
    with open(txt_path, "r", encoding='utf-8', newline='') as f:
        rows = list(csv.DictReader(f))

    if out_path is None:
        out_path = txt_path

    # The first `extra` slices take one additional row so the remainder of
    # the division is processed instead of being silently dropped.
    base, extra = divmod(len(rows), thread_num)
    threads = []
    start = 0
    for i in range(thread_num):
        size = base + (1 if i < extra else 0)
        chunk = rows[start:start + size]
        start += size
        if not chunk:
            continue
        # daemon=True replaces the deprecated Thread.setDaemon(True).
        threads.append(
            threading.Thread(target=ip_csv, args=(chunk, out_path), daemon=True)
        )

    for t in threads:
        t.start()
    for t in threads:
        t.join()