从两个 csv 文件中找出相同的源 IP,并找出它们各自对应的攻击次数
# -*- coding: utf-8 -*-
import csv
import os
path = os.path.abspath('.')
# Build the paths with os.path.join: the original "\s", "\d" and "\w" pieces
# were invalid escape sequences and only produced usable paths on Windows.
filename1 = os.path.join(path, "scanner_type5_srcip_dipnum.csv")
filename2 = os.path.join(path, "ddos_srcip_dipnum.csv")
filename3 = os.path.join(path, "web_ddos_srcip_dipnum.csv")


def _load_counts(csv_path, **reader_options):
    """Map the first column of a CSV file to its second column.

    Rows with fewer than two columns (for example blank lines) are skipped.
    When a key appears more than once, the value from its first row is kept.

    Args:
        csv_path: Path of the CSV file to read.
        **reader_options: Extra keyword arguments passed to ``csv.reader``.

    Returns:
        A dict of ``{first_column: second_column}``, both kept as strings.
    """
    counts = {}
    with open(csv_path) as handle:
        for row in csv.reader(handle, **reader_options):
            if len(row) < 2:
                continue
            # setdefault keeps the first occurrence of a repeated key.
            counts.setdefault(row[0], row[1])
    return counts


def write_common_ips(scanner_path, ddos_path, out_path):
    """Write the source IPs present in both input files, with both counts.

    Each output line has the form ``ip,count_from_first,count_from_second``
    and ends with a single newline. Lines are sorted by the IP string so the
    output is deterministic.

    Args:
        scanner_path: Path of the first CSV file (ip, count).
        ddos_path: Path of the second CSV file (ip, count).
        out_path: Path of the CSV file to create or overwrite.

    Returns:
        The number of lines written.
    """
    # NOTE(review): quotechar=' ' makes the csv module treat a space as the
    # quote character for the first file only. Kept as found -- confirm that
    # this is intentional.
    scanner_counts = _load_counts(scanner_path, quotechar=' ')
    ddos_counts = _load_counts(ddos_path)

    common_ips = sorted(set(scanner_counts) & set(ddos_counts))

    # Binary mode avoids newline translation; the with-block guarantees the
    # file is flushed and closed.
    with open(out_path, "wb") as out:
        for ip in common_ips:
            line = ",".join((ip, scanner_counts[ip], ddos_counts[ip])) + "\n"
            # On Python 3 the line is text and has to be encoded before it
            # can go to a binary file; on Python 2 str is already bytes.
            if not isinstance(line, bytes):
                line = line.encode("utf-8")
            out.write(line)
    return len(common_ips)


if __name__ == "__main__":
    write_common_ips(filename1, filename2, filename3)
中间遇到过 csv 文件的分隔符不是 "," 而是 "|" 的情况,而且文件达到了 400 万行,用 Notepad 批量替换会卡死。这时用 shell 的 sed 命令处理效果不错,效率也高。命令如下:
sed -i 's/|/,/g' ddos_srcip_dipnum.txt