import itertools
import os
import re
import sys
import urllib.request

import mysql.connector

# Connection settings for the MySQL server that stores the crawl results.
user = 'root'
pwd = ''
host = '127.0.0.1'
db = 'test'
data_file = 'wooyun.dat'

# SQL statements executed against `mytable`.
create_table_sql = (
    "CREATE TABLE IF NOT EXISTS mytable ("
    "id int(10) AUTO_INCREMENT PRIMARY KEY, "
    "type varchar(300) , "
    "info varchar(1000) , "
    "detail varchar(5000) , "
    "repair varchar(1000) "
    ")CHARACTER SET utf8"
)
insert_sql = "INSERT INTO mytable (type, info, detail, repair) VALUES ( %s, %s, %s, %s)"
select_sql = "SELECT id, type, info, detail, repair FROM mytable"

# Module-wide connection; opened as soon as the module is executed.
cnx = mysql.connector.connect(
    user=user,
    password=pwd,
    host=host,
    database=db,
)
# Single cursor shared by every database helper below.
cursor = cnx.cursor()


def create_table_sql_api(a):
    """Execute the table-creation statement *a* on the shared cursor.

    On a MySQL error the message is printed and the process exits with a
    non-zero status (raises SystemExit).
    """
    try:
        cursor.execute(a)
    except mysql.connector.Error as err:
        print("create table 'mytable' failed.")
        print("Error: {}".format(err.msg))
        # Exit status 1 so the failure is visible to whoever launched the script.
        sys.exit(1)


def insert_sql_api(a, b):
    """Execute the parameterised insert statement *a* with the values *b*.

    On a MySQL error the message is printed and the process exits with a
    non-zero status (raises SystemExit). The insert is not committed here.
    """
    try:
        cursor.execute(a, b)
    except mysql.connector.Error as err:
        print("insert table 'mytable' failed.")
        print("Error: {}".format(err.msg))
        sys.exit(1)


def select_sql_api(a):
    """Execute the query *a* and print one line per returned row.

    Each row is unpacked as (id, type, info, detail, repair). On a MySQL error
    the message is printed and the process exits with a non-zero status
    (raises SystemExit).
    """
    try:
        cursor.execute(a)
        # Local names avoid shadowing the builtins `id` and `type`.
        for row_id, bug_type, info, detail, repair in cursor:
            # One placeholder per column: the previous four-placeholder format
            # printed `detail` under the "repair" label and dropped `repair`.
            print("ID:{} type:{} info:{} detail:{} repair:{}".format(
                row_id, bug_type, info, detail, repair))
    except mysql.connector.Error as err:
        print("query table 'mytable' failed.")
        print("Error: {}".format(err.msg))
        sys.exit(1)


def get_html_response(url: str) -> str:
    """Download *url* and return its body decoded as UTF-8."""
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def geturl(starturl: str) -> list:
    """Return every `/bugs/wooyun-…-…` path found in the page at *starturl*.

    Paths are returned in page order and may contain duplicates.
    """
    page = get_html_response(starturl)
    return re.findall(r'/bugs/wooyun-\w*-\w*\b', page)


def get_nextpage(starturl: str) -> list:
    """Return the distinct result-page links found in the page at *starturl*.

    Links have the form `searchbug.php?q=6YeR6J6N&pNO=<page>` and are returned
    in first-seen order.
    """
    page = get_html_response(starturl)
    # `\w+` keeps multi-character page numbers intact (a single `\w` cut
    # "pNO=12" down to "pNO=1"); the dot is escaped so it only matches ".".
    links = re.findall(r'searchbug\.php\?q=6YeR6J6N&pNO=\w+', page)
    # Drop repeated links while preserving order, so no page is fetched twice.
    return list(dict.fromkeys(links))
# First search-result page of the crawl. The query value 6YeR6J6N is the
# base64 form of the UTF-8 bytes of '金融'.
starturl = "http://www.wooyun.org/searchbug.php?q=6YeR6J6N"

# Bug-page paths collected from the result pages.
result = []
# Module-level accumulators used by the per-page processing further down.
final, type_wooyun_n, info_n, detail_n, repair_n = [], [], [], [], []
# output = open("D:\\wooyun.csv", "w+")   # CSV export, currently disabled

create_table_sql_api(create_table_sql)

# Scan every result page and gather the bug-page paths it lists into `result`.
for page_link in get_nextpage(starturl):
    page_url = 'http://wooyun.org/' + re.sub('金融', '6YeR6J6N', page_link)
    result.extend(geturl(page_url))

# Remove duplicate paths from `result`.
result = set(result)
# Download every collected bug page, extract four text fields from it, and
# store them as one row of `mytable`.
#
# NOTE(review): several regex literals in this section reached this file with
# their HTML markup missing. Some patterns were empty strings (no-op
# substitutions, omitted below); others consist only of line breaks around
# `.*.` and are written here with `\n` exactly where the line breaks stood.
# The original tag literals must be restored before the `info`, `detail` and
# `repair` extraction can be trusted.


def _clean_section(text):
    """Normalise one detail/repair match and return its whitespace-split tokens."""
    text = re.sub(r':\s', ':', text)
    text = re.sub(r'\n', '', text)
    # Turn the tail of a link's opening tag into a comma separator.
    text = re.sub(r'"\starget="_blank">', ',', text)
    return text.split()


for bug_path in result:
    # Download the bug page.
    page = get_html_response(
        'http://wooyun.org/' + re.sub('金融', '%E9%87%91%E8%9E%8D', bug_path)
    )

    # Vulnerability type; kept as a field of its own so rows can be
    # classified in the database later.
    type_parts = []
    for match in re.findall(r'漏洞类型:.*.', page):
        match = re.sub(r':\s', ':', match)
        type_parts.append(re.sub(r'\t', '', match))

    # Summary lines.
    info_tokens = []
    for match in re.findall(r'\n\w*:.*.\n', page):
        match = re.sub(r':\s', ':', match)
        info_tokens.extend(re.sub(r'\n', '', match).split())

    # Detail section. Previously the cleaned detail text was appended to the
    # repair accumulator, so the `detail` column was always empty.
    detail_tokens = []
    for match in re.findall(r'\n.*.\n', page):
        detail_tokens.extend(_clean_section(match))

    # Repair section ("修复方案"). Previously these matches were never read.
    # NOTE(review): no repair-specific clean-up is visible in this file, so
    # the detail clean-up is reused here; confirm against the intended logic.
    repair_tokens = []
    for match in re.findall(r'修复方案:\s*\n.*.\s*\n', page):
        repair_tokens.extend(_clean_section(match))

    # Column order matches insert_sql: type, info, detail, repair. Per-page
    # values live in locals, so the module-level accumulator lists stay empty
    # and need no clearing between pages.
    row = (
        "".join(type_parts),
        "".join(info_tokens),
        "".join(detail_tokens),
        "".join(repair_tokens),
    )
    insert_sql_api(insert_sql, row)
    # NOTE(review): the file's indentation was lost; the table dump is kept
    # inside the loop because the per-page resets followed it in the original.
    select_sql_api(select_sql)
    # output.writelines(row); output.writelines('\n\n')   # CSV export, disabled
# Commit the rows inserted so far, then release the cursor followed by the
# connection itself.
cnx.commit()
for handle in (cursor, cnx):
    handle.close()
# output.close()   # CSV export, currently disabled