“919网监助手1.0”这款软件的Python源码由main.py、forms.py、events.py三个文件组成。上篇分享了main.py和forms.py的源代码;本篇分享的events.py是这款软件的主体,代码如下:
#coding:utf-8
import wx
import configparser
import requests
import re
import os
import string
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl import load_workbook
from requests import exceptions
class Getlike():
    """Harvest in-site links from the home pages listed in an Excel column.

    Home-page URLs are read from column ``bcol1`` of the active worksheet of
    ``filePath``.  Every in-site link found on those pages is appended to
    column ``bcol1 + 1`` (URL) and ``bcol1 + 2`` (anchor text) of the same
    workbook.
    """

    # Headers sent with every request so the crawler looks like a browser.
    # Written as one explicit literal: a backslash continuation inside the
    # string would make its content depend on the source indentation.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0(Windows NT 10.0; WOW64; Trident/7.0; '
                      'rv:11.0) like Gecko'
    }
    # Seconds to wait for a home page before giving up.
    _TIMEOUT = 30
    # Writing stops once the next row index would exceed this value.
    _MAX_ROW = 1048575
    # Substrings that mark an href as "not a page of this site" (mail
    # addresses, script pseudo-links, phone links).  The misspelled
    # 'javasrcipt:' entry is kept on purpose from the original filter.
    _NON_PAGE_MARKERS = ('@', 'javascript:', 'javasrcipt:', 'tel:')

    def __init__(self, filePath, row1, erow1, bcol1, obj1):
        """Store the job parameters.

        Args:
            filePath: Path of the .xlsx workbook to read from and write to.
            row1: First worksheet row to read (1-based).
            erow1: Row at which reading stops (exclusive).  A value smaller
                than ``row1`` means "read to the end of the sheet".
            bcol1: Column (1-based) that holds the URLs.
            obj1: Widget with a ``SetLabel`` method used to show progress.
        """
        self.filePath = filePath
        self.row1 = row1
        self.erow1 = erow1
        self.bcol1 = bcol1
        self.obj1 = obj1

    def main1(self):
        """Crawl every listed home page and write its in-site links back."""
        links = []
        titles = []
        # Mirrors ``links`` so the duplicate test is O(1) instead of a list
        # scan for every anchor.
        seen = set()

        def record(url, title):
            links.append(url)
            titles.append(title)
            seen.add(url)

        for site in self.rexcel():
            if site is None:
                # The first empty cell ends the list of sites.
                break
            if not isinstance(site, str):
                # A numeric/date cell cannot be a URL; skip it instead of
                # crashing in SetLabel or slicing.
                continue
            self.obj1.SetLabel(site)
            if site[0:4] != 'http':
                continue
            html = self.get_html0(site)
            if html == '':
                record(site, u'获取主页数据失败')
                continue
            soup = BeautifulSoup(html, "html.parser")
            if site[-1] == '/':
                site = site[0:-1]
            anchors = soup.find_all('a')
            for anchor in anchors:
                if not anchor.has_attr('href'):
                    continue
                href = anchor['href']
                if not self._is_internal_href(href):
                    continue
                url = self._join_link(site, href)
                if url in seen:  # drop duplicate links
                    continue
                text = anchor.string
                # Store a plain str so the parse tree is not kept alive by
                # the NavigableString objects.
                record(url, None if text is None else str(text))
            if not anchors:
                record(site, u'该网页无链接')
        self.wexcel0(links, titles)

    @classmethod
    def _is_internal_href(cls, href):
        """Return True when *href* is a relative link to an ordinary page."""
        if href[0:4] == 'http':
            return False
        return not any(marker in href for marker in cls._NON_PAGE_MARKERS)

    @staticmethod
    def _join_link(site, href):
        """Append *href* to *site* (no trailing slash) with exactly one '/'."""
        if href and href[0] != '/':
            href = '/' + href
        return site + href

    @staticmethod
    def _row_span(first_row, end_row, max_row):
        """Return the worksheet rows to read.

        ``end_row`` is exclusive.  When it is smaller than ``first_row`` the
        span runs to the end of the sheet and includes ``max_row`` itself;
        the previous ``range(first_row, max_row)`` silently dropped the last
        row.
        """
        if end_row < first_row:
            return range(first_row, max_row + 1)
        return range(first_row, end_row)

    @staticmethod
    def _first_empty_row(ws, column):
        """Return the first row of *column* whose cell is empty."""
        row = 1
        while ws.cell(row=row, column=column).value is not None:
            row += 1
        return row

    def get_html0(self, url):
        """Fetch *url* and return its decoded text, or '' on any failure."""
        try:
            r = requests.get(url, timeout=self._TIMEOUT,
                             headers=self._HEADERS)
            r.raise_for_status()
            # Let the body decide the encoding; many pages declare it wrongly.
            r.encoding = r.apparent_encoding
            if r.status_code == 200:
                return r.text
            return ''
        except requests.RequestException:
            return ''

    def rexcel(self):
        """Read the URL column and return its cell values as a list.

        Shows a message box and returns an empty list when the workbook does
        not exist.
        """
        if not os.path.exists(self.filePath):
            wx.MessageBox(u'没找到excel文件', u'提示')
            return []
        wb = load_workbook(self.filePath)
        ws = wb.active
        rows = self._row_span(self.row1, self.erow1, ws.max_row)
        return [ws.cell(row=r, column=self.bcol1).value for r in rows]

    def wexcel0(self, hr1, st1):
        """Append the collected links below the existing data.

        Args:
            hr1: Link URLs.
            st1: Anchor texts, parallel to ``hr1``.

        Returns:
            1 when the workbook was saved, 0 when it does not exist.
        """
        if not os.path.exists(self.filePath):
            return 0
        wb = load_workbook(self.filePath)
        ws = wb.active
        url_col = self.bcol1 + 1
        # Start at the first free row so earlier results are not overwritten.
        row = self._first_empty_row(ws, url_col)
        for url, text in zip(hr1, st1):
            ws.cell(row=row, column=url_col).value = url
            ws.cell(row=row, column=url_col + 1).value = text
            row += 1
            if row > self._MAX_ROW:
                break
        for letter in string.ascii_uppercase[:5]:
            ws.column_dimensions[letter].width = 50  # widen columns A-E
        wb.save(self.filePath)
        return 1
class Mgzfind(Getlike):
    """Scan the pages listed in an Excel column for sensitive keywords.

    URLs are read through the inherited ``rexcel``.  For every URL the
    matched phrases are appended to the workbook: the URL goes to column
    ``bcol1 + 2`` and the matches to column ``bcol1 + 3``.
    """

    # Seconds to wait for a page before giving up.
    _PAGE_TIMEOUT = 10
    # Connection-level retries per request.
    _PAGE_RETRIES = 3
    # Writing stops once the next row index would exceed this value.
    _LAST_ROW = 1048575
    _PAGE_HEADERS = {
        'User-Agent': 'Mozilla/5.0(Windows NT 10.0; WOW64; Trident/7.0; '
                      'rv:11.0) like Gecko'
    }

    def __init__(self, filePath, row1, erow1, bcol1, obj1, mgz):
        """Store the job parameters.

        Args:
            filePath, row1, erow1, bcol1, obj1: Forwarded to ``Getlike``.
            mgz: Keywords as a regex alternation, i.e. separated by ``|``.
        """
        Getlike.__init__(self, filePath, row1, erow1, bcol1, obj1)
        self.mgz = mgz

    def main2(self):
        """Fetch every listed URL and record the keyword hits next to it."""
        # A hit is a run of CJK characters immediately followed by a keyword.
        matcher = re.compile(r'([\u4e00-\u9fa5]{1,}(' + self.mgz + '))')
        # ``urls`` and ``results`` are filled together.  Pairing the raw
        # cell list with the results by index put every result after a
        # skipped (non-http) cell next to the wrong URL.
        urls = []
        results = []
        for cell in self.rexcel():
            if cell is None:
                # The first empty cell ends the list of pages.
                break
            if not isinstance(cell, str):
                # A numeric/date cell cannot be a URL.
                continue
            self.obj1.SetLabel(cell)
            if cell[0:4] != 'http':
                continue
            page = self.get_html(cell)
            urls.append(cell)
            if page == '':
                results.append(u'获取页面数据失败')
            else:
                results.append(
                    ''.join(hit.group() + ';' for hit in matcher.finditer(page)))
        self.wexcel(urls, results)

    def get_html(self, url):
        """Fetch *url* and return its decoded text, or '' on any failure.

        Retries are configured on a transport adapter.  Assigning
        ``requests.adapters.DEFAULT_RETRIES`` at call time does not change
        what ``requests.get`` uses, so that approach never retried.
        """
        try:
            with requests.Session() as session:
                adapter = requests.adapters.HTTPAdapter(
                    max_retries=self._PAGE_RETRIES)
                session.mount('http://', adapter)
                session.mount('https://', adapter)
                r = session.get(url, timeout=self._PAGE_TIMEOUT,
                                headers=self._PAGE_HEADERS)
                r.raise_for_status()
                # Let the body decide the encoding.
                r.encoding = r.apparent_encoding
                if r.status_code == 200:
                    return r.text
                return ''
        except requests.RequestException:
            return ''

    def wexcel(self, list1, list2):
        """Append (URL, matches) pairs below the existing data.

        Args:
            list1: Page URLs.
            list2: Result strings, parallel to ``list1``.

        Returns:
            1 when the workbook was saved, 0 when it does not exist.
        """
        if not os.path.exists(self.filePath):
            return 0
        wb = load_workbook(self.filePath)
        ws = wb.active
        col = 2 + self.bcol1
        # Start at the first free row so earlier results are not overwritten.
        row = 1
        while ws.cell(row=row, column=col).value is not None:
            row += 1
        for url, found in zip(list1, list2):
            ws.cell(row=row, column=col).value = url
            ws.cell(row=row, column=col + 1).value = found
            row += 1
            if row > self._LAST_ROW:
                break
        wb.save(self.filePath)
        return 1
#主窗体事件
class MyFrame1_events():
    """Event handlers and config persistence for the main window."""

    _CONFIG_FILE = "config.ini"
    # Label of the run button while no job is running.
    _IDLE_LABEL = u'确定'

    def __init__(self, frame):
        """Attach to *frame*: set its icon, load the config, bind events."""
        self.frame = frame
        self.frame.SetIcon(wx.Icon(u't1.ico', wx.BITMAP_TYPE_ICO))
        self.readcon()
        self.frame.Bind(wx.EVT_CLOSE, self.MyFrame1_OnClose)
        self.frame.m_radioBtn15.Bind(wx.EVT_RADIOBUTTON, self.radio5)
        self.frame.m_radioBtn6.Bind(wx.EVT_RADIOBUTTON, self.radio6)
        self.frame.m_button14.Bind(wx.EVT_BUTTON, self.openf)
        self.frame.m_button12.Bind(wx.EVT_BUTTON, self.mymain)

    def _mode_suffix(self):
        """Return '1' for the link-collection mode, '2' for keyword mode."""
        return "1" if self.frame.m_radioBtn15.GetValue() else "2"

    @staticmethod
    def _write_config(cf, path):
        """Write *cf* to *path*, replacing the previous content.

        The file is opened in 'w' mode and closed by the ``with`` block.
        Opening it with 'r+' never truncated, so a shorter new config left
        stale bytes of the old one at the end of the file.
        """
        with open(path, "w", encoding="UTF-8") as handle:
            cf.write(handle)

    def readcon(self):
        """Load config.ini into the widgets for the selected mode.

        Missing files, sections or options fall back to defaults instead of
        raising, so the window can still open on a fresh install.
        NOTE(review): the fallback row/column value of 1 is an assumption;
        confirm against the spin controls' ranges.
        """
        cf = configparser.ConfigParser()
        cf.read(self._CONFIG_FILE, encoding="utf-8")
        suffix = self._mode_suffix()
        filePath = cf.get("excel", "path", fallback="")
        brow1 = cf.get("excel", "brow" + suffix, fallback="1")
        erow1 = cf.get("excel", "erow" + suffix, fallback="1")
        bcol1 = cf.get("excel", "bcol" + suffix, fallback="1")
        # Keywords are shown and used as a regex alternation.
        word = cf.get("keyword", "word", fallback="").replace(",", "|")
        self.frame.m_textCtrl25.Clear()
        self.frame.m_textCtrl25.WriteText(filePath)
        self.frame.m_spinCtrl1.SetValue(brow1)
        self.frame.m_spinCtrl2.SetValue(erow1)
        self.frame.m_spinCtrl3.SetValue(bcol1)
        self.frame.m_textCtrl32.Clear()
        self.frame.m_textCtrl32.WriteText(word)
        return

    def savecon(self):
        """Persist the widgets to config.ini and advance the row window.

        The stored start row becomes the current end row and the end row
        moves ten rows further, so the next run continues where this one
        stopped.
        """
        filePath = self.frame.m_textCtrl25.GetValue()
        erow1 = self.frame.m_spinCtrl2.GetValue()
        bcol1 = self.frame.m_spinCtrl3.GetValue()
        mgz = self.frame.m_textCtrl32.GetValue().replace(",", "|")
        cf = configparser.ConfigParser()
        cf.read(self._CONFIG_FILE, encoding="utf-8")
        for section in ("keyword", "excel"):
            # Create sections that a missing/partial file does not have.
            if not cf.has_section(section):
                cf.add_section(section)
        cf.set("keyword", "word", mgz)
        cf.set("excel", "path", filePath)
        suffix = self._mode_suffix()
        cf.set("excel", "brow" + suffix, str(erow1))
        cf.set("excel", "erow" + suffix, str(erow1 + 10))
        cf.set("excel", "bcol" + suffix, str(bcol1))
        self._write_config(cf, self._CONFIG_FILE)
        self.frame.m_spinCtrl1.SetValue(erow1)
        self.frame.m_spinCtrl2.SetValue(erow1 + 10)
        return

    def MyFrame1_OnClose(self, event):
        """Close the window unless a job is running.

        While the run button does not show its idle label, the close request
        is vetoed (when the event allows it) after informing the user.
        Previously the window was destroyed right after the "cannot close"
        message.
        """
        running = self.frame.m_button12.GetLabel() != self._IDLE_LABEL
        if running and event.CanVeto():
            wx.MessageBox(u'程序正在运行,不能关闭窗口', u'提示')
            event.Veto()
            return
        frame = event.GetEventObject()
        frame.Destroy()
        event.Skip()

    def openf(self, event):
        """Let the user pick an .xlsx file and show its path."""
        openFileDialog = wx.FileDialog(self.frame, "请选择要打开的Excel文件", "", "",
                                       "Excel格式 (*.xlsx)|*.xlsx",
                                       wx.FD_OPEN | wx.FD_FILE_MUST_EXIST)
        try:
            if openFileDialog.ShowModal() == wx.ID_OK:
                filePath = openFileDialog.GetPath()
                self.frame.m_textCtrl25.Clear()
                self.frame.m_textCtrl25.WriteText(filePath)
        finally:
            # Dialogs are not released automatically.
            openFileDialog.Destroy()
        event.Skip()

    def radio5(self, event):
        """Reload the settings after the first mode was selected."""
        self.readcon()
        event.Skip()

    def radio6(self, event):
        """Reload the settings after the second mode was selected."""
        self.readcon()
        event.Skip()

    def mymain(self, event):
        """Run the selected job with the values currently in the widgets."""
        button = self.frame.m_button12
        button.Enable(False)
        button.SetLabel('正在运行...')
        try:
            filePath = self.frame.m_textCtrl25.GetValue()
            row1 = self.frame.m_spinCtrl1.GetValue()
            erow1 = self.frame.m_spinCtrl2.GetValue()
            bcol1 = self.frame.m_spinCtrl3.GetValue()
            obj1 = self.frame.m_staticText11
            mgz = self.frame.m_textCtrl32.GetValue().replace(",", "|")
            if self.frame.m_radioBtn15.GetValue():
                Getlike(filePath, row1, erow1, bcol1, obj1).main1()
            else:
                Mgzfind(filePath, row1, erow1, bcol1, obj1, mgz).main2()
            # Only advance the stored row window after a successful run.
            self.savecon()
            obj1.SetLabel('完成')
        finally:
            # Always give the button back, even when the job raised;
            # otherwise the window stays stuck in the "running" state.
            button.Enable(True)
            button.SetLabel('确定')
        event.Skip()
上述代码中自定义了父类Getlike和子类Mgzfind,这是我为了学习和体验Python类的定义和使用而有意为之;其实也可以把它们改写为主窗体事件类内的自定义函数。