由于之前工作需要实时去测试数百个网站的各种状态,人工一个个去点击实在是有心无力,去网上下载的一些网站域名访问的工具也不是很精准。
因此我用python写了个脚本去读取网站,检测它们的各个状态:
1.网站域名访问状态
2.网站重定向检测
3.网站模板检测
4.网站地图检测
最后将结果保存在excel中,如下图:
运行程序:
将excel表格放入对应路径,点击域名检测即可。
代码如下:
# -*- coding: UTF-8 -*-
from openpyxl import Workbook,load_workbook
import requests,time,os
import tkinter as tk
import tkinter.messagebox
import threading
import re
"""---------------------首页内容重试机制------------------------------------"""
def n_again(end_url, headers):
    """Homepage-content retry helper.

    Fetch *end_url* up to 3 times; on an HTTP 200 response, decode the body as
    UTF-8 and return find_result()'s findings.  Any failure (network error,
    non-200 status, decode error) triggers a 1-second wait and a retry; after
    three failed attempts the literal string "服务错误" is returned.
    """
    for attempt in range(3):
        try:
            # timeout keeps a dead host from hanging the worker thread forever
            response = requests.get(end_url, headers=headers, timeout=10)
            if response.status_code == 200:
                return find_result(response.content.decode('utf-8'), end_url)
        except Exception:
            # broad on purpose: decode errors count as a failed attempt too
            pass
        if attempt == 2:
            return "服务错误"
        time.sleep(1)
"""--------------------初始化数据判断----------------------------------"""
def find_result(url_text, end_url):
    """Scan page text for leftover template placeholders.

    Every fragment matching the pattern '这里编辑(...)<' is extracted,
    stripped, and paired with the URL it came from.

    Returns a list of "<fragment> <end_url>" strings; empty when the
    placeholder marker does not appear in *url_text*.
    """
    fragments = re.findall(r'这里编辑(.*?)<', url_text)
    return [fragment.strip() + " " + end_url for fragment in fragments]
'''------------------网站首页内容初始值检测--------------------------'''
def sn_check(end_url, headers):
    """Check the homepage for leftover template placeholder text.

    Returns "pass" when no placeholder is found, a list of findings
    otherwise; on a failed request, delegates to the retry helper n_again().

    Fixes the original defect where the GBK-decoded branch collected
    findings but fell off the end without returning them (returned None).
    """
    try:
        response = requests.get(end_url, headers=headers, timeout=10)
        # Sites are served either UTF-8 or GBK encoded; try UTF-8 first.
        try:
            url_text = response.content.decode('utf-8')
        except UnicodeDecodeError:
            url_text = response.content.decode('gbk')
        findings = find_result(url_text, end_url)
        if not findings:
            return "pass"
        return findings
    except Exception:
        # request failure or a body that is neither UTF-8 nor GBK
        return n_again(end_url, headers)
"""------------------------301重定向----------------------------"""
def three_go(url, headers):
    """Probe whether a bare (apex) domain answers with a 301 redirect.

    Only URLs containing exactly one '.' (i.e. no 'www.' prefix) are probed;
    anything else returns "--".  Returns "301" on a redirect; any other
    status or a network error falls through to the retry helper.
    """
    if url.count(".") != 1:
        return "--"
    try:
        # HEAD is enough: only the status code matters, not the body.
        response = requests.head(url, headers=headers, timeout=10)
        if response.status_code == 301:
            return "301"
    except Exception:
        pass
    return three_again(url, headers)
def three_again(end_url, headers):
    """301-redirect retry helper.

    Probe *end_url* with HEAD up to 3 times; return "301" as soon as that
    status is seen, or "Error" after three failed attempts (a non-301 status
    counts as a failure), sleeping 1 second between attempts.
    """
    for attempt in range(3):
        try:
            response = requests.head(end_url, headers=headers, timeout=10)
            if response.status_code == 301:
                return "301"
        except Exception:
            pass
        if attempt == 2:
            return "Error"
        time.sleep(1)
"""------------------------网站地图----------------------------"""
def _sitemap_verdict(url, url_list):
    """Classify the parsed sitemap lines for *url*.

    Rules (unchanged from the original duplicated branches):
    - every line must contain the site's host, else "Error";
    - the first line must be the canonical www home page;
    - an ".../index.html" entry is reported separately.
    """
    check_url = url.split("//")[-1]
    if any(check_url not in line for line in url_list):
        return "Error"
    if "www" not in url:
        # Sitemap entries are expected under the www. host even when the
        # checked URL is the bare apex domain.
        r_url = "http://www." + check_url
        e_url = "http://www." + check_url + "/index.html"
    else:
        r_url = url.strip()
        e_url = r_url + "/index.html"
    first_ok = (url_list[0] == r_url)
    has_index = (e_url in url_list)
    if not first_ok and has_index:
        return "主域名信息不正确且存在index.html"
    if not first_ok:
        return "主域名信息不正确"
    if has_index:
        return "存在index.html"
    return "Yes"
def site_check(url, headers):
    """Fetch <url>/sitemap.txt and validate its contents.

    A non-200 response is retried once; the body is decoded as UTF-8 and
    split into lines (the trailing empty line after the final newline is
    dropped).  Returns the verdict from _sitemap_verdict(), or "Error" on
    any request/decode/parsing failure.
    """
    try:
        sitemap_url = url + "/sitemap.txt"
        response = requests.get(sitemap_url, headers=headers, timeout=10)
        if response.status_code != 200:
            # one retry for transient server hiccups
            response = requests.get(sitemap_url, headers=headers, timeout=10)
            if response.status_code != 200:
                return "Error"
        try:
            url_list = response.content.decode('utf-8').split("\n")[:-1]
            return _sitemap_verdict(url, url_list)
        except Exception:
            # decode failure or an empty sitemap (url_list[0] IndexError)
            return "Error"
    except Exception:
        return "Error"
"""----------------模板检测------------------------"""
def model_check(end_url, headers):
    """Verify the site's template by probing five known static assets.

    The template is considered OK ("Yes") when at least 4 of the 5 assets
    return HTTP 200; otherwise (or on any request failure) the check is
    handed to the retry helper model_again().
    """
    assets = [
        end_url + "/static/css/public_base.css",
        end_url + "/static/css/index_index.css",
        end_url + "/static/js/public_base.js",
        end_url + "/static/js/index_index.js",
        end_url + "/static/js/monitor.js?h=target-jcmp.jianchuangwang.com",
    ]
    try:
        statuses = [requests.get(asset, headers=headers, timeout=10).status_code
                    for asset in assets]
        # Two or more missing assets means the template is probably broken.
        if statuses.count(200) < 4:
            return model_again(end_url, headers)
        return "Yes"
    except Exception:
        return model_again(end_url, headers)
"""模板重试机制"""
def model_again(end_url, headers):
    """Template-check retry helper.

    Re-probe the five static assets up to 3 times; return "Yes" as soon as
    at least 4 of 5 respond with HTTP 200, or "No" after three failed
    rounds, sleeping 1 second between rounds.
    """
    # Asset URLs are loop-invariant: build them once, not per attempt.
    assets = [
        end_url + "/static/css/public_base.css",
        end_url + "/static/css/index_index.css",
        end_url + "/static/js/public_base.js",
        end_url + "/static/js/index_index.js",
        end_url + "/static/js/monitor.js?h=target-jcmp.jianchuangwang.com",
    ]
    for attempt in range(3):
        try:
            statuses = [requests.get(asset, headers=headers, timeout=10).status_code
                        for asset in assets]
            if statuses.count(200) >= 4:
                return "Yes"
        except Exception:
            pass
        if attempt == 2:
            return "No"
        time.sleep(1)
"""----------------域名状态检测--------------------"""
def check_again(end_url, headers):
    """Domain-status retry helper.

    Fetch *end_url* up to 3 times; on HTTP 200 classify the page via
    domain_check_result(), otherwise return "Error" after three failed
    attempts (classification exceptions also count as failures).
    """
    for attempt in range(3):
        try:
            response = requests.get(end_url, headers=headers, timeout=10)
            if response.status_code == 200:
                return domain_check_result(response)
        except Exception:
            pass
        if attempt == 2:
            return "Error"
        time.sleep(1)
def domain_check_result(url_relust):
    """Classify a fetched homepage by scanning its text for failure markers.

    The body is decoded as UTF-8, falling back to GBK (a second decode
    failure propagates to the caller, as before).  The original duplicated
    the whole marker chain for each encoding and contained an unreachable
    second `elif "Error"` branch in both copies; the single chain below is
    equivalent.

    Returns one of: "模板不存在", "内容有Error信息", "网站暂时无法访问",
    "网站文件失败,域名已解析", or "Yes".
    """
    try:
        url_text = url_relust.content.decode('utf-8')
    except UnicodeDecodeError:
        url_text = url_relust.content.decode('gbk')
    if "模板文件不存在" in url_text:
        return "模板不存在"
    elif "Error" in url_text:
        return "内容有Error信息"
    elif "网站暂时无法访问" in url_text:
        return "网站暂时无法访问"
    elif "网站根目录,网页文件请上传到这个目录" in url_text:
        return "网站文件失败,域名已解析"
    else:
        return "Yes"
def domain_check(end_url, headers):
    """Top-level domain status check: fetch the homepage and classify it.

    On HTTP 200 the page is classified by domain_check_result(); a
    classification exception yields "结果判断异常".  Non-200 statuses and
    request failures go through the retry helper check_again().
    """
    try:
        response = requests.get(end_url, headers=headers, timeout=10)
        if response.status_code == 200:
            try:
                return domain_check_result(response)
            except Exception:
                return "结果判断异常"
        return check_again(end_url, headers)
    except Exception:
        return check_again(end_url, headers)
"""-------------------域名表读写规则----------------------------------"""
def write_xlsx(wb, wb_sheet, number_data, use_row, headers, path):
    """Run all checks for spreadsheet rows [number_data, use_row) and write
    the results back to the sheet.

    Column layout: 3 = domain (input), 4 = domain status, 5 = template check,
    6 = timestamp, 7 = sitemap check, 8 = 301 redirect, 9 = homepage
    placeholder scan.  The workbook is saved after every row so progress
    survives a crash (note: sharing one workbook across threads is racy,
    kept as in the original design).
    """
    # Domain-status results for which the remaining checks still run.
    RUN_CHECKS = {"结果判断异常", "内容有Error信息", "Yes"}
    # Results that short-circuit the remaining checks with "--" fillers.
    SKIP_CHECKS = {"网站暂时无法访问", "网站文件失败,域名已解析", "Error"}
    for row in range(number_data, use_row):
        url = wb_sheet.cell(row + 1, 3).value
        if url is not None:
            # NOTE(review): leading space kept from the original; requests
            # tolerates it — confirm before removing.
            end_url = " http://" + url
            result = domain_check(end_url, headers)
            if result in RUN_CHECKS:
                wb_sheet.cell(row + 1, 4, value=result)
                wb_sheet.cell(row + 1, 5, value=model_check(end_url, headers))    # template
                wb_sheet.cell(row + 1, 7, value=site_check(end_url, headers))     # sitemap
                wb_sheet.cell(row + 1, 8, value=three_go(end_url, headers))       # 301 redirect
                wb_sheet.cell(row + 1, 9, value=str(sn_check(end_url, headers)))  # homepage scan
            else:
                if result == "模板不存在":
                    status, template = result, "Error"
                elif result in SKIP_CHECKS:
                    status, template = result, "--"
                else:
                    # unrecognized result: flag for manual analysis
                    status, template = "其他异常待分析", "--"
                wb_sheet.cell(row + 1, 4, value=status)
                wb_sheet.cell(row + 1, 5, value=template)
                for col in (7, 8, 9):
                    wb_sheet.cell(row + 1, col, value="--")
            # test timestamp
            wb_sheet.cell(row + 1, 6, value=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
        wb.save(path)
def read_xlsx(path, number):
    """Load the workbook at *path* and run the checks over its first sheet,
    splitting the data rows across *number* worker threads (1-4).

    Rows 1..max_row-1 (row 0 is the header) are divided into contiguous
    chunks of max_row // number rows; the last thread takes the remainder.
    When the sheet is too small to split (chunk <= 1) or number == 1, the
    work runs inline on the calling thread.  Any other thread count prints
    "最大线程是4!" and does nothing, as before.
    """
    wb = load_workbook(path)
    sheet_list = [sheet_id for sheet_id in wb]
    wb_sheet = sheet_list[0]
    use_row = wb_sheet.max_row
    headers = {
        "Content-Type": "application/json;charset=utf-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
    }
    if number not in (1, 2, 3, 4):
        print("最大线程是4!")
        return
    chunk = int(use_row / number)
    if number == 1 or chunk <= 1:
        write_xlsx(wb, wb_sheet, 1, use_row, headers, path)
        return
    threads = []
    for i in range(number):
        start = 1 if i == 0 else i * chunk        # first chunk starts after the header row
        end = use_row if i == number - 1 else (i + 1) * chunk  # last chunk absorbs the remainder
        threads.append(threading.Thread(target=write_xlsx,
                                        args=(wb, wb_sheet, start, end, headers, path)))
    for t in threads:
        t.start()
    for t in threads:
        t.join()
if __name__ == '__main__':
    # --- main window ---
    window = tk.Tk()
    window.title('域名检测')
    window.geometry('400x300')
    tk.Label(window, text='域名表存放地址:').place(x=65, y=180)
    tk.Label(window, text='检测完成后查看域名表结果,运行时请不要打开表格!').place(x=50, y=140)
    var_usr_name = tk.StringVar()
    enter_usr_name = tk.Entry(window, textvariable=var_usr_name)
    enter_usr_name.place(x=160, y=180)

    def get_number():
        """Return the currently selected thread count as an int."""
        return int(number_x.get().strip())

    # Thread-count radio buttons (default: 4 threads).
    number_x = tk.StringVar()
    number_x.set(4)
    for count in (1, 2, 3, 4):
        tk.Radiobutton(window, text="线程数量:%d" % count, value=count,
                       variable=number_x, command=get_number).pack()

    def usr_log_in():
        """Button callback: validate the folder path, then run all checks."""
        usr_name = var_usr_name.get().strip()
        if usr_name == '':
            tk.messagebox.showerror(message='域名表不能为空!')
            return
        # The user supplies the folder; the workbook filename is fixed.
        end_path = os.path.join(usr_name, "域名表.xlsx")
        if not os.path.exists(end_path):
            tk.messagebox.showerror(message='域名表路径不正确!')
        else:
            try:
                number = get_number()
                read_xlsx(end_path, number)
                tk.messagebox.showinfo(title="检测成功", message='检测域名完成!')
            except Exception:
                tk.messagebox.showerror(message='运行失败!')

    bt_login = tk.Button(window, text='域名检测', command=usr_log_in)
    bt_login.place(x=190, y=240)
    window.mainloop()