网页主动探测工具使用

单位的项目是IBatis做的,每个查询的SQL里面都有很多判断
  上次优化SQL之后,其中的一个分支报错,但是作为dba,不可能排查每一个分支.
  所以,干脆用爬虫爬过所有的网页,主动探测程序的异常.
  这样有两个好处
  1.可以主动查看网页是否异常 (500错误,404错误)
  2.可以筛查速度较慢的网页,从这个方向也可以定位慢SQL吧.(也有服务器资源不足,造成网络超时的情况)
  前提,
  必须是互联网公司,大多数网页不用登录也可以浏览
  首先,建表
  CREATE SEQUENCE seq_probe_id INCREMENT BY 1 START WITH 1 NOMAXvalue NOCYCLE CACHE 2000;
  create table probe(
  id int primary key,
  host varchar(40) not null,
  path varchar(500) not null,
  state int not null,
  taskTime int not null,
  type varchar(10) not null,
  createtime date default sysdate not null
  ) ;
  其中host是域名,path是网页的相对路径,state是HTTP状态码,taskTime是网页获取时间,单位是毫秒,type是类型(html,htm,jpg等)
  程序结构
  程序分三个主要步骤,再分别用三个队列实现生产者消费者模式.
  1.连接.根据连接队列的目标,使用Socket获取网页,然后放入解析队列
  2.解析.根据解析队列的内容,使用正则表达式获取该网页的合法链接,将其再放入连接队列.然后将解析的网页放入持久化队列
  3.持久化.将持久化队列的内容存入数据库,以便查询。
  程序的三个步骤并行执行,每个步骤内部也可以并发执行.
但是通常来说,解析和持久化可以分别用单线程的方式执行.
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.Socket;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Entry point of the page prober.
 *
 * <p>Wires three queues (connect, parse, persistence) to three executor pools, seeds the
 * connect queue with one start page and then prints throughput statistics once per second
 * until the process is killed.
 */
public class Probe {
    /**
     * Number of connect workers. Each worker loops forever, so submitting more workers than
     * the pool has threads only parks the surplus in the executor queue where they never run.
     */
    private static final int CONNECT_WORKERS = 200;
    /** Host the crawl starts from and the only whitelisted domain; replace with the real host name. */
    private static final String SEED_HOST = "域名";

    private static final BlockingQueue<Task> CONNECTLIST = new LinkedBlockingQueue<Task>();
    private static final BlockingQueue<Task> PARSELIST = new LinkedBlockingQueue<Task>();
    private static final BlockingQueue<Task> PERSISTENCELIST = new LinkedBlockingQueue<Task>();
    private static final ExecutorService CONNECTTHREADPOOL = Executors.newFixedThreadPool(CONNECT_WORKERS);
    private static final ExecutorService PARSETHREADPOOL = Executors.newSingleThreadExecutor();
    private static final ExecutorService PERSISTENCETHREADPOOL = Executors.newFixedThreadPool(1);
    private static final List<String> DOMAINLIST = new CopyOnWriteArrayList<>();

    static {
        DOMAINLIST.add(SEED_HOST);
    }

    /**
     * Starts the workers and reports progress on standard output.
     *
     * @param args unused
     * @throws Exception if the main thread is interrupted while queueing the seed or sleeping
     */
    public static void main(String args[]) throws Exception {
        long start = System.currentTimeMillis();
        CONNECTLIST.put(new Task(SEED_HOST, 80, "/static/index.html"));
        for (int i = 0; i < CONNECT_WORKERS; i++) {
            CONNECTTHREADPOOL.submit(new ConnectHandler(CONNECTLIST, PARSELIST));
        }
        PARSETHREADPOOL.submit(new ParseHandler(CONNECTLIST, PARSELIST, PERSISTENCELIST, DOMAINLIST));
        PERSISTENCETHREADPOOL.submit(new PersistenceHandler(PERSISTENCELIST));
        while (true) {
            Thread.sleep(1000);
            // Floating-point division: integer division truncated the elapsed seconds and
            // overstated every per-second rate.
            float interval = (System.currentTimeMillis() - start) / 1000f;
            if (interval <= 0) {
                // The wall clock can be adjusted backwards; skip this sample instead of dividing by zero.
                continue;
            }
            int connectTotal = ConnectHandler.GETCOUNT();
            int parseTotal = ParseHandler.GETCOUNT();
            int persistenceTotal = PersistenceHandler.GETCOUNT();
            int connectps = Math.round(connectTotal / interval);
            int parseps = Math.round(parseTotal / interval);
            int persistenceps = Math.round(persistenceTotal / interval);
            // "\r" rewrites the same console line on every sample.
            System.out.print("\r连接总数:" + connectTotal + " \t每秒连接:" + connectps + "\t连接队列剩余:" + CONNECTLIST.size()
                    + " \t解析总数:" + parseTotal + " \t每秒解析:" + parseps + "\t解析队列剩余:" + PARSELIST.size() + " \t持久化总数:"
                    + persistenceTotal + " \t每秒持久化:" + persistenceps + "\t持久化队列剩余:" + PERSISTENCELIST.size());
        }
    }
}
/**
 * One page to probe, carried through the connect, parse and persistence stages.
 *
 * <p>Holds the target (host, port, path) plus the results filled in along the way: raw
 * response text, HTTP state, elapsed time in milliseconds and the file type derived from
 * the path.
 */
class Task {
    private String host;
    private int port;
    private String currentPath;
    private long taskTime;
    private String type;
    private String content;
    private int state;

    /** Creates an empty task; call {@link #init(String, int, String)} before using it. */
    public Task() {
    }

    /**
     * Creates a task for the given target.
     *
     * @param host host name to connect to
     * @param port TCP port
     * @param path request path, optionally followed by a query string; must not be null
     */
    public Task(String host, int port, String path) {
        init(host, port, path);
    }

    /**
     * Sets the target of this task and derives its type from the path.
     *
     * @param host host name to connect to
     * @param port TCP port
     * @param path request path, optionally followed by a query string; must not be null
     */
    public void init(String host, int port, String path) {
        this.setCurrentPath(path);
        this.host = host;
        this.port = port;
    }

    public int getState() {
        return state;
    }

    public void setState(int state) {
        this.state = state;
    }

    public String getCurrentPath() {
        return currentPath;
    }

    /**
     * Stores the path and recomputes the type from it.
     *
     * @param currentPath request path; must not be null
     * @throws NullPointerException if {@code currentPath} is null
     */
    public void setCurrentPath(String currentPath) {
        this.currentPath = currentPath;
        this.type = extensionOf(currentPath);
    }

    /**
     * Returns the file extension of the last path segment, ignoring any query string or
     * fragment, or an empty string when that segment has no dot.
     *
     * <p>Only the last segment is examined so that dots in directory names
     * ({@code /a.b/c.htm}) or in the query ({@code /page?v=1.2}) cannot be mistaken for the
     * extension; the latter used to raise a StringIndexOutOfBoundsException.
     */
    private static String extensionOf(String path) {
        int end = path.length();
        int query = path.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = path.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String resource = path.substring(0, end);
        int lastSlash = resource.lastIndexOf('/');
        int lastDot = resource.lastIndexOf('.');
        return lastDot > lastSlash ? resource.substring(lastDot + 1) : "";
    }

    public long getTaskTime() {
        return taskTime;
    }

    public void setTaskTime(long taskTime) {
        this.taskTime = taskTime;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getHost() {
        return host;
    }

    public int getPort() {
        return port;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }
}
/**
 * Parse stage of the prober.
 *
 * <p>Takes fetched pages from the parse queue, extracts the HTTP state from the response
 * text, and for pages answered with 200 turns every quoted {@code *.htm*} link into a new
 * task on the connect queue. Every page taken from the parse queue is then forwarded to the
 * persistence queue, including pages whose parsing failed, so that failures are recorded
 * instead of being lost.
 *
 * <p>Links are resolved by a chain of responsibility, tried in this order: {@code TwoLevel}
 * ({@code ../../x.htm}), {@code OneLevel} ({@code ../x.htm}), {@code FullPath}
 * ({@code http://host/x.htm}, whitelisted hosts only), {@code Root} ({@code /x.htm}) and
 * {@code Default} ({@code x.htm}).
 */
class ParseHandler implements Runnable {
    /** Targets ("host:port/path") already queued for fetching; shared by all parser instances. */
    private static final Set<String> SET = new ConcurrentSkipListSet<String>();
    private static final AtomicInteger COUNT = new AtomicInteger();
    /** Status line of any HTTP/x.y response; group 1 is the three-digit state. */
    private static final Pattern STATUS_LINE = Pattern.compile("^HTTP/\\d\\.\\d\\s+(\\d{3})");
    /** A double-quoted string that contains ".htm". */
    private static final Pattern LINK = Pattern.compile("\"[^\"]+\\.htm[^\"]*\"");

    private final BlockingQueue<Task> connectlist;
    private final BlockingQueue<Task> parselist;
    private final BlockingQueue<Task> persistencelist;
    /** Hosts that absolute {@code http://} links are allowed to point to. */
    List<String> domainlist;

    /** Returns how many pages have been taken through the parse loop so far. */
    public static int GETCOUNT() {
        return COUNT.get();
    }

    /** One link-resolution rule of the chain. */
    private interface Filter {
        /**
         * Resolves {@code path} against {@code fatherTask} into {@code newTask}, or delegates
         * to {@code chain}.
         *
         * @return true if {@code newTask} was initialised and should be crawled, false if
         *         the link must be dropped
         */
        boolean doFilter(Task fatherTask, Task newTask, String path, Filter chain);
    }

    /** Hands the link to each rule in turn; one instance is used per link because it is stateful. */
    private class FilterChain implements Filter {
        private final List<Filter> list = new ArrayList<Filter>();
        private final Iterator<Filter> it;

        FilterChain() {
            list.add(new TwoLevel());
            list.add(new OneLevel());
            list.add(new FullPath());
            list.add(new Root());
            list.add(new Default());
            it = list.iterator();
        }

        @Override
        public boolean doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
            return it.hasNext() && it.next().doFilter(fatherTask, newTask, path, chain);
        }
    }

    /** Handles links that start with {@code ../../}. */
    private class TwoLevel implements Filter {
        @Override
        public boolean doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
            if (!path.startsWith("../../")) {
                return chain.doFilter(fatherTask, newTask, path, chain);
            }
            String prefix = getPrefix(fatherTask.getCurrentPath(), 3);
            // Strip only the leading marker and join with an explicit "/"; replace() dropped
            // the separator ("/a" + "x.htm" -> "/ax.htm") and touched every occurrence.
            newTask.init(fatherTask.getHost(), fatherTask.getPort(), join(prefix, path.substring("../../".length())));
            return true;
        }
    }

    /** Handles links that start with {@code ../}. */
    private class OneLevel implements Filter {
        @Override
        public boolean doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
            if (!path.startsWith("../")) {
                return chain.doFilter(fatherTask, newTask, path, chain);
            }
            String prefix = getPrefix(fatherTask.getCurrentPath(), 2);
            newTask.init(fatherTask.getHost(), fatherTask.getPort(), join(prefix, path.substring("../".length())));
            return true;
        }
    }

    /** Handles absolute {@code http://} links; only hosts in {@code domainlist} are followed. */
    private class FullPath implements Filter {
        @Override
        public boolean doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
            if (!path.startsWith("http://")) {
                return chain.doFilter(fatherTask, newTask, path, chain);
            }
            for (String domain : domainlist) {
                String base = "http://" + domain + "/";
                if (path.startsWith(base)) {
                    newTask.init(domain, fatherTask.getPort(), "/" + path.substring(base.length()));
                    return true;
                }
            }
            // Foreign host: report rejection. Assigning null to the parameter, as before,
            // was invisible to the caller, which then queued an uninitialised task.
            return false;
        }
    }

    /** Handles host-relative links that start with {@code /}. */
    private class Root implements Filter {
        @Override
        public boolean doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
            if (!path.startsWith("/")) {
                return chain.doFilter(fatherTask, newTask, path, chain);
            }
            newTask.init(fatherTask.getHost(), fatherTask.getPort(), path);
            return true;
        }
    }

    /** Handles everything else as a link relative to the parent page's directory. */
    private class Default implements Filter {
        @Override
        public boolean doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
            String prefix = getPrefix(fatherTask.getCurrentPath(), 1);
            newTask.init(fatherTask.getHost(), fatherTask.getPort(), join(prefix, path));
            return true;
        }
    }

    /**
     * @param connectlist     queue that receives newly discovered pages
     * @param parselist       queue of fetched pages to parse
     * @param persistencelist queue that receives every parsed page
     * @param domainlist      hosts that absolute links may point to
     */
    public ParseHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist,
            BlockingQueue<Task> persistencelist, List<String> domainlist) {
        this.connectlist = connectlist;
        this.parselist = parselist;
        this.persistencelist = persistencelist;
        this.domainlist = domainlist;
    }

    /**
     * Parses one page from the parse queue and forwards it to the persistence queue.
     *
     * @throws InterruptedException if interrupted while waiting on a queue
     */
    private void handler() throws InterruptedException {
        Task task = parselist.take();
        try {
            parseTaskState(task);
            if (200 == task.getState()) {
                // Remember the page itself so that a link back to it is not fetched again.
                SET.add(targetKey(task));
                extractLinks(task);
            }
        } catch (RuntimeException e) {
            e.printStackTrace();
        } finally {
            // The response text is not needed downstream; release it before queueing.
            task.setContent(null);
            persistencelist.put(task);
        }
    }

    /** Queues a new task for every acceptable link found in the page content. */
    private void extractLinks(Task task) throws InterruptedException {
        Matcher matcher = LINK.matcher(task.getContent());
        while (matcher.find()) {
            String quoted = matcher.group();
            String path = quoted.substring(1, quoted.length() - 1);
            if (!isCrawlable(path)) {
                continue;
            }
            try {
                createNewTask(task, path);
            } catch (RuntimeException e) {
                // One malformed link must not abort the remaining links of the page.
                e.printStackTrace();
            }
        }
    }

    /**
     * Rejects matches that contain whitespace or parentheses (usually script or prose, not a
     * URL) and anything with a scheme other than plain {@code http://}. A bare ":" test
     * would also reject every absolute link and make the FullPath rule unreachable.
     */
    private static boolean isCrawlable(String path) {
        if (path.contains(" ") || path.contains("\t") || path.contains("(") || path.contains(")")) {
            return false;
        }
        return !path.contains(":") || path.startsWith("http://");
    }

    /**
     * Stores the HTTP state of the response in the task; 0 means it could not be determined.
     */
    private void parseTaskState(Task task) {
        String content = task.getContent();
        int state = 0;
        if (content != null) {
            Matcher status = STATUS_LINE.matcher(content);
            if (status.find()) {
                state = Integer.parseInt(status.group(1));
            } else if (content.length() >= 22 && isThreeDigits(content, 19)) {
                // Fixed offset kept from the original implementation for responses without
                // a leading status line. NOTE(review): presumably site-specific - confirm.
                state = Integer.parseInt(content.substring(19, 22));
            }
        }
        task.setState(state);
    }

    private static boolean isThreeDigits(String s, int from) {
        for (int i = from; i < from + 3; i++) {
            char c = s.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Resolves {@code path} against {@code fatherTask} and queues the result unless the link
     * is rejected or its target was queued before.
     *
     * @param fatherTask page on which the link was found
     * @param path       link text without the surrounding quotes
     * @throws InterruptedException if interrupted while queueing
     */
    private void createNewTask(Task fatherTask, String path) throws InterruptedException {
        Task newTask = new Task();
        FilterChain filterchain = new FilterChain();
        // De-duplicate on the resolved target: the same relative text on two pages can point
        // to different pages, and different texts can point to the same one.
        if (filterchain.doFilter(fatherTask, newTask, path, filterchain) && SET.add(targetKey(newTask))) {
            connectlist.put(newTask);
        }
    }

    private static String targetKey(Task task) {
        return task.getHost() + ":" + task.getPort() + task.getCurrentPath();
    }

    /** Joins a directory and a relative path with exactly one "/" between them. */
    private static String join(String dir, String rest) {
        return dir.endsWith("/") ? dir + rest : dir + "/" + rest;
    }

    /**
     * Removes the last {@code count} "/"-separated segments from the path part of {@code s}
     * (the query string is ignored), stopping at the root.
     *
     * @return the remaining directory, or "/" when nothing is left
     */
    private static String getPrefix(String s, int count) {
        int query = s.indexOf('?');
        String prefix = query >= 0 ? s.substring(0, query) : s;
        while (count > 0) {
            int slash = prefix.lastIndexOf('/');
            if (slash < 0) {
                // More "../" than directory levels: clamp at the root.
                break;
            }
            prefix = prefix.substring(0, slash);
            count--;
        }
        return "".equals(prefix) ? "/" : prefix;
    }

    /** Parses pages until the thread is interrupted. */
    @Override
    public void run() {
        try {
            while (!Thread.currentThread().isInterrupted()) {
                this.handler();
                COUNT.addAndGet(1);
            }
        } catch (InterruptedException e) {
            // Restore the flag so the executor can observe the shutdown request.
            Thread.currentThread().interrupt();
        }
    }
}
/**
 * Connect stage of the prober.
 *
 * <p>Takes a task from the connect queue, fetches the page over a plain socket with an
 * HTTP/1.0 GET, stores the raw response text and the elapsed time in the task and passes it
 * on to the parse queue. A task whose fetch fails is logged and not forwarded.
 */
class ConnectHandler implements Runnable {
    /** Maximum time a single read may block; without it a stalled server pins the worker forever. */
    private static final int READ_TIMEOUT_MS = 30000;
    private static final AtomicInteger COUNT = new AtomicInteger();

    private final BlockingQueue<Task> connectlist;
    private final BlockingQueue<Task> parselist;

    /** Returns how many tasks have been taken through the connect loop so far, failed ones included. */
    public static int GETCOUNT() {
        return COUNT.get();
    }

    /**
     * @param connectlist queue of pages to fetch
     * @param parselist   queue that receives fetched pages
     */
    public ConnectHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist) {
        this.connectlist = connectlist;
        this.parselist = parselist;
    }

    /**
     * Fetches one page from the connect queue.
     *
     * @throws InterruptedException if interrupted while waiting on a queue
     */
    private void handler() throws InterruptedException {
        Task task = connectlist.take();
        try {
            if (task.getHost() == null || task.getCurrentPath() == null) {
                // InetAddress.getByName(null) resolves to the loopback address, so an
                // uninitialised task would silently probe this machine.
                throw new IllegalArgumentException("task has no host or path");
            }
            long start = System.currentTimeMillis();
            getHtml(task);
            task.setTaskTime(System.currentTimeMillis() - start);
        } catch (Exception e) {
            System.err.println("probe failed: " + task.getHost() + task.getCurrentPath());
            e.printStackTrace();
            return;
        }
        parselist.put(task);
    }

    /**
     * Requests the task's path from its host and stores the complete response (headers and
     * body, line terminators removed) as the task content.
     *
     * @throws Exception if the host cannot be resolved or any socket operation fails
     */
    private void getHtml(Task task) throws Exception {
        StringBuilder sb = new StringBuilder(2048);
        InetAddress addr = InetAddress.getByName(task.getHost());
        // try-with-resources closes the socket, and with it both streams, on every path.
        try (Socket socket = new Socket(addr, task.getPort())) {
            socket.setSoTimeout(READ_TIMEOUT_MS);
            // HTTP/1.0 makes the server close the connection after the response, which is
            // what ends the read loop below.
            BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream(), "UTF-8"));
            wr.write("GET " + task.getCurrentPath() + " HTTP/1.0\r\n");
            wr.write("HOST:" + task.getHost() + "\r\n");
            wr.write("Accept:*/*\r\n");
            wr.write("\r\n");
            wr.flush();
            // NOTE(review): the response is decoded with the platform default charset.
            BufferedReader rd = new BufferedReader(new InputStreamReader(socket.getInputStream()));
            String line;
            while ((line = rd.readLine()) != null) {
                sb.append(line);
            }
        }
        task.setContent(sb.toString());
    }

    /** Fetches pages until the thread is interrupted. */
    @Override
    public void run() {
        try {
            while (!Thread.currentThread().isInterrupted()) {
                this.handler();
                COUNT.addAndGet(1);
            }
        } catch (InterruptedException e) {
            // Restore the flag so the executor can observe the shutdown request.
            Thread.currentThread().interrupt();
        }
    }
}
/**
 * Persistence stage of the prober.
 *
 * <p>Takes finished tasks from the persistence queue and inserts one row per task into the
 * {@code probe} table, committing after every row.
 */
class PersistenceHandler implements Runnable {
    static {
        try {
            Class.forName("oracle.jdbc.OracleDriver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    private static final AtomicInteger COUNT = new AtomicInteger();

    private final BlockingQueue<Task> persistencelist;
    private final Connection conn;
    private final PreparedStatement ps;

    /** Returns how many tasks have been taken through the persistence loop so far, failed ones included. */
    public static int GETCOUNT() {
        return COUNT.get();
    }

    /**
     * Opens the database connection and prepares the insert statement.
     *
     * @param persistencelist queue of tasks to store
     * @throws IllegalStateException if the connection or the statement cannot be created;
     *         continuing without them would only fail later with a NullPointerException in
     *         the worker thread
     */
    public PersistenceHandler(BlockingQueue<Task> persistencelist) {
        this.persistencelist = persistencelist;
        Connection connection = null;
        PreparedStatement statement;
        try {
            // The Oracle thin driver requires "@" before the host in the URL.
            // NOTE(review): credentials are hard-coded; move them to configuration.
            connection = DriverManager.getConnection("jdbc:oracle:thin:@127.0.0.1:1521:orcl", "edmond", "edmond");
            // handler() commits explicitly, which JDBC only permits with auto-commit off.
            connection.setAutoCommit(false);
            statement = connection
                    .prepareStatement("insert into probe(id,host,path,state,tasktime,type) values(seq_probe_id.nextval,?,?,?,?,?)");
        } catch (SQLException e) {
            closeQuietly(connection);
            throw new IllegalStateException("cannot prepare the probe insert statement", e);
        }
        this.conn = connection;
        this.ps = statement;
    }

    /** Stores tasks until the thread is interrupted, then releases the database resources. */
    @Override
    public void run() {
        try {
            while (!Thread.currentThread().isInterrupted()) {
                this.handler();
                COUNT.addAndGet(1);
            }
        } catch (InterruptedException e) {
            // Restore the flag so the executor can observe the shutdown request.
            Thread.currentThread().interrupt();
        } finally {
            closeQuietly(ps);
            closeQuietly(conn);
        }
    }

    /**
     * Inserts one task from the queue; a failed insert is logged and rolled back.
     *
     * @throws InterruptedException if interrupted while waiting on the queue
     */
    private void handler() throws InterruptedException {
        Task task = persistencelist.take();
        try {
            ps.setString(1, task.getHost());
            ps.setString(2, task.getCurrentPath());
            ps.setInt(3, task.getState());
            ps.setLong(4, task.getTaskTime());
            ps.setString(5, task.getType());
            ps.executeUpdate();
            conn.commit();
        } catch (SQLException e) {
            e.printStackTrace();
            try {
                // Leave the connection clean for the next row.
                conn.rollback();
            } catch (SQLException rollbackFailure) {
                rollbackFailure.printStackTrace();
            }
        }
    }

    /** Closes the resource if present, logging instead of propagating any failure. */
    private static void closeQuietly(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
  ParseHandler 使用了一个职责链模式,
  TwoLevel 处理../../开头的链接(../../sucai/sucai.htm)
  OneLevel 处理../开头的链接(../sucai/sucai.htm)
  FullPath 处理绝对路径的链接(http://域名/sucai/sucai.htm)
  Root 处理/开头的链接(/sucai/sucai.htm)
  Default 处理常规的链接(sucai.htm)
  ParseHandler FullPath 过滤需要一个白名单.
  这样可以使程序在固定的域名爬行
  ParseHandler parseTaskState 解析状态码 可能需要根据实际情况进行调整
  比如网页404,服务器可能会返回一个错误页,而不是通常的HTTP状态码。
  第一版仅仅实现了功能,错误处理不完整,
  所以仅仅在定制的域名下生效,其实并不通用,后续会逐步完善.
最新内容请见作者的GitHub页:http://qaseven.github.io/
使用与更新说明 1.5.8 Beta 2018-01-28 一、增加了几个变量 %DomainNoPoint%为当前扫描目标域名去掉. wwwbaiducom %DomainUnderLine%为当前扫描目标域名将.换成下划线www_baidu_com %DomainCenterAndTldUnderLine%为当前扫描目标域名主体与后缀并将.换成下划线baidu_com %LastSubPath%为当前目标url最后一级目录 baidu.com/admin/data/ 则%LastSubPath%为data (注:此变量需要为Url加入/结尾,如未存在子目录则程序自动抛弃此行字典。) 1.5.7 Beta 2018-01-26 一、增加字典变量功能 目标域名为www.baidu.com的话 四个变量 分别是 %Domain% 为当前扫描目标域名www.baidu.com %SubDomain% 为当前扫描目标子域名www %DomainCenter% 为当前扫描目标域名主体baidu %DomainCenterAndTld% 为当前扫描目标域名主体与后缀baidu.com 二、给非head访问增加返回长度功能通过返回长度可有效识别一些通过关键字无法识别的自定义404页面 三、修复若干bug并取消了启动弹框 1.5.6 Beta 2017-08-30 一、修复关闭后进程不推出的问题 二、修复cookie支持兼容问题 1.5.5 Beta 2017-08-19 一、修改开始按钮的显示状态为动态(根据线程状态判断) 二、恢复暂停按钮 1.5.4 Beta 2017-08-14 一、接受建议调整功能可以自由输入http头不止支持useragent和cookie 二、接受建议调整界面自定义404的输入框挪到左侧 三、接受建议取消暂停按钮将开始和停止按钮合并 1.5.3 Beta 2017-08-09 一、接受建议让结果表格自动下滑 二、接受建议修改对延时扫描时间的小数支持 三、修复多URL扫描的多处bug 1.5.2 Beta 2017-08-06 一、接受建议加入cookie功能 二、接受建议修改对URL的处理方法去掉多余的’/’ 三、对3xx结果显示跳转后地址http://www.xxx. xxx/admin/index.php ——–> login_redir.php 1.5.1 Beta 2017-08-05 一、修改若干细节 1.5 Alpha 2017-07-28 一、修复延时扫描处多个bug 二、优化线程 三、修复从快捷方式启动后都取字典失败的问题 四、扫描前URL编码解决部分误报问题 1.4 至安全学员内测版2017-07-21 一、修改最大并发连接为100 二、对输入Url进行智能处理获取最后一个’/’的目录进行扫描 三、增加工作线程数显示控制停止后开始前的线程是否全部退出检查 1.3 Alpha 2017-07-20 一、增加停止按钮 二、增加暂停按钮 三、拼接字典时自动根据字典类型增加/ 符号使扫描结果更精准 1.2 Alpha 2017-07-18 一、精确扫描结果识别跳转页面 二、对输入参数进行判断 1.1 Alpha 2017-07-18 一、增加列表内容不同常用状态码使用不同颜色的功能绿色为200 红色为403 二、增加点击列头后对列表内容排序的功能 三、增加右键导出结果功能 四、美化部分文本内容 五、点击开始会自动重置工具方便重复使用不必重新打开 六、增加自选加载字典模式可多选! 1.0 Alpha 2017-07-17 秉承着为人民服务的想法还有就是Windows下我确实没有什么功能比较全又稳定的此类工具了此前用过破壳扫描器和dirburte 都不稳定经常崩,御剑虽好但是缺少很多小功能。 不扯这些了介绍一下功能吧 一、先说说并发线程数吧,虽然默认是20 但是加大也无妨看你自己的各个参数设置和机器网络等配置了。 二、超时时间自己视情况而定也不必多介绍了。 三、这个随机xff头和xr头套用百度上一段话能懂得自然懂得不懂得也无所谓很少能碰见需要用到的这种情况勾选后每次访问都会随机生成这两个IP值如果线程开的大可能比较耗cpu。 1、X-Forwarded-For是用于记录代理信息的,每经过一级代理X-Forwarded-For是用于记录代理信息的,每经过一级代理(匿名代理除外),代理服务器都会把这次请求的来源IP追加在X-Forwarded-For中,来自4.4.4.4的一个请求,header包含这样一行X-Forwarded-For: 1.1.1.1, 2.2.2.2, 3
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值