关闭

利用HttpURLConnection抓取网页取名

标签: string import null url exception class
1186人阅读 评论(0) 收藏 举报
分类:

闲来无事,利用 Java 的 HttpURLConnection 配合多线程抓取网页,批量查询并计算名字的评分。

仅供娱乐。

 

程序如下 
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
 
import org.apache.log4j.Logger;
 
public class XingMing {
 
 static final Logger log = Logger.getLogger(XingMing.class);
 
 public static String read(String urlStr) {
   try {
     URL url = new URL(urlStr);
     HttpURLConnection connection = (HttpURLConnection) url
         .openConnection();
     connection.connect();
     InputStream in = connection.getInputStream();
     BufferedReader read = new BufferedReader(new InputStreamReader(in));
     StringBuffer buf = new StringBuffer();
     String line = null;
     while ((line = read.readLine()) != null) {
       buf.append(line);
     }
     return buf.toString();
   } catch (MalformedURLException e) {
     return null;
   } catch (IOException e) {
     return null;
   }
 }
 
 public static String find(String str, String beginStr, String endStr) {
   final int length = beginStr.length();
   int index = str.indexOf(beginStr);
   String result = null;
   if (index != -1) {
     int index2 = str.indexOf(endStr, index + length);
     if (index2 != -1) {
       result = str.substring(index + beginStr.length(), index2);
     }
   }
   return result;
 }
 
 public static String findName(String source, String name) {
   // value=我的姓名『XX』的分析:
   return find(source, "value=我的姓名『", "』的分析");
 }
 
 public static String findScore(String source, String name) {
   // <font size=3>姓名评分:</font><font color=0000ff size=5FONT-SIZE: 10pt;">
   // BT,楷体">99.5</font>
   return find(
       source,
       "<font size=3>姓名评分:</font><font color=0000ff size=5 BT,楷体/">",
       "</font>");
 }
 
 public static void main(String[] args) throws IOException {
   final char firstChar = '';
   final char lastChar = '';
   // 最大开启100个线程,可以加快查询速度.
   int maxThread = 100;
   int step = (lastChar - firstChar) / maxThread;
   for (int i = 0; i < maxThread; i++) {
     char start = (char) (firstChar + i * step);
     char end = (char) (firstChar + i * step + step - 1);
     System.out.println("开启" + (i + 1) + "处理:" + start + "-" + end
         + (char) (end + 1));
     new CallThread(start, end).start();
   }
 }
 
 static class CallThread extends Thread {
   private charstart;
   private charend;
   private String info;
 
   CallThread(char start, char end) {
     this.start = start;
     this.end = end;
     this.info = this.start + "-" + this.end;
   }
 
   public void run() {
     //
     final char youname1 = '';
     final String url = "http://www.xingming.net/cmjg-mz.asp?sex=&youname1="
         + youname1 + "&youname2=";
     String youname2;
     String webinfo = null;
     for (char i = start; i <= end; i++, webinfo = null) {
       // 名字规则自己取吧.
       // youname2 = "" + i;
       // youname2 = "" + i + i;
       youname2 = i + "";
       for (int j = 0; j < 5 && webinfo == null; j++) {
         webinfo = XingMing.read(url + youname2);
       }
       if (webinfo == null) {
         log.warn("获取名字[" + youname1 + youname2 + "]失败");
         continue;
       }
       String webName = XingMing.findName(webinfo, "[" + youname2
           + "]");
       String webScore = XingMing.findScore(webinfo, "[" + youname2
           + "]");
       try {
         if (Float.parseFloat(webScore) >= 90) {
           System.out.println(youname2 + ":" + webName + ":"
               + webScore);
         }
       } catch (Exception e) {
       }
       log.info(this.info + ":" + webName + ":" + webScore);
       if ((i - start) % 100 == 0) {
         System.out.println(this.info + "处理了" + (i - start) + "");
       }
     }
     System.out.println(this.info + "结束了.....");
   }
 }
}
 
 
最新程序:
最终版宝宝取名程序,java版,我家宝宝名字已经确定。  

http://blog.csdn.net/z3h/archive/2008/01/16/2047420.aspx  

0
0
猜你在找
【直播】机器学习&数据挖掘7周实训--韦玮
【套餐】系统集成项目管理工程师顺利通关--徐朋
【直播】3小时掌握Docker最佳实战-徐西宁
【套餐】机器学习系列套餐(算法+实战)--唐宇迪
【直播】计算机视觉原理及实战--屈教授
【套餐】微信订阅号+服务号Java版 v2.0--翟东平
【直播】机器学习之矩阵--黄博士
【套餐】微信订阅号+服务号Java版 v2.0--翟东平
【直播】机器学习之凸优化--马博士
【套餐】Javascript 设计模式实战--曾亮
查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:150703次
    • 积分:1517
    • 等级:
    • 排名:千里之外
    • 原创:32篇
    • 转载:1篇
    • 译文:0篇
    • 评论:20条
    文章分类
    最新评论