java采集csdn论坛源码

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @类名称
 * @业务描述
 * 来自Java课堂:www.javakt.com
 * 付费问答,是您值得信赖的伙伴
 * @author
 * @时间 2010-2-20 15:45:12
 */
public class WebContent {
 /** */
 /**
  * 读取一个网页全部内容
  */
 public String getOneHtml(String htmlurl) throws Exception {
  URL url;
  String temp;
  StringBuffer sb = new StringBuffer();

  url = new URL(htmlurl);
  BufferedReader in = new BufferedReader(new InputStreamReader(url
    .openStream(), "utf-8"));// 读取网页全部内容
  while ((temp = in.readLine()) != null) {
   sb.append(temp);
  }
  in.close();
  return sb.toString();
 }

 /**
  *
  * @param s
  * @return 获得网页标题
  */
 public String getTitle(String s, boolean isnew) {
  String regex;
  String title = "";
  List list = new ArrayList();
  regex = "<title>.*?</title>";
  Pattern pa = Pattern.compile(regex, Pattern.CANON_EQ);
  Matcher ma = pa.matcher(s);
  while (ma.find()) {
   list.add(ma.group());
  }
  for (int i = 0; i < list.size(); i++) {
   title = title + list.get(i);
  }
  return outTag(title);
 }
 
 // 标记替换定义  其中FGF是被替换与要替换标记之间的分隔符
 private static String[] bjs = {"&lt;FGF<", "&gt;FGF76>", "&quot;FGF76/""};

 /**
  * 获得正文与回复,指新帖子
  */
 public String[] getTiezi(String s) {
  String regex;
  List list = new ArrayList();
  regex = "msgfont.*?</div>";
  Pattern pa = Pattern.compile(regex, Pattern.MULTILINE);
  Matcher ma = pa.matcher(s);
  while (ma.find()) {
   list.add(outTag(ma.group().replaceAll("msgfont/">", "").replaceAll("<br />", "/r/n").replaceAll("<br/>", "/r/n").replaceAll("&nbsp;", "")));
  }
  
  String[] reStr = new String[list.size()];
  for (int i = 0; i < reStr.length; i++) {
   reStr[i] = replaceByBj(bjs, (String) list.get(i));
  }
  return reStr;
  
 }
 
 public static String replaceByBj(String[] bjs, String nrstr){
     for(int i=0;i<bjs.length;i++){
      String[] bjd = bjs[i].split("NLLD76");
      nrstr = nrstr.replaceAll(bjd[0], bjd[1]);
     }
     return nrstr;
    }
 
 public static String[] getBjs() {
  return bjs;
 }

 public static void setBjs(String[] bjs) {
  WebContent.bjs = bjs;
 }

 /**
  * @方法名称 获得链接
  * @业务描述
  *
  * @author
  * @时间 2010-2-20 16:42:08
  */
 public String[] getCsdnLink(String s, boolean isnew) {
  if(!isnew){
   return getCsdnHisLink(s);
  }
  String regex;
  List list = new ArrayList();
  regex = "http://topic.csdn.net/u.*?.html";
  Pattern pa = Pattern.compile(regex, Pattern.MULTILINE);
  Matcher ma = pa.matcher(s);
  while (ma.find()) {
   list.add(ma.group());
  }
  String[] reStr = new String[list.size()];
  for (int i = 0; i < reStr.length; i++) {
   reStr[i] = (String) list.get(i);
  }
  return reStr;

 }
 
 public String[] getCsdnHisLink(String s) {
  String regex;
  List list = new ArrayList();
  regex = "http://topic.csdn.net.*?.html";
  Pattern pa = Pattern.compile(regex, Pattern.MULTILINE);
  Matcher ma = pa.matcher(s);
  while (ma.find()) {
   list.add(ma.group());
  }
  String[] reStr = new String[list.size()];
  for (int i = 0; i < reStr.length; i++) {
   reStr[i] = (String) list.get(i);
  }
  return reStr;

 }

 /**
  *
  * @param s
  * @return 获得链接
  */
 public List getLink(String s) {
  String regex;
  List list = new ArrayList();
  regex = "<a[^>]*href=</a>";
  Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
  Matcher ma = pa.matcher(s);
  while (ma.find()) {
   list.add(ma.group());
  }
  return list;
 }

 /**
  *
  * @param s
  * @return 获得脚本代码
  */
 public List getScript(String s) {
  String regex;
  List list = new ArrayList();
  regex = "<script.*?</script>";
  Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
  Matcher ma = pa.matcher(s);
  while (ma.find()) {
   list.add(ma.group());
  }
  return list;
 }

 /** */
 /**
  *
  * @param s
  * @return 获得CSS
  */
 public List getCSS(String s) {
  String regex;
  List list = new ArrayList();
  regex = "<style.*?</style>";
  Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
  Matcher ma = pa.matcher(s);
  while (ma.find()) {
   list.add(ma.group());
  }
  return list;
 }

 /** */
 /**
  *
  * @param s
  * @return 去掉标记
  */
 public String outTag(String s) {
  return s.replaceAll("<.*?>", "");
 }

 public static void main(String[] args) {
  WebContent w = new WebContent();
  String url = "http://forum.csdn.net/PointForum/Forum/BFTopicList.aspx?Alias=Java&ListType=UnClosedList&page=1";
  try {
   String s = w.getOneHtml(url);
   String[] title2 = w.getTiezi(s);
   for (int i = 0; i < title2.length; i++) {
    System.out.println(title2[i]);
   }
  } catch (Exception e) {
   e.printStackTrace();
  }
 }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
package com.xtqzi.administrator.servlet; import java.io.IOException; import java.io.PrintWriter; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.xtqzi.bean.Administrator; public class AddAdministratorServlet extends HttpServlet { /** * Constructor of the object. */ public AddAdministratorServlet() { super(); } /** * Destruction of the servlet. <br> */ public void destroy() { super.destroy(); // Just puts "destroy" string in log // Put your code here } /** * The doGet method of the servlet. <br> * * This method is called when a form has its tag value method equals to get. * * @param request the request send by the client to the server * @param response the response send by the server to the client * @throws ServletException if an error occurred * @throws IOException if an error occurred */ public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { this.doPost(request, response); } /** * The doPost method of the servlet. <br> * * This method is called when a form has its tag value method equals to post. * * @param request the request send by the client to the server * @param response the response send by the server to the client * @throws ServletException if an error occurred * @throws IOException if an error occurred */ public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html;charset=gbk"); PrintWriter out = response.getWriter(); String name=request.getParameter("username"); String password=request.getParameter("password"); if(password!=null&&name!=null) { Administrator ad=new Administrator(); ad.setName(name); ad.setPassword(password); ad.setRights(1); boolean b=ad.Insert(); if(b) { out.print("成功"); }else { out.print("失败,是否已经有了改管理员名称"); } }else { out.println("请输入完整数据"); } out.close(); } /** * Initialization of the servlet. <br> * * @throws ServletException if an error occurs */ public void init() throws ServletException { // Put your code here } }

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值