/*==SearchEngines.java==*/
package qinshi.search;
/**
* <p>Title: SearchEngines.java</p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2005</p>
* <p>Company: ahead</p>
* <p>Date:2005-8-20</p>
* @author: zhaoshuxian
* @version: 1.0
*/
import java.io.*;
import java.net.*;
import javax.servlet.*;
import javax.servlet.http.*;
/**
* Servlet that takes a search string, number of results per
* page, and a search engine name, sends the query to
* that search engine, and renders the parsed results as an
* HTML page. It sends a 404 response (via sendError) when a
* required parameter is missing or the search engine name
* is not recognized.
*/
public class SearchEngines
extends HttpServlet {
private static final String CONTENT_TYPE = "text/html; charset=GB2312";
//Initialize global variables
public void init() throws ServletException {
}
//Process the HTTP Get request
public void doGet(HttpServletRequest request, HttpServletResponse response) throws
ServletException, IOException {
request.setCharacterEncoding("gb2312");
response.setContentType(CONTENT_TYPE);
PrintWriter out = response.getWriter();
String searchString = request.getParameter("searchString");
if ( (searchString == null) ||
(searchString.length() == 0)) {
reportProblem(response, "Missing search string.");
return;
}
/**
* The URLEncoder changes spaces to "+" signs and other
* non-alphanumeric characters to "%XY", where XY is the
* hex value of the ASCII (or ISO Latin-1) character.
* Browsers always URL-encode form values, so the
* getParameter method decodes automatically. But since
*/
//searchString = URLEncoder.encode(searchString);
searchString = SearchSpec.Iso2Gb(searchString);
String numResults = request.getParameter("numResults");
/**
* 138 Chapter 6Generating the Server Response:HTTP Status Codes
*/
if ( (numResults == null) ||
(numResults.equals("0")) ||
(numResults.length() == 0)) {
numResults = "10";
}
String searchEngine = request.getParameter("searchEngine");
if (searchEngine == null) {
reportProblem(response, "Missing search engine name.");
return;
}
SearchSpec[] commonSpecs = SearchSpec.getCommonSpecs();
for (int i = 0; i < commonSpecs.length; i++) {
SearchSpec searchSpec = commonSpecs[i];
if (searchSpec.getName().equals(searchEngine)) {
String aaurl = response.encodeURL(searchSpec.makeURL(searchString,searchSpec.getNumResultsSuffix(), numResults));
//response.sendRedirect(url);
//return;
/*=====此部分为copy江主任的,不想写那个HTML,麻烦====================================*/
out.println("<html>");
out.println("<head><title>search</title></head>");
out.println("<style type=/"text/css/"><!--body {font-family: /"宋体/";font-size: 12px; text-decoration: none;}table { font-family: /"宋体/"; font-size: 12px;}a { text-decoration: none;}--></style>");
out.println("<body bgcolor=/"#ffffff/">");
out.println("搜索引擎. Designer:zhaoshuxian");
out.println("<br/>您的搜索关键字:" + searchString + " <br/>");
searchString = SearchSpec.StrReplace(searchString," ","%20");
SearchSpec urlc = new SearchSpec();
String baidunr1 = (String)urlc.getResult(aaurl);
//out.println("测试成功"+baidunr1);
String[] baidunrArr=new String[1000];
if(searchSpec.getName().equals("google")){
baidunrArr = baidunr1.split("<p class=g>");
}else if(searchSpec.getName().equals("baidu")){
baidunrArr = baidunr1.split("<p class=f1>");
}
out.println("baidunrArr.length:"+baidunrArr);
for (int j = 1; j < baidunrArr.length; j++) {
String[] _nt = urlc.getnrResult(baidunrArr[j]);
String url = _nt[0];
String title = _nt[1];
String content = _nt[2];
out.println("<table width=/"600/" border=/"0/" align=/"center/" cellpadding=/"1/" cellspacing=/"1/" bgcolor=/"gray/">");
out.println(" <tr>");
out.println("<td bgcolor=/"#FFFFFF/"><table width=/"100%/" border=/"0/" align=/"center/" cellpadding=/"1/" cellspacing=/"1/">");
out.println(" <tr> ");
out.println(" <td><strong><a href='" + url +
"' target='_blank' style='color:#3366FF'>" + title +
"</a></strong></td>");
out.println(" </tr>");
out.println(" <tr> ");
out.println(" <td>" + content + "</td>");
out.println(" </tr>");
out.println(" <tr> ");
//判断url长度
if (url.length() > 100) {
url = url.substring(0, 100) + "..";
}
out.println(" <td><font color='#339966'><em>" + url +
"</em></font></td>");
out.println(" </tr>");
out.println(" </table></td>");
out.println(" </tr>");
out.println("</table>");
out.println("<br/>");
}
if (baidunrArr.length < 3) {
out.print("<font color='red'>没有搜索到符合条件的结果!</font>");
return;
}
out.println(
"<table height='24' border='0' align='center' cellpadding='1' cellspacing='1'>");
out.println(" <tr> ");
for (int j = 0; j < 5; j++) {
for (int k = 0; k < 5; k++) {
out.println(" <td width='20'> ");
out.println(" <div align='center'><a href='searchengines?searchString=" +
searchString + "&start=" + (j * 10 + k) + "'>" +
(j * 10 + k + 1) +
"</a></div></td>");
out.println(" <td width='20'> ");
}
out.println("</tr>");
out.println("<tr>");
}
out.println(" </tr>");
out.println("</table>");
out.println("</body></html>");
/*==============================================*/
}
}
reportProblem(response, "Unrecognized search engine.");
}
/**
*
* @param response HttpServletResponse
* @param message String
* @throws IOException
*/
private void reportProblem(HttpServletResponse response, String message) throws
IOException {
response.sendError(response.SC_NOT_FOUND, "<H2>" + message + "</H2>");
}
/**
* Process the HTTP Post request
* @param request HttpServletRequest
* @param response HttpServletResponse
* @throws ServletException
* @throws IOException
*/
public void doPost(HttpServletRequest request, HttpServletResponse response) throws
ServletException, IOException {
doGet(request, response);
}
/**
* Clean up resources
*/
public void destroy() {
}
}
/*==========SearchSpec.java====*/
package qinshi.search;
import java.io.*;
import java.net.*;
/**
* <p>Title: SearchSpec.java</p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2005</p>
* <p>Company: ahead</p>
* <p>Date:2005-8-20</p>
* @author: zhaoshuxian
* @version: 1.0
*/
/**
* Small class that encapsulates how to construct a
* search string for a particular search engine.
*/
public class SearchSpec {
  /** Markup that precedes the first hit in a fetched result page. */
  private static final String RESULTS_BEGIN = "<p class=g>";
  /** Markup that follows the last hit in a fetched result page. */
  private static final String RESULTS_END = "<div class=n>";

  private String name, baseURL, numResultsSuffix;

  private static final SearchSpec[] commonSpecs = {
      new SearchSpec("google", "http://www.google.com/search?q=",
                     "&num="),
      new SearchSpec("infoseek", "http://infoseek.go.com/Titles?qt=",
                     "&nh="),
      new SearchSpec("baidu",
                     "http://www.baidu.com/s?wd=",
                     "&pn=")
  };

  /**
   * Creates a spec without engine data; useful for calling getResult.
   */
  public SearchSpec() {
  }

  /**
   * Creates the description of one search engine.
   *
   * @param name String engine name used for lookup
   * @param baseURL String URL prefix up to and including the query key
   * @param numResultsSuffix String query fragment that introduces the count
   */
  public SearchSpec(String name,
                    String baseURL,
                    String numResultsSuffix) {
    this.name = name;
    this.baseURL = baseURL;
    this.numResultsSuffix = numResultsSuffix;
  }

  /**
   * Builds the request URL by plain concatenation. The search string is not
   * encoded here, so callers must pass it already percent-encoded.
   *
   * @param searchString String query text
   * @param numResultsSuffix String fragment placed before the count
   * @param numResults String count value
   * @return String baseURL + searchString + numResultsSuffix + numResults
   */
  public String makeURL(String searchString, String numResultsSuffix,
                        String numResults) {
    return (baseURL + searchString +
            numResultsSuffix + numResults);
  }

  /**
   * @return String engine name
   */
  public String getName() {
    return (name);
  }

  /**
   * @return String query fragment that introduces the result count
   */
  public String getNumResultsSuffix() {
    return (numResultsSuffix);
  }

  /**
   * Returns the built-in engine descriptions.
   *
   * @return SearchSpec[] a fresh copy, so callers cannot alter the shared table
   */
  public static SearchSpec[] getCommonSpecs() {
    return (SearchSpec[]) commonSpecs.clone();
  }

  /**
   * Re-interprets a string whose characters are raw ISO-8859-1 bytes as
   * GB2312 text.
   *
   * @param str String text to convert; null is treated as ""
   * @return String converted text, or the input unchanged when one of the
   *         charsets is not available
   */
  public static String Iso2Gb(String str) {
    if (str == null) {
      return "";
    }
    try {
      return new String(str.getBytes("ISO-8859-1"), "GB2312");
    }
    catch (java.io.UnsupportedEncodingException uee) {
      System.err.println("SearchSpec.Iso2Gb: charset unavailable: " + uee);
      return str;
    }
  }

  /**
   * Replaces every occurrence of pattern in str with replace, e.g.
   * StrReplace("abc", "a", "eee") returns "eeebc".
   *
   * @param str String source text
   * @param pattern String literal text to look for (not a regular expression)
   * @param replace String replacement text
   * @return String the result; str unchanged when str is null or pattern is
   *         null or empty (an empty pattern matches everywhere without
   *         advancing and would otherwise never terminate)
   */
  public static String StrReplace(String str, String pattern, String replace) {
    if (str == null || pattern == null || pattern.length() == 0) {
      return str;
    }
    int s = 0;
    int e = 0;
    StringBuffer result = new StringBuffer();
    while ( (e = str.indexOf(pattern, s)) >= 0) {
      result.append(str.substring(s, e));
      result.append(replace);
      s = e + pattern.length();
    }
    result.append(str.substring(s));
    return result.toString();
  }

  /**
   * Checks whether s looks like an http(s) address: an optional leading
   * "http://" or "https://", then either a dotted IPv4 address or a host
   * name whose final label(s) have a plausible length.
   *
   * @param s String candidate address; null yields false
   * @return boolean true when the host part passes the checks
   */
  public static boolean isHttp(String s) {
    if (s == null) {
      return false;
    }
    s = (s.toLowerCase()).trim();
    int p1 = s.indexOf("http://");
    if (p1 != -1) {
      s = s.substring(p1 + 7);
    }
    else {
      p1 = s.indexOf("https://");
      if (p1 != -1) {
        s = s.substring(p1 + 8);
      }
    }
    // A scheme is only accepted at the very start of the string.
    if (p1 != -1 && p1 != 0) {
      return false;
    }
    if (s.indexOf("//") != -1) {
      return false;
    }
    // Keep the host only: cut at whichever of '/' (path) or ':' (port) comes
    // first. A ':' located after the first '/' belongs to the path.
    int slash = s.indexOf("/");
    int colon = s.indexOf(":");
    int hostEnd = s.length();
    if (slash != -1) {
      hostEnd = slash;
    }
    if (colon != -1 && colon < hostEnd) {
      hostEnd = colon;
    }
    s = s.substring(0, hostEnd);

    int p3 = s.indexOf("."); // first dot
    if (p3 == -1) {
      return false;
    }
    int p4 = s.indexOf(".", p3 + 1); // second dot, -1 if none
    int p5 = (p4 == -1) ? -1 : s.indexOf(".", p4 + 1); // third dot, -1 if none
    int len = s.length();
    try {
      // Dotted IPv4: four numeric parts, first 1-255, the others 0-255.
      int part = Integer.parseInt(s.substring(0, p3));
      if (part > 255 || part < 1) {
        return false;
      }
      if (p4 == -1) {
        return false;
      }
      part = Integer.parseInt(s.substring(p3 + 1, p4));
      if (part > 255 || part < 0) {
        return false;
      }
      if (p5 == -1) {
        return false;
      }
      part = Integer.parseInt(s.substring(p4 + 1, p5));
      if (part > 255 || part < 0) {
        return false;
      }
      part = Integer.parseInt(s.substring(p5 + 1));
      if (part > 255 || part < 0) {
        return false;
      }
    }
    catch (NumberFormatException notAnIpAddress) {
      // A non-numeric label means this is a host name; check its suffix.
      if (p4 == -1) {
        // One dot ("example.com"): the suffix after it has 2 or 3 characters.
        if ( (len - p3) < 3 || (len - p3) > 4) {
          return false;
        }
      }
      else if (p5 == -1) {
        // Two dots ("www.example.com"): the suffix has 2 or 3 characters.
        if ( (len - p4) < 3 || (len - p4) > 4) {
          return false;
        }
      }
      else {
        // Three dots ("www.example.com.cn"): a 2-3 character label followed
        // by a 2 character country code.
        if ( (p5 - p4) < 3 || (p5 - p4) > 4) {
          return false;
        }
        if ( (len - p5) != 3) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Downloads the page at _url and returns the part between the first
   * result marker and the navigation marker. Matching is case-sensitive and
   * the page text is returned in its original case.
   * NOTE(review): only the "&lt;p class=g&gt;" start marker is recognised, so
   * pages that use another marker yield "".
   *
   * @param _url String address of the result page
   * @return String the result section with line breaks removed, or "" when
   *         the page cannot be fetched or the markers are missing
   */
  public String getResult(String _url) {
    StringBuffer readcontent = new StringBuffer(1024);
    BufferedReader in = null;
    try {
      URL myurl = new URL(_url);
      URLConnection hpCon = myurl.openConnection();
      hpCon.setRequestProperty("User-Agent",
                               "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)");
      hpCon.setRequestProperty("Accept-Language", "zh-cn");
      hpCon.setRequestProperty("Accept", "*/*");
      hpCon.setRequestProperty("Accept-Encoding", "*");
      // Read the bytes as ISO-8859-1 characters and convert each line to
      // GB2312, which is what the deprecated DataInputStream.readLine plus
      // Iso2Gb combination did.
      in = new BufferedReader(
          new InputStreamReader(hpCon.getInputStream(), "ISO-8859-1"));
      String inputLine;
      while ( (inputLine = in.readLine()) != null) {
        readcontent.append(Iso2Gb(inputLine));
      }
    }
    catch (Exception e) {
      // Best effort: callers treat "" as "no results", so report and go on.
      System.err.println("SearchSpec.getResult: cannot fetch " + _url + ": " + e);
      return "";
    }
    finally {
      if (in != null) {
        try {
          in.close();
        }
        catch (IOException ignored) {
          // Nothing useful can be done when closing fails.
        }
      }
    }
    return extractResultSection(readcontent.toString());
  }

  /**
   * Cuts the result section out of a complete page.
   *
   * @return the text from the start marker up to the end marker, or "" when
   *         either marker is missing or they are out of order
   */
  private static String extractResultSection(String page) {
    int start = page.indexOf(RESULTS_BEGIN);
    int end = page.indexOf(RESULTS_END);
    if (start == -1 || end == -1 || end < start) {
      return "";
    }
    return page.substring(start, end);
  }

  /**
   * Splits the markup of one hit into URL, title and content.
   *
   * @param resultstr String markup of a single hit; may be null
   * @return String[] three elements: [0] the URL (text from "http://" up to
   *         the following "target", quotes removed), [1] the title (text
   *         between the next "&gt;" and "&lt;/a&gt;"), [2] the content (from
   *         "&lt;font size=-1&gt;" up to "&lt;font color=#008000&gt;"). A part
   *         whose markers are missing is returned as ""
   */
  public static String[] getnrResult(String resultstr) {
    String[] nr = new String[3];
    nr[0] = ""; // url
    nr[1] = ""; // title
    nr[2] = ""; // content
    if (resultstr == null) {
      return nr;
    }
    String rest = resultstr;

    int urlStart = rest.indexOf("http://");
    int urlEnd = (urlStart == -1) ? -1 : rest.indexOf("target", urlStart);
    if (urlEnd != -1) {
      // Drop the quote that closes the href attribute, then trim once more
      // so no trailing blank is left behind.
      nr[0] = rest.substring(urlStart, urlEnd).replace('"', ' ').trim();
      rest = rest.substring(urlEnd);
    }

    int titleStart = rest.indexOf(">");
    int titleEnd = (titleStart == -1) ? -1 : rest.indexOf("</a>", titleStart);
    if (titleEnd != -1) {
      nr[1] = rest.substring(titleStart + 1, titleEnd).trim();
      rest = rest.substring(titleEnd);
    }

    int contentStart = rest.indexOf("<font size=-1>");
    int contentEnd = (contentStart == -1) ? -1
        : rest.indexOf("<font color=#008000>", contentStart);
    if (contentEnd != -1) {
      nr[2] = rest.substring(contentStart, contentEnd).trim();
    }
    return nr;
  }

  /**
   * Manual smoke test: fetches one result page and prints the parsed hits.
   * Needs network access.
   */
  public static void main(String[] args) {
    SearchSpec urlc = new SearchSpec();
    String baidunr1 = urlc.getResult("http://www.google.com/search?q=ahead&num=10").toLowerCase().trim();
    System.out.println("调用成功:" + baidunr1);
    String[] baidunrArr = baidunr1.split("<p class=p2>");
    for (int j = 1; j < baidunrArr.length; j++) {
      if (baidunrArr[j].startsWith("<a href")) {
        String[] _nt = getnrResult(baidunrArr[j]);
        System.out.println("------------------------------------------");
        System.out.println("URL:" + _nt[0] + "<br/>");
        System.out.println("标题:" + _nt[1] + "<br/>");
        System.out.println("内容:" + _nt[2] + "<br/>");
        System.out.println("------------------------------------------");
      }
    }
  }
}
package qinshi.search;
/**
* <p>Title: SearchEngines.java</p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2005</p>
* <p>Company: ahead</p>
* <p>Date:2005-8-20</p>
* @author: zhaoshuxian
* @version: 1.0
*/
import java.io.*;
import java.net.*;
import javax.servlet.*;
import javax.servlet.http.*;
/**
* Servlet that takes a search string, number of results per
* page, and a search engine name, sends the query to
* that search engine, and renders the parsed results as an
* HTML page. It sends a 404 response (via sendError) when a
* required parameter is missing or the search engine name
* is not recognized.
*/
public class SearchEngines
extends HttpServlet {
private static final String CONTENT_TYPE = "text/html; charset=GB2312";
//Initialize global variables
public void init() throws ServletException {
}
//Process the HTTP Get request
public void doGet(HttpServletRequest request, HttpServletResponse response) throws
ServletException, IOException {
request.setCharacterEncoding("gb2312");
response.setContentType(CONTENT_TYPE);
PrintWriter out = response.getWriter();
String searchString = request.getParameter("searchString");
if ( (searchString == null) ||
(searchString.length() == 0)) {
reportProblem(response, "Missing search string.");
return;
}
/**
* The URLEncoder changes spaces to "+" signs and other
* non-alphanumeric characters to "%XY", where XY is the
* hex value of the ASCII (or ISO Latin-1) character.
* Browsers always URL-encode form values, so the
* getParameter method decodes automatically. But since
*/
//searchString = URLEncoder.encode(searchString);
searchString = SearchSpec.Iso2Gb(searchString);
String numResults = request.getParameter("numResults");
/**
* 138 Chapter 6Generating the Server Response:HTTP Status Codes
*/
if ( (numResults == null) ||
(numResults.equals("0")) ||
(numResults.length() == 0)) {
numResults = "10";
}
String searchEngine = request.getParameter("searchEngine");
if (searchEngine == null) {
reportProblem(response, "Missing search engine name.");
return;
}
SearchSpec[] commonSpecs = SearchSpec.getCommonSpecs();
for (int i = 0; i < commonSpecs.length; i++) {
SearchSpec searchSpec = commonSpecs[i];
if (searchSpec.getName().equals(searchEngine)) {
String aaurl = response.encodeURL(searchSpec.makeURL(searchString,searchSpec.getNumResultsSuffix(), numResults));
//response.sendRedirect(url);
//return;
/*=====此部分为copy江主任的,不想写那个HTML,麻烦====================================*/
out.println("<html>");
out.println("<head><title>search</title></head>");
out.println("<style type=/"text/css/"><!--body {font-family: /"宋体/";font-size: 12px; text-decoration: none;}table { font-family: /"宋体/"; font-size: 12px;}a { text-decoration: none;}--></style>");
out.println("<body bgcolor=/"#ffffff/">");
out.println("搜索引擎. Designer:zhaoshuxian");
out.println("<br/>您的搜索关键字:" + searchString + " <br/>");
searchString = SearchSpec.StrReplace(searchString," ","%20");
SearchSpec urlc = new SearchSpec();
String baidunr1 = (String)urlc.getResult(aaurl);
//out.println("测试成功"+baidunr1);
String[] baidunrArr=new String[1000];
if(searchSpec.getName().equals("google")){
baidunrArr = baidunr1.split("<p class=g>");
}else if(searchSpec.getName().equals("baidu")){
baidunrArr = baidunr1.split("<p class=f1>");
}
out.println("baidunrArr.length:"+baidunrArr);
for (int j = 1; j < baidunrArr.length; j++) {
String[] _nt = urlc.getnrResult(baidunrArr[j]);
String url = _nt[0];
String title = _nt[1];
String content = _nt[2];
out.println("<table width=/"600/" border=/"0/" align=/"center/" cellpadding=/"1/" cellspacing=/"1/" bgcolor=/"gray/">");
out.println(" <tr>");
out.println("<td bgcolor=/"#FFFFFF/"><table width=/"100%/" border=/"0/" align=/"center/" cellpadding=/"1/" cellspacing=/"1/">");
out.println(" <tr> ");
out.println(" <td><strong><a href='" + url +
"' target='_blank' style='color:#3366FF'>" + title +
"</a></strong></td>");
out.println(" </tr>");
out.println(" <tr> ");
out.println(" <td>" + content + "</td>");
out.println(" </tr>");
out.println(" <tr> ");
//判断url长度
if (url.length() > 100) {
url = url.substring(0, 100) + "..";
}
out.println(" <td><font color='#339966'><em>" + url +
"</em></font></td>");
out.println(" </tr>");
out.println(" </table></td>");
out.println(" </tr>");
out.println("</table>");
out.println("<br/>");
}
if (baidunrArr.length < 3) {
out.print("<font color='red'>没有搜索到符合条件的结果!</font>");
return;
}
out.println(
"<table height='24' border='0' align='center' cellpadding='1' cellspacing='1'>");
out.println(" <tr> ");
for (int j = 0; j < 5; j++) {
for (int k = 0; k < 5; k++) {
out.println(" <td width='20'> ");
out.println(" <div align='center'><a href='searchengines?searchString=" +
searchString + "&start=" + (j * 10 + k) + "'>" +
(j * 10 + k + 1) +
"</a></div></td>");
out.println(" <td width='20'> ");
}
out.println("</tr>");
out.println("<tr>");
}
out.println(" </tr>");
out.println("</table>");
out.println("</body></html>");
/*==============================================*/
}
}
reportProblem(response, "Unrecognized search engine.");
}
/**
*
* @param response HttpServletResponse
* @param message String
* @throws IOException
*/
private void reportProblem(HttpServletResponse response, String message) throws
IOException {
response.sendError(response.SC_NOT_FOUND, "<H2>" + message + "</H2>");
}
/**
* Process the HTTP Post request
* @param request HttpServletRequest
* @param response HttpServletResponse
* @throws ServletException
* @throws IOException
*/
public void doPost(HttpServletRequest request, HttpServletResponse response) throws
ServletException, IOException {
doGet(request, response);
}
/**
* Clean up resources
*/
public void destroy() {
}
}
/*==========SearchSpec.java====*/
package qinshi.search;
import java.io.*;
import java.net.*;
/**
* <p>Title: SearchSpec.java</p>
* <p>Description: </p>
* <p>Copyright: Copyright (c) 2005</p>
* <p>Company: ahead</p>
* <p>Date:2005-8-20</p>
* @author: zhaoshuxian
* @version: 1.0
*/
/**
* Small class that encapsulates how to construct a
* search string for a particular search engine.
*/
public class SearchSpec {
  /** Markup that precedes the first hit in a fetched result page. */
  private static final String RESULTS_BEGIN = "<p class=g>";
  /** Markup that follows the last hit in a fetched result page. */
  private static final String RESULTS_END = "<div class=n>";

  private String name, baseURL, numResultsSuffix;

  private static final SearchSpec[] commonSpecs = {
      new SearchSpec("google", "http://www.google.com/search?q=",
                     "&num="),
      new SearchSpec("infoseek", "http://infoseek.go.com/Titles?qt=",
                     "&nh="),
      new SearchSpec("baidu",
                     "http://www.baidu.com/s?wd=",
                     "&pn=")
  };

  /**
   * Creates a spec without engine data; useful for calling getResult.
   */
  public SearchSpec() {
  }

  /**
   * Creates the description of one search engine.
   *
   * @param name String engine name used for lookup
   * @param baseURL String URL prefix up to and including the query key
   * @param numResultsSuffix String query fragment that introduces the count
   */
  public SearchSpec(String name,
                    String baseURL,
                    String numResultsSuffix) {
    this.name = name;
    this.baseURL = baseURL;
    this.numResultsSuffix = numResultsSuffix;
  }

  /**
   * Builds the request URL by plain concatenation. The search string is not
   * encoded here, so callers must pass it already percent-encoded.
   *
   * @param searchString String query text
   * @param numResultsSuffix String fragment placed before the count
   * @param numResults String count value
   * @return String baseURL + searchString + numResultsSuffix + numResults
   */
  public String makeURL(String searchString, String numResultsSuffix,
                        String numResults) {
    return (baseURL + searchString +
            numResultsSuffix + numResults);
  }

  /**
   * @return String engine name
   */
  public String getName() {
    return (name);
  }

  /**
   * @return String query fragment that introduces the result count
   */
  public String getNumResultsSuffix() {
    return (numResultsSuffix);
  }

  /**
   * Returns the built-in engine descriptions.
   *
   * @return SearchSpec[] a fresh copy, so callers cannot alter the shared table
   */
  public static SearchSpec[] getCommonSpecs() {
    return (SearchSpec[]) commonSpecs.clone();
  }

  /**
   * Re-interprets a string whose characters are raw ISO-8859-1 bytes as
   * GB2312 text.
   *
   * @param str String text to convert; null is treated as ""
   * @return String converted text, or the input unchanged when one of the
   *         charsets is not available
   */
  public static String Iso2Gb(String str) {
    if (str == null) {
      return "";
    }
    try {
      return new String(str.getBytes("ISO-8859-1"), "GB2312");
    }
    catch (java.io.UnsupportedEncodingException uee) {
      System.err.println("SearchSpec.Iso2Gb: charset unavailable: " + uee);
      return str;
    }
  }

  /**
   * Replaces every occurrence of pattern in str with replace, e.g.
   * StrReplace("abc", "a", "eee") returns "eeebc".
   *
   * @param str String source text
   * @param pattern String literal text to look for (not a regular expression)
   * @param replace String replacement text
   * @return String the result; str unchanged when str is null or pattern is
   *         null or empty (an empty pattern matches everywhere without
   *         advancing and would otherwise never terminate)
   */
  public static String StrReplace(String str, String pattern, String replace) {
    if (str == null || pattern == null || pattern.length() == 0) {
      return str;
    }
    int s = 0;
    int e = 0;
    StringBuffer result = new StringBuffer();
    while ( (e = str.indexOf(pattern, s)) >= 0) {
      result.append(str.substring(s, e));
      result.append(replace);
      s = e + pattern.length();
    }
    result.append(str.substring(s));
    return result.toString();
  }

  /**
   * Checks whether s looks like an http(s) address: an optional leading
   * "http://" or "https://", then either a dotted IPv4 address or a host
   * name whose final label(s) have a plausible length.
   *
   * @param s String candidate address; null yields false
   * @return boolean true when the host part passes the checks
   */
  public static boolean isHttp(String s) {
    if (s == null) {
      return false;
    }
    s = (s.toLowerCase()).trim();
    int p1 = s.indexOf("http://");
    if (p1 != -1) {
      s = s.substring(p1 + 7);
    }
    else {
      p1 = s.indexOf("https://");
      if (p1 != -1) {
        s = s.substring(p1 + 8);
      }
    }
    // A scheme is only accepted at the very start of the string.
    if (p1 != -1 && p1 != 0) {
      return false;
    }
    if (s.indexOf("//") != -1) {
      return false;
    }
    // Keep the host only: cut at whichever of '/' (path) or ':' (port) comes
    // first. A ':' located after the first '/' belongs to the path.
    int slash = s.indexOf("/");
    int colon = s.indexOf(":");
    int hostEnd = s.length();
    if (slash != -1) {
      hostEnd = slash;
    }
    if (colon != -1 && colon < hostEnd) {
      hostEnd = colon;
    }
    s = s.substring(0, hostEnd);

    int p3 = s.indexOf("."); // first dot
    if (p3 == -1) {
      return false;
    }
    int p4 = s.indexOf(".", p3 + 1); // second dot, -1 if none
    int p5 = (p4 == -1) ? -1 : s.indexOf(".", p4 + 1); // third dot, -1 if none
    int len = s.length();
    try {
      // Dotted IPv4: four numeric parts, first 1-255, the others 0-255.
      int part = Integer.parseInt(s.substring(0, p3));
      if (part > 255 || part < 1) {
        return false;
      }
      if (p4 == -1) {
        return false;
      }
      part = Integer.parseInt(s.substring(p3 + 1, p4));
      if (part > 255 || part < 0) {
        return false;
      }
      if (p5 == -1) {
        return false;
      }
      part = Integer.parseInt(s.substring(p4 + 1, p5));
      if (part > 255 || part < 0) {
        return false;
      }
      part = Integer.parseInt(s.substring(p5 + 1));
      if (part > 255 || part < 0) {
        return false;
      }
    }
    catch (NumberFormatException notAnIpAddress) {
      // A non-numeric label means this is a host name; check its suffix.
      if (p4 == -1) {
        // One dot ("example.com"): the suffix after it has 2 or 3 characters.
        if ( (len - p3) < 3 || (len - p3) > 4) {
          return false;
        }
      }
      else if (p5 == -1) {
        // Two dots ("www.example.com"): the suffix has 2 or 3 characters.
        if ( (len - p4) < 3 || (len - p4) > 4) {
          return false;
        }
      }
      else {
        // Three dots ("www.example.com.cn"): a 2-3 character label followed
        // by a 2 character country code.
        if ( (p5 - p4) < 3 || (p5 - p4) > 4) {
          return false;
        }
        if ( (len - p5) != 3) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Downloads the page at _url and returns the part between the first
   * result marker and the navigation marker. Matching is case-sensitive and
   * the page text is returned in its original case.
   * NOTE(review): only the "&lt;p class=g&gt;" start marker is recognised, so
   * pages that use another marker yield "".
   *
   * @param _url String address of the result page
   * @return String the result section with line breaks removed, or "" when
   *         the page cannot be fetched or the markers are missing
   */
  public String getResult(String _url) {
    StringBuffer readcontent = new StringBuffer(1024);
    BufferedReader in = null;
    try {
      URL myurl = new URL(_url);
      URLConnection hpCon = myurl.openConnection();
      hpCon.setRequestProperty("User-Agent",
                               "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)");
      hpCon.setRequestProperty("Accept-Language", "zh-cn");
      hpCon.setRequestProperty("Accept", "*/*");
      hpCon.setRequestProperty("Accept-Encoding", "*");
      // Read the bytes as ISO-8859-1 characters and convert each line to
      // GB2312, which is what the deprecated DataInputStream.readLine plus
      // Iso2Gb combination did.
      in = new BufferedReader(
          new InputStreamReader(hpCon.getInputStream(), "ISO-8859-1"));
      String inputLine;
      while ( (inputLine = in.readLine()) != null) {
        readcontent.append(Iso2Gb(inputLine));
      }
    }
    catch (Exception e) {
      // Best effort: callers treat "" as "no results", so report and go on.
      System.err.println("SearchSpec.getResult: cannot fetch " + _url + ": " + e);
      return "";
    }
    finally {
      if (in != null) {
        try {
          in.close();
        }
        catch (IOException ignored) {
          // Nothing useful can be done when closing fails.
        }
      }
    }
    return extractResultSection(readcontent.toString());
  }

  /**
   * Cuts the result section out of a complete page.
   *
   * @return the text from the start marker up to the end marker, or "" when
   *         either marker is missing or they are out of order
   */
  private static String extractResultSection(String page) {
    int start = page.indexOf(RESULTS_BEGIN);
    int end = page.indexOf(RESULTS_END);
    if (start == -1 || end == -1 || end < start) {
      return "";
    }
    return page.substring(start, end);
  }

  /**
   * Splits the markup of one hit into URL, title and content.
   *
   * @param resultstr String markup of a single hit; may be null
   * @return String[] three elements: [0] the URL (text from "http://" up to
   *         the following "target", quotes removed), [1] the title (text
   *         between the next "&gt;" and "&lt;/a&gt;"), [2] the content (from
   *         "&lt;font size=-1&gt;" up to "&lt;font color=#008000&gt;"). A part
   *         whose markers are missing is returned as ""
   */
  public static String[] getnrResult(String resultstr) {
    String[] nr = new String[3];
    nr[0] = ""; // url
    nr[1] = ""; // title
    nr[2] = ""; // content
    if (resultstr == null) {
      return nr;
    }
    String rest = resultstr;

    int urlStart = rest.indexOf("http://");
    int urlEnd = (urlStart == -1) ? -1 : rest.indexOf("target", urlStart);
    if (urlEnd != -1) {
      // Drop the quote that closes the href attribute, then trim once more
      // so no trailing blank is left behind.
      nr[0] = rest.substring(urlStart, urlEnd).replace('"', ' ').trim();
      rest = rest.substring(urlEnd);
    }

    int titleStart = rest.indexOf(">");
    int titleEnd = (titleStart == -1) ? -1 : rest.indexOf("</a>", titleStart);
    if (titleEnd != -1) {
      nr[1] = rest.substring(titleStart + 1, titleEnd).trim();
      rest = rest.substring(titleEnd);
    }

    int contentStart = rest.indexOf("<font size=-1>");
    int contentEnd = (contentStart == -1) ? -1
        : rest.indexOf("<font color=#008000>", contentStart);
    if (contentEnd != -1) {
      nr[2] = rest.substring(contentStart, contentEnd).trim();
    }
    return nr;
  }

  /**
   * Manual smoke test: fetches one result page and prints the parsed hits.
   * Needs network access.
   */
  public static void main(String[] args) {
    SearchSpec urlc = new SearchSpec();
    String baidunr1 = urlc.getResult("http://www.google.com/search?q=ahead&num=10").toLowerCase().trim();
    System.out.println("调用成功:" + baidunr1);
    String[] baidunrArr = baidunr1.split("<p class=p2>");
    for (int j = 1; j < baidunrArr.length; j++) {
      if (baidunrArr[j].startsWith("<a href")) {
        String[] _nt = getnrResult(baidunrArr[j]);
        System.out.println("------------------------------------------");
        System.out.println("URL:" + _nt[0] + "<br/>");
        System.out.println("标题:" + _nt[1] + "<br/>");
        System.out.println("内容:" + _nt[2] + "<br/>");
        System.out.println("------------------------------------------");
      }
    }
  }
}