抓取搜房二手房


package soufun;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Calendar;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.HasChildFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.NotFilter;
import org.htmlparser.nodes.TextNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

public class HouseIDList {
private static Connection mysql_connection;
private static Statement area_statement;
private static Statement statement;
private static ResultSet area_resultSet;
private static ResultSet resultSet;

/**
 * Crawls the listing pages of every area stored in
 * {@code uchome_z_soufun_area} and stores each listing link that is not yet
 * present in {@code uchome_z_soufun_house_link}.
 *
 * <p>Listings whose time block contains a span with the text "个人" are
 * skipped. A page or listing that cannot be fetched or does not have the
 * expected layout is skipped instead of aborting the whole crawl.
 *
 * @param args unused
 * @throws ParserException declared for compatibility; parser failures are
 *         handled per page
 * @throws IOException declared for compatibility; fetch failures are
 *         handled per page
 * @throws SQLException if a database operation fails
 */
public static void main(String[] args) throws ParserException, IOException,
        SQLException {
    String mysql_url = "jdbc:mysql://localhost:3306/BuyHouseExpress?characterEncoding=UTF-8";
    String username = "root";
    String password = "soufunroot";
    try {
        Class.forName("com.mysql.jdbc.Driver");
        mysql_connection = DriverManager.getConnection(mysql_url, username,
                password);
    } catch (ClassNotFoundException cnfex) {
        // The JDBC driver class could not be loaded.
        System.err.println("装载 JDBC/ODBC 驱动程序失败。");
        cnfex.printStackTrace();
        System.exit(1); // terminate program
    } catch (SQLException sqlex) {
        // The database connection could not be opened.
        System.err.println("无法连接数据库");
        sqlex.printStackTrace();
        System.exit(1); // terminate program
    }
    System.out.println("连接数据库成功");

    java.sql.PreparedStatement countLink = null;
    java.sql.PreparedStatement insertLink = null;
    try {
        // Scraped values are bound as parameters so that quotes in a link
        // or image URL can neither break nor alter the statement.
        countLink = mysql_connection
                .prepareStatement("select count(id) from uchome_z_soufun_house_link where link=?");
        insertLink = mysql_connection
                .prepareStatement("insert into uchome_z_soufun_house_link(link,house_face_img_url,date,soufun_update_time) values(?,?,now(),?)");
        area_statement = mysql_connection.createStatement();
        area_resultSet = area_statement
                .executeQuery("select house_url,id from uchome_z_soufun_area");
        while (area_resultSet.next()) {
            String area_house_url = area_resultSet.getString(1);
            String area_id = area_resultSet.getString(2);
            if (area_house_url == null) {
                continue;
            }
            // Page URLs are derived by replacing the trailing "/i31/".
            int pageMarker = area_house_url.lastIndexOf("/i31/");
            if (pageMarker < 0) {
                System.out.println("[area_house=" + area_id + "]" + area_house_url);
                continue;
            }
            Parser parser = openParser(area_house_url);
            if (parser == null) {
                continue;
            }
            // Computed per area; no value is carried over from the previous one.
            int area_page = readPageCount(parser);
            String temp_area_house_url = area_house_url.substring(0, pageMarker);
            for (int m = 1; m <= area_page; m++) {
                parser = openParser(temp_area_house_url + "/i3" + m + "/");
                if (parser == null) {
                    continue;
                }
                try {
                    NodeList titles = parser
                            .extractAllNodesThatMatch(new HasAttributeFilter(
                                    "class", "housetitle"));
                    for (int i = 0; i < titles.size(); i++) {
                        processListing(titles.elementAt(i), area_id, m,
                                countLink, insertLink);
                    }
                } catch (ParserException e) {
                    // One unreadable page must not stop the crawl.
                    System.out.println(e.getMessage());
                }
            }
        }
    } finally {
        closeQuietly(insertLink);
        closeQuietly(countLink);
        closeQuietly(area_statement);
        try {
            mysql_connection.close();
        } catch (SQLException ignored) {
            // Best effort: the program is finishing anyway.
        }
    }
}

/** Opens a parser on the given URL; prints the error and returns null on failure. */
private static Parser openParser(String url) {
    try {
        HttpURLConnection connection = (HttpURLConnection) new URL(url)
                .openConnection();
        return new Parser(connection);
    } catch (IOException e) {
        System.out.println(e.getMessage());
    } catch (ParserException e) {
        System.out.println(e.getMessage());
    }
    return null;
}

/**
 * Derives the number of listing pages from the text that follows the
 * "number orange" element: ceil(count / 30), capped at 16. Returns 0 when
 * the counter is missing or not numeric.
 */
private static int readPageCount(Parser parser) {
    try {
        NodeList counters = parser
                .extractAllNodesThatMatch(new HasAttributeFilter("class",
                        "number orange"));
        if (counters.size() == 0) {
            return 0;
        }
        Node total = counters.elementAt(0).getNextSibling();
        if (total == null) {
            return 0;
        }
        int pages = (int) Math.ceil(Double.parseDouble(total
                .toPlainTextString().trim()) / 30);
        return Math.min(pages, 16);
    } catch (ParserException e) {
        System.out.println(e.getMessage());
    } catch (NumberFormatException e) {
        System.out.println(e.getMessage());
    }
    return 0;
}

/** Reads one "housetitle" node and inserts its link unless it is already stored. */
private static void processListing(Node titleNode, String areaId, int page,
        java.sql.PreparedStatement countLink,
        java.sql.PreparedStatement insertLink) throws ParserException,
        SQLException {
    Node container = titleNode.getParent();
    if (container == null) {
        return;
    }
    Parser parser = new Parser();
    parser.setInputHTML(container.toHtml());
    parser.setEncoding("utf8");
    NodeList timeBlocks = parser
            .extractAllNodesThatMatch(new HasAttributeFilter("class",
                    "gray9 time"));
    if (timeBlocks.size() == 0
            || timeBlocks.elementAt(0).getChildren() == null) {
        return; // unexpected layout: nothing to read
    }
    NodeList parts = timeBlocks.elementAt(0).getChildren();
    StringBuilder rawTime = new StringBuilder();
    for (int n = 0; n < parts.size(); n++) {
        Node part = parts.elementAt(n);
        if (part instanceof org.htmlparser.tags.Span
                && part.toHtml().indexOf("个人") >= 0) {
            System.out.println("个人房源不抓取");
            return;
        }
        if (part instanceof TextNode) {
            rawTime.append(part.toPlainTextString());
        }
    }

    String house_face_img_url = extractCoverImageUrl(titleNode);
    System.out.println("房源封面图:" + house_face_img_url);

    String temp_date = replaceString(rawTime.toString());
    int marker = temp_date.indexOf("更新");
    if (marker >= 0) {
        temp_date = temp_date.substring(0, marker);
    }
    System.out.println(temp_date);
    try {
        temp_date = getStandardTimeForString(temp_date);
    } catch (NumberFormatException e) {
        // Unparseable age: an empty string matches no suffix, so the
        // current time is returned.
        temp_date = getStandardTimeForString("");
    }
    System.out.println(temp_date);

    Node anchor = titleNode.getFirstChild();
    if (!(anchor instanceof LinkTag)) {
        return;
    }
    String house_link = ((LinkTag) anchor).getLink();
    System.out.println("[area_house=" + areaId + ",page_house=" + page
            + "]" + house_link);
    countLink.setString(1, house_link);
    ResultSet existing = countLink.executeQuery();
    try {
        if (existing.next() && existing.getInt(1) == 0) {
            insertLink.setString(1, house_link);
            insertLink.setString(2, house_face_img_url);
            insertLink.setString(3, temp_date);
            insertLink.executeUpdate();
        }
    } finally {
        existing.close();
    }
}

/**
 * Walks from the title node to the cover image (three parents up, two
 * siblings back, two children down) and returns its "src2" attribute, or an
 * empty string when the structure differs.
 */
private static String extractCoverImageUrl(Node titleNode) {
    Node node = titleNode;
    for (int up = 0; up < 3 && node != null; up++) {
        node = node.getParent();
    }
    for (int back = 0; back < 2 && node != null; back++) {
        node = node.getPreviousSibling();
    }
    for (int down = 0; down < 2 && node != null; down++) {
        node = node.getFirstChild();
    }
    if (node instanceof org.htmlparser.tags.ImageTag) {
        String src = ((org.htmlparser.tags.ImageTag) node)
                .getAttribute("src2");
        if (src != null) {
            return src;
        }
    }
    // Never null: the value is stored in a column that is later trimmed.
    return "";
}

/** Closes a statement, ignoring failures (used only during shutdown). */
private static void closeQuietly(Statement toClose) {
    if (toClose != null) {
        try {
            toClose.close();
        } catch (SQLException ignored) {
            // Nothing useful can be done at this point.
        }
    }
}

/**
 * Converts a relative age such as "5分钟前" or "3个月前" into an absolute
 * timestamp formatted as {@code yyyy-MM-dd HH:mm:ss}.
 *
 * <p>The amount is the run of leading ASCII digits before the unit suffix,
 * so a measure word between number and unit ("3个月前") is tolerated. When
 * the text is null, has no known suffix, or carries no number, the current
 * time is returned.
 *
 * @param strTime relative age text ending in 秒前, 分钟前, 小时前, 天前, 月前
 *        or 年前
 * @return the computed local timestamp
 */
public static String getStandardTimeForString(String strTime) {
    // Suffixes and the calendar field each one subtracts from.
    final String[] suffixes = { "秒前", "分钟前", "小时前", "天前", "月前", "年前" };
    final int[] units = { Calendar.SECOND, Calendar.MINUTE,
            Calendar.HOUR_OF_DAY, Calendar.DAY_OF_MONTH, Calendar.MONTH,
            Calendar.YEAR };
    Calendar d = Calendar.getInstance();
    if (strTime != null) {
        for (int i = 0; i < suffixes.length; i++) {
            if (!strTime.endsWith(suffixes[i])) {
                continue;
            }
            String amountText = strTime.substring(0,
                    strTime.length() - suffixes[i].length()).trim();
            // At most 9 digits are read so the value always fits an int.
            int digits = 0;
            while (digits < amountText.length() && digits < 9
                    && amountText.charAt(digits) >= '0'
                    && amountText.charAt(digits) <= '9') {
                digits++;
            }
            if (digits > 0) {
                d.add(units[i],
                        -Integer.parseInt(amountText.substring(0, digits)));
            }
            break;
        }
    }
    // Locale.ROOT keeps the digits ASCII regardless of the default locale.
    return String.format(java.util.Locale.ROOT,
            "%d-%02d-%02d %02d:%02d:%02d", d.get(Calendar.YEAR),
            d.get(Calendar.MONTH) + 1, d.get(Calendar.DAY_OF_MONTH),
            d.get(Calendar.HOUR_OF_DAY), d.get(Calendar.MINUTE),
            d.get(Calendar.SECOND));
}

/**
 * Removes layout characters from scraped text: every whitespace character
 * (space, tab, CR, LF, ...), the non-breaking space, the ideographic space,
 * and both the ASCII and the full-width colon.
 *
 * @param str text to clean; may be null
 * @return the cleaned text, or null when {@code str} is null
 */
public static String replaceString(String str) {
    if (str == null) {
        return null;
    }
    // One pass replaces the former chain of per-character replaceAll calls.
    return str.replaceAll("[\\s\\u00A0\\u3000:\\uFF1A]", "");
}

}


package soufun;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Calendar;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.LinkStringFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.StringFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.DefinitionList;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
import org.json.simple.parser.ContainerFactory;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

public class HouseDetail {
private static String mysql_url = "jdbc:mysql://localhost:3306/abcdefg?characterEncoding=UTF-8";
private static Connection mysql_connection;
private static Statement link_statement;
private static Statement statement;
private static ResultSet link_resultSet;
private static ResultSet resultSet;
private static String query;
private static String soufun_agents_img_path = "d:/soufun/soufun_agents_img/";

/**
 * Scrapes the detail page of every link in
 * {@code uchome_z_soufun_house_link} with {@code type=0}, stores the
 * extracted fields on that row, records the page's images through
 * {@link #savePicture} and finally calls {@link #dataConversion}.
 *
 * <p>Row states written here: {@code type=1} after the fields were saved,
 * {@code type=2} when the page could not be read, {@code type=3} when the
 * page shows no agent links.
 *
 * @param args unused
 * @throws ParserException declared for compatibility; parser failures are
 *         handled per link
 * @throws IOException declared for compatibility; fetch failures are
 *         handled per link
 * @throws SQLException if a database operation fails
 */
public static void main(String[] args) throws ParserException, IOException,
        SQLException {
    String username = "root";
    String password = "";
    try {
        Class.forName("com.mysql.jdbc.Driver");
        mysql_connection = DriverManager.getConnection(mysql_url, username,
                password);
    } catch (ClassNotFoundException cnfex) {
        // The JDBC driver class could not be loaded.
        System.err.println("装载 JDBC/ODBC 驱动程序失败。");
        cnfex.printStackTrace();
        System.exit(1); // terminate program
    } catch (SQLException sqlex) {
        // The database connection could not be opened.
        System.err.println("无法连接数据库");
        sqlex.printStackTrace();
        System.exit(1); // terminate program
    }
    System.out.println("连接数据库成功");

    try {
        link_statement = mysql_connection.createStatement();
        // Shared statement used by the other static methods of this class.
        statement = mysql_connection.createStatement();
        link_resultSet = link_statement
                .executeQuery("select link,id,house_face_img_url from uchome_z_soufun_house_link where type=0");
        while (link_resultSet.next()) {
            String rawLink = link_resultSet.getString(1);
            if (rawLink == null) {
                continue;
            }
            String str_link = rawLink.trim();
            String link_id = String.valueOf(link_resultSet.getString(2))
                    .trim();
            String house_face_img_url = link_resultSet.getString(3);
            try {
                scrapeDetailPage(str_link, link_id, house_face_img_url);
            } catch (ParserException e) {
                // An unreadable page is recorded as failed, not fatal.
                System.out.println("抓取" + str_link + "失败");
                System.out.println(e.getMessage());
                markLink(str_link, 2);
            }
        }
    } finally {
        closeQuietly(statement);
        closeQuietly(link_statement);
        try {
            mysql_connection.close();
        } catch (SQLException ignored) {
            // Best effort: the program is finishing anyway.
        }
    }
}

/** Detail-table labels whose value is stored in a column of the same name. */
private static final List<String> SAME_NAME_LABELS = java.util.Arrays
        .asList("地址", "户型", "建筑年代", "朝向", "住宅类别", "楼层", "装修", "房屋结构",
                "产权性质", "配套设施", "物业类型", "绿化率", "物业费", "物业公司", "开发商",
                "交通状况", "周边配套");

/**
 * Scrapes one detail page. All per-house state is local to this call, so
 * nothing from a previously processed listing can leak into this one.
 */
private static void scrapeDetailPage(String str_link, String link_id,
        String house_face_img_url) throws ParserException, SQLException {
    Parser parser;
    try {
        HttpURLConnection connection = (HttpURLConnection) new URL(str_link)
                .openConnection();
        parser = new Parser(connection);
    } catch (IOException e) {
        System.out.println(e.getMessage());
        return;
    } catch (ParserException e) {
        System.out.println(e.getMessage());
        return;
    }

    // Column name -> value, in the order the fields are found on the page.
    Map<String, String> fields = new LinkedHashMap<String, String>();
    String house_id = null;
    String house_style = null, floor = null, wuye_type = null, area = null, sub_area = null;

    // Title: the node after the first child of the "title flc" element.
    NodeList n_list = parser
            .extractAllNodesThatMatch(new HasAttributeFilter("class",
                    "title flc"));
    Node titleNode = null;
    if (n_list != null && n_list.size() > 0) {
        titleNode = nthNextSibling(n_list.elementAt(0).getFirstChild(), 1);
    }
    if (titleNode == null) {
        System.out.println("抓取" + str_link + "失败");
        markLink(str_link, 2);
        return;
    }
    System.out
            .println(link_id
                    + ":["
                    + str_link
                    + "]:开始---------------------------------------------------------");
    record(fields, "房源标题", titleNode.toPlainTextString());

    // Price block: total price, unit price and floor area.
    parser.reset();
    n_list = parser.extractAllNodesThatMatch(new AndFilter(
            new NodeClassFilter(DefinitionList.class),
            new HasAttributeFilter("class", "price")));
    if (n_list.size() > 0) {
        String priceText = stripLayoutChars(n_list.elementAt(0)
                .toPlainTextString());
        record(fields, "总价", textBetween(priceText, "总价", "万元"));
        record(fields, "单价", textBetween(priceText, "单价", "元/平方米"));
        record(fields, "建筑面积", textBetween(priceText, "建筑面积", "平方米"));
    }

    // Contact phone.
    parser.reset();
    n_list = parser.extractAllNodesThatMatch(new HasAttributeFilter(
            "class", "tel400"));
    if (n_list.size() > 0) {
        record(fields, "联系电话", stripLayoutChars(
                n_list.elementAt(0).toPlainTextString()).trim());
    }

    // Label/value pairs: each "gray9" element is a label followed by its value.
    parser.reset();
    n_list = parser.extractAllNodesThatMatch(new HasAttributeFilter(
            "class", "gray9"));
    for (int i = 0; i < n_list.size(); i++) {
        Node labelNode = n_list.elementAt(i);
        String label = stripLayoutChars(labelNode.toPlainTextString())
                .trim();
        Node valueNode = labelNode.getNextSibling();
        if (label.indexOf("房源编号") >= 0) {
            // The id sits inside the label text, before "房经协" if present.
            int from = label.indexOf("房源编号") + 4;
            int to = label.indexOf("房经协", from);
            house_id = (to >= 0 ? label.substring(from, to) : label
                    .substring(from)).trim();
            record(fields, "房源编号", house_id);
        } else if (label.equals("楼盘名称")) {
            if (valueNode != null
                    && valueNode.toPlainTextString().trim().equals("")) {
                valueNode = valueNode.getNextSibling();
            }
            if (valueNode == null) {
                continue;
            }
            record(fields, "楼盘名称", valueNode.toPlainTextString());
            // District and sub-district are 3 and 5 siblings further on.
            Node areaNode = nthNextSibling(valueNode, 3);
            if (areaNode != null) {
                area = areaNode.toPlainTextString();
                record(fields, "城区", area);
            }
            Node subAreaNode = nthNextSibling(valueNode, 5);
            if (subAreaNode != null) {
                sub_area = subAreaNode.toPlainTextString();
                record(fields, "片区", sub_area);
            }
        } else if (valueNode != null && SAME_NAME_LABELS.contains(label)) {
            String value = valueNode.toPlainTextString();
            if (label.equals("户型")) {
                house_style = value;
            } else if (label.equals("楼层")) {
                floor = value;
            } else if (label.equals("物业类型")) {
                wuye_type = value;
            }
            record(fields, label, value);
        }
    }

    // Free-text description.
    parser.reset();
    n_list = parser.extractAllNodesThatMatch(new HasAttributeFilter(
            "class", "beizhu"));
    if (n_list.size() > 0) {
        String detail = n_list.elementAt(0).toPlainTextString();
        detail = detail.replaceAll(" ", "").replaceAll("\n", "")
                .replaceAll("\r", "");
        record(fields, "房源详细信息", detail);
    }

    // Agent links: first holds the portrait, second the agent's name.
    parser.reset();
    n_list = parser.extractAllNodesThatMatch(new LinkStringFilter(
            "esf.sz.soufun.com/a/"));
    if (n_list != null && n_list.size() >= 2) {
        Node portrait = n_list.elementAt(0).getFirstChild();
        if (portrait instanceof ImageTag) {
            record(fields, "source_agent_img_url",
                    ((ImageTag) portrait).getImageURL());
        }
        record(fields, "姓名", n_list.elementAt(1).toPlainTextString());
    } else {
        System.out.println("非经纪人房源,不抓取此网页:" + str_link);
        markLink(str_link, 3);
        return;
    }

    // Agency: dedicated link if present, otherwise text after "clear mt20".
    parser.reset();
    n_list = parser.extractAllNodesThatMatch(new LinkStringFilter(
            "esf.sz.soufun.com/newsecond/agtagent/"));
    if (n_list != null && n_list.size() >= 1) {
        record(fields, "所属公司", n_list.elementAt(0).toPlainTextString());
    } else {
        parser.reset();
        n_list = parser.extractAllNodesThatMatch(new HasAttributeFilter(
                "class", "clear mt20"));
        if (n_list != null && n_list.size() >= 1) {
            Node candidate = nthNextSibling(n_list.elementAt(0), 2);
            if (candidate != null) {
                String text = candidate.toPlainTextString();
                if (text.indexOf("所属公司") >= 0) {
                    record(fields, "所属公司", afterColon(text));
                } else {
                    Node later = nthNextSibling(candidate, 4);
                    if (later != null) {
                        record(fields, "所属公司", later.toPlainTextString());
                    }
                }
            }
        }
    }

    // Store-manager layout: name precedes the mail icon, agency is the 2nd agent link.
    parser.reset();
    n_list = parser
            .extractAllNodesThatMatch(new HasAttributeFilter("src",
                    "http://img1.soufun.com/secondhouse/image/new2housedetail/icon_mail.gif"));
    if (n_list != null && n_list.size() >= 1) {
        Node iconHolder = n_list.elementAt(0).getParent();
        Node nameNode = iconHolder == null ? null : iconHolder
                .getPreviousSibling();
        if (nameNode != null
                && nameNode.toPlainTextString().indexOf("门店店长") >= 0) {
            record(fields, "姓名", afterColon(nameNode.toPlainTextString()));
            parser.reset();
            n_list = parser.extractAllNodesThatMatch(new LinkStringFilter(
                    "esf.sz.soufun.com/a/"));
            if (n_list != null && n_list.size() >= 2) {
                record(fields, "所属公司", n_list.elementAt(1)
                        .toPlainTextString());
            }
        }
    }

    try {
        saveDetailFields(str_link, fields);
    } catch (SQLException e) {
        // Report and propagate so the run ends with an error status.
        System.out.println(fields);
        System.out.println("数据保存出错:" + e.getMessage());
        throw e;
    }

    // Images are keyed by the house id; without one they cannot be stored.
    if (house_id != null) {
        parser.reset();
        n_list = parser.extractAllNodesThatMatch(new HasAttributeFilter(
                "class", "dashedb"));
        for (int i = 0; i < n_list.size(); i++) {
            String heading = n_list.elementAt(i).toPlainTextString().trim();
            if (heading.equals("户型图") || heading.equals("室内图")) {
                saveLinkedImages(n_list.elementAt(i), house_id, heading);
            }
        }

        parser.reset();
        n_list = parser.extractAllNodesThatMatch(new TagNameFilter("script"));
        for (int i = 0; i < n_list.size(); i++) {
            String scriptHtml = n_list.elementAt(i).toHtml();
            if (scriptHtml.indexOf("photoJson") >= 0) {
                saveScriptImages(scriptHtml, house_id);
            }
        }

        if (house_face_img_url != null) {
            String cover = house_face_img_url.trim();
            // "null" is what string concatenation stored for a missing cover.
            if (cover.length() > 0 && !cover.equals("null")) {
                savePicture(cover, house_id, "封面图", "房源相册封面图");
            }
        }
    }

    dataConversion(str_link, house_style, floor, wuye_type, area, sub_area);
    System.out
            .println(link_id
                    + ":["
                    + str_link
                    + "]:结束---------------------------------------------------------");
}

/** Sets the processing state of a link row. */
private static void markLink(String link, int type) throws SQLException {
    java.sql.PreparedStatement update = mysql_connection
            .prepareStatement("update uchome_z_soufun_house_link set type=?,update_time=now() where link=?");
    try {
        update.setInt(1, type);
        update.setString(2, link);
        update.executeUpdate();
    } finally {
        update.close();
    }
}

/** Writes the collected fields to the link row and marks it as type=1. */
private static void saveDetailFields(String link, Map<String, String> fields)
        throws SQLException {
    StringBuilder sql = new StringBuilder(
            "update uchome_z_soufun_house_link set type=1,update_time=now()");
    // Column names come only from constants in this class; values are bound.
    for (String column : fields.keySet()) {
        sql.append(",`").append(column).append("`=?");
    }
    sql.append(" where link=?");
    java.sql.PreparedStatement update = mysql_connection
            .prepareStatement(sql.toString());
    try {
        int index = 1;
        for (String value : fields.values()) {
            // Stored values never contained line breaks.
            update.setString(index++,
                    value.replace("\r", "").replace("\n", ""));
        }
        update.setString(index, link);
        update.executeUpdate();
    } finally {
        update.close();
    }
}

/** Prints "column:value" and remembers the value; a null value is ignored. */
private static void record(Map<String, String> fields, String column,
        String value) {
    if (value == null) {
        return;
    }
    System.out.println(column + ":" + value);
    fields.put(column, value);
}

/** Returns the trimmed text between the two markers, or null if either is missing. */
private static String textBetween(String text, String startMarker,
        String endMarker) {
    int start = text.indexOf(startMarker);
    if (start < 0) {
        return null;
    }
    start += startMarker.length();
    int end = text.indexOf(endMarker, start);
    if (end < 0) {
        return null;
    }
    return text.substring(start, end).trim();
}

/** Removes whitespace (including NBSP and ideographic space) and colons. */
private static String stripLayoutChars(String text) {
    return text.replaceAll("[\\s\\u00A0\\u3000:\\uFF1A]", "");
}

/** Returns the text after the first ASCII or full-width colon, or all of it if none. */
private static String afterColon(String text) {
    int ascii = text.indexOf(':');
    int wide = text.indexOf('\uFF1A');
    int colon = ascii < 0 ? wide : (wide < 0 ? ascii : Math.min(ascii, wide));
    return text.substring(colon + 1);
}

/** Follows getNextSibling() the given number of times; null if the chain ends early. */
private static Node nthNextSibling(Node node, int steps) {
    Node current = node;
    for (int i = 0; i < steps && current != null; i++) {
        current = current.getNextSibling();
    }
    return current;
}

/** Saves the image link of every {@code dd} entry that directly follows a gallery heading. */
private static void saveLinkedImages(Node heading, String house_id,
        String img_type) throws SQLException {
    Node item = heading.getNextSibling();
    while (item instanceof org.htmlparser.tags.DefinitionListBullet) {
        org.htmlparser.tags.DefinitionListBullet bullet = (org.htmlparser.tags.DefinitionListBullet) item;
        if (bullet.getEndTag() == null
                || !bullet.getEndTag().toHtml().equals("</dd>")) {
            break;
        }
        // The image link is the third child of the entry.
        Node anchor = nthNextSibling(bullet.getFirstChild(), 2);
        if (anchor instanceof LinkTag) {
            String url = ((LinkTag) anchor).getLink();
            String caption = ((LinkTag) anchor).getLinkText();
            System.out.println(url);
            System.out.println(caption);
            savePicture(url, house_id, img_type, caption);
        }
        item = item.getNextSibling();
    }
}

/** Saves the images listed in the {@code photoJson} array of a script element. */
private static void saveScriptImages(String scriptHtml, String house_id)
        throws SQLException {
    int open = scriptHtml.indexOf("[", scriptHtml.indexOf("photoJson"));
    int close = open < 0 ? -1 : scriptHtml.indexOf("]", open);
    if (close < 0) {
        return;
    }
    String json = scriptHtml.substring(open, close + 1);
    // Quote these keys and switch to double quotes so the text parses as JSON.
    json = json.replaceAll("showtit", "'showtit'");
    json = json.replaceAll("showtxt", "'showtxt'");
    json = json.replaceAll("smallpic", "'smallpic'");
    json = json.replaceAll("'", "\"");
    Object parsed = JSONValue.parse(json);
    if (!(parsed instanceof JSONArray)) {
        return;
    }
    JSONArray photos = (JSONArray) parsed;
    for (int m = 0; m < photos.size(); m++) {
        if (!(photos.get(m) instanceof Map)) {
            continue;
        }
        Map photo = (Map) photos.get(m);
        Object caption = photo.get("showtit");
        Object url = photo.get("bigpic");
        if (caption != null) {
            System.out.println("图片说明:" + caption);
        }
        if (url != null) {
            System.out.println("图片URL:" + url);
        }
        // Both values must come from this entry, never from an earlier one.
        if (caption != null && url != null) {
            savePicture(url.toString(), house_id, "外景图",
                    caption.toString());
        }
    }
}

/** Closes a statement, ignoring failures (used only during shutdown). */
private static void closeQuietly(Statement toClose) {
    if (toClose != null) {
        try {
            toClose.close();
        } catch (SQLException ignored) {
            // Nothing useful can be done at this point.
        }
    }
}

/**
 * Records an image of a house in {@code uchome_z_soufun_house_img} unless a
 * row with the same house id and source URL already exists.
 *
 * <p>Only the metadata is stored: {@code target_img_path} is written as an
 * empty string and the image itself is not downloaded.
 *
 * @param img_url source URL of the image; nothing is stored when null
 * @param house_id id of the house the image belongs to; nothing is stored
 *        when null
 * @param img_type image category
 * @param img_title caption of the image
 * @throws SQLException if a database operation fails
 */
public static void savePicture(String img_url, String house_id,
        String img_type, String img_title) throws SQLException {
    if (img_url == null || house_id == null) {
        return;
    }
    // Values are bound as parameters: captions and URLs come from scraped pages.
    boolean alreadyStored;
    java.sql.PreparedStatement count = mysql_connection
            .prepareStatement("select count(id) from uchome_z_soufun_house_img where house_id=? and source_img_url=?");
    try {
        count.setString(1, house_id);
        count.setString(2, img_url);
        ResultSet rows = count.executeQuery();
        try {
            alreadyStored = !rows.next() || rows.getInt(1) != 0;
        } finally {
            rows.close();
        }
    } finally {
        count.close();
    }
    if (alreadyStored) {
        return;
    }
    java.sql.PreparedStatement insert = mysql_connection
            .prepareStatement("insert into uchome_z_soufun_house_img(house_id,source_img_url,target_img_path,img_type,img_title,date) values(?,?,'',?,?,now())");
    try {
        insert.setString(1, house_id);
        insert.setString(2, img_url);
        insert.setString(3, img_type == null ? "" : img_type);
        insert.setString(4, img_title == null ? "" : img_title);
        insert.executeUpdate();
    } finally {
        insert.close();
    }
}

/**
 * Stores the local path of an agent portrait on the link row identified by
 * the house id, but only when that row has no path yet.
 *
 * <p>Only the path is recorded; the image itself is not downloaded.
 *
 * @param img_url source URL of the portrait (not used by the current
 *        implementation)
 * @param fileFullName full target file name to record
 * @param filePath target directory (not used by the current implementation)
 * @param house_id value of the {@code 房源编号} column identifying the row
 * @throws SQLException if a database operation fails
 */
public static void savePictureForAgent(String img_url, String fileFullName,
        String filePath, String house_id) throws SQLException {
    boolean pathMissing = false;
    java.sql.PreparedStatement select = mysql_connection
            .prepareStatement("select target_agent_img_path from uchome_z_soufun_house_link where `房源编号`=?");
    try {
        select.setString(1, house_id);
        ResultSet rows = select.executeQuery();
        try {
            if (rows.next()) {
                String current = rows.getString(1);
                // A SQL NULL counts as "no path yet" as well.
                pathMissing = current == null || current.trim().equals("");
            }
        } finally {
            rows.close();
        }
    } finally {
        select.close();
    }
    if (!pathMissing) {
        return;
    }
    java.sql.PreparedStatement update = mysql_connection
            .prepareStatement("update uchome_z_soufun_house_link set target_agent_img_path=? where `房源编号`=?");
    try {
        update.setString(1, fileFullName);
        update.setString(2, house_id);
        update.executeUpdate();
    } finally {
        update.close();
    }
    System.out.println("target_agent_img_path:" + fileFullName);
}

/**
 * Builds a timestamp-based target file name for an agent portrait.
 *
 * <p>The directory is {@code soufun_agents_img_path + yyyyMM + "/" + dd + "/"}
 * and the file name is the current time as
 * {@code yyyyMMddHHmmss} followed by the milliseconds and the extension
 * taken from the URL.
 *
 * @param img_url source URL of the portrait; may be null
 * @return a two-element array: the full file name, then the directory
 */
public static String[] getNewFileNameForAgent(String img_url) {
    Calendar d = Calendar.getInstance();
    String yearMonth = d.get(Calendar.YEAR)
            + twoDigits(d.get(Calendar.MONTH) + 1);
    String day = twoDigits(d.get(Calendar.DAY_OF_MONTH));
    String stamp = yearMonth + day + twoDigits(d.get(Calendar.HOUR_OF_DAY))
            + twoDigits(d.get(Calendar.MINUTE))
            + twoDigits(d.get(Calendar.SECOND))
            + d.get(Calendar.MILLISECOND);
    String filePath = soufun_agents_img_path + yearMonth + "/" + day + "/";
    String fileFullName = filePath + stamp + agentImageExtension(img_url);
    return new String[] { fileFullName, filePath };
}

/** Formats a non-negative value with at least two digits. */
private static String twoDigits(int value) {
    return value < 10 ? "0" + value : Integer.toString(value);
}

/**
 * Returns the extension (including the dot) of the last path segment of the
 * URL, without query string or fragment; empty when there is none.
 */
private static String agentImageExtension(String img_url) {
    if (img_url == null) {
        return "";
    }
    String path = img_url;
    int cut = path.indexOf('?');
    if (cut >= 0) {
        path = path.substring(0, cut);
    }
    cut = path.indexOf('#');
    if (cut >= 0) {
        path = path.substring(0, cut);
    }
    int dot = path.lastIndexOf('.');
    // A dot before the last '/' belongs to the host or a directory name.
    if (dot < 0 || dot < path.lastIndexOf('/')) {
        return "";
    }
    return path.substring(dot);
}

/**
 * Derives structured columns from the scraped text of a listing and writes
 * them to the link row: room/hall/washing/cooking counts from the layout
 * text, current and total floor from the floor text, a property-type id,
 * and district / sub-district ids looked up in {@code uchome_z_zone}.
 *
 * <p>A value that cannot be derived is left untouched in the database. The
 * row's {@code update_time} is refreshed whenever at least one of
 * {@code house_style}, {@code floor}, {@code wuye_type} or {@code area} is
 * non-blank.
 *
 * @param str_link link identifying the row to update
 * @param house_style layout text such as "三室两厅一卫"; may be null
 * @param floor floor text such as "第5层(共30层)"; may be null
 * @param wuye_type property type text; may be null
 * @param area district name without the "区" suffix; may be null
 * @param sub_area sub-district name; may be null
 * @throws SQLException if a database operation fails
 */
public static void dataConversion(String str_link, String house_style,
        String floor, String wuye_type, String area, String sub_area)
        throws SQLException {
    // Column name -> value for everything that could be derived.
    Map<String, String> columns = new LinkedHashMap<String, String>();
    boolean execute = false;
    System.out.println("数据转换:开始");

    if (house_style != null && house_style.trim().length() > 0) {
        execute = true;
        System.out.println(house_style);
        // A unit directly after another unit has an implied count of one.
        String style = house_style.replaceAll("室厅", "室一厅")
                .replaceAll("厅卫", "厅一卫").replaceAll("卫厨", "卫一厨");
        if (style.indexOf("室") == 0) {
            style = "一" + style;
        }
        String[][] units = { { "室", "room" }, { "厅", "hall" },
                { "卫", "washing" }, { "厨", "cooking" } };
        for (int i = 0; i < units.length; i++) {
            String count = unitCount(style, units[i][0]);
            if (count != null) {
                columns.put(units[i][1], count);
                System.out.println(units[i][1] + ":" + count);
            }
        }
    }

    if (floor != null && floor.trim().length() > 0) {
        execute = true;
        System.out.println(floor);
        // Current floor: between "第" (or the start) and the next "层".
        int from = floor.indexOf("第") + 1;
        int to = floor.indexOf("层", from);
        if (to >= 0) {
            String floor_current = floor.substring(from, to).trim();
            columns.put("floor", floor_current);
            System.out.println("floor_current:" + floor_current);
        }
        // Total floors: between "共" and the next "层".
        int totalMark = floor.indexOf("共");
        int totalEnd = totalMark < 0 ? -1 : floor.indexOf("层",
                totalMark + 1);
        if (totalEnd >= 0) {
            String floor_all = floor.substring(totalMark + 1, totalEnd)
                    .trim();
            columns.put("floor_all", floor_all);
            System.out.println("floor_all:" + floor_all);
        }
    }

    if (wuye_type != null && wuye_type.trim().length() > 0) {
        execute = true;
        String kind = wuye_type.trim();
        String typeId;
        if (kind.equals("住宅")) {
            typeId = "8";
        } else if (kind.equals("别墅")) {
            typeId = "14";
        } else {
            typeId = "0";
        }
        columns.put("物业类型ID", typeId);
        System.out.println("wuye_type:" + typeId);
    }

    if (area != null && area.trim().length() > 0) {
        execute = true;
        String areaId = findZoneId(
                "select zoneid from uchome_z_zone where zonename=?",
                area.trim() + "区");
        String subAreaId = null;
        if (sub_area != null && sub_area.trim().length() > 0) {
            subAreaId = findZoneId(
                    "select zoneid from uchome_z_zone where zonename=?",
                    sub_area.trim());
        }
        if (subAreaId == null && areaId != null) {
            // Unknown sub-district: use the district's "其它" zone.
            subAreaId = findZoneId(
                    "select zoneid from uchome_z_zone where zonename='其它' and parent_zoneid=?",
                    areaId);
        }
        // Only ids that were actually found are written to the ID columns.
        if (areaId != null) {
            columns.put("城区ID", areaId);
            System.out.println("城区ID:" + areaId);
        }
        if (subAreaId != null) {
            columns.put("片区ID", subAreaId);
            System.out.println("片区ID:" + subAreaId);
        }
    }

    if (execute) {
        StringBuilder sql = new StringBuilder(
                "update uchome_z_soufun_house_link set update_time=now()");
        for (String column : columns.keySet()) {
            sql.append(",`").append(column).append("`=?");
        }
        sql.append(" where `link`=?");
        java.sql.PreparedStatement update = mysql_connection
                .prepareStatement(sql.toString());
        try {
            int index = 1;
            for (String value : columns.values()) {
                update.setString(index++, value);
            }
            update.setString(index, str_link);
            update.executeUpdate();
        } finally {
            update.close();
        }
    }
    System.out.println("数据转换:结束");
}

/**
 * Returns the count written directly before the unit character: a run of
 * ASCII digits as is, otherwise the single preceding character converted
 * by {@link #getEnglishNumberFromChineseNumber}. Null when the unit is
 * absent, is the first character, or the count is not recognised.
 */
private static String unitCount(String style, String unit) {
    int pos = style.indexOf(unit);
    if (pos <= 0) {
        return null;
    }
    int start = pos;
    while (start > 0 && style.charAt(start - 1) >= '0'
            && style.charAt(start - 1) <= '9') {
        start--;
    }
    if (start < pos) {
        return style.substring(start, pos);
    }
    return getEnglishNumberFromChineseNumber(style.substring(pos - 1, pos));
}

/** Runs a single-parameter zone query and returns the trimmed id, or null if none. */
private static String findZoneId(String sql, String parameter)
        throws SQLException {
    java.sql.PreparedStatement select = mysql_connection
            .prepareStatement(sql);
    try {
        select.setString(1, parameter);
        ResultSet rows = select.executeQuery();
        try {
            if (rows.next()) {
                String id = rows.getString(1);
                return id == null ? null : id.trim();
            }
            return null;
        } finally {
            rows.close();
        }
    } finally {
        select.close();
    }
}

/**
 * Converts a single Chinese numeral character into its decimal digit string.
 *
 * <p>The input is trimmed first. Recognised values are 零 (0) through 十 (10),
 * plus 两 as an alternative spelling of 2. Multi-character numerals are not
 * supported.
 *
 * @param chinese_number the Chinese numeral, optionally surrounded by whitespace;
 *        must not be {@code null}
 * @return the value as a decimal string ("0" to "10"), or {@code null} when the
 *         trimmed input is not one of the recognised characters
 */
public static String getEnglishNumberFromChineseNumber(String chinese_number) {
    // The position of a character in this table is its numeric value.
    final String numerals = "零一二三四五六七八九十";
    String candidate = chinese_number.trim();
    if (candidate.length() != 1) {
        // Every recognised numeral is exactly one character long.
        return null;
    }
    if (candidate.equals("两")) {
        return "2";
    }
    int value = numerals.indexOf(candidate);
    return value < 0 ? null : String.valueOf(value);
}
}


package soufun;

import java.awt.Color;
import java.awt.Font;
import java.awt.Graphics;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;

import javax.imageio.ImageIO;

import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGImageEncoder;

/**
 * Helpers that stamp an image or a text watermark onto an image file, in place.
 *
 * <p>The target file is decoded, redrawn onto an opaque RGB canvas, watermarked
 * and written back to the same path JPEG-encoded, whatever its file extension.
 * Encoding goes through {@code javax.imageio.ImageIO}; this class does not use
 * the JDK-internal {@code com.sun.image.codec.jpeg} encoder, so imports of that
 * package are not needed by this class.
 *
 * <p>Both public methods are best-effort: any failure is reported on standard
 * error and the method returns normally, so one bad file does not abort a batch.
 */
public final class ImageUtils {
    /** Format name handed to ImageIO when saving the result. */
    private static final String OUTPUT_FORMAT = "jpeg";

    public ImageUtils() {

    }

    /**
     * Draws a watermark image onto the target image and overwrites the target file.
     *
     * <p>The watermark is placed relative to the bottom-right corner of the target.
     *
     * @param pressImg path of the watermark image file
     * @param targetImg path of the image file to stamp; it is overwritten with the result
     * @param x distance in pixels between the watermark's right edge and the target's right edge
     * @param y distance in pixels between the watermark's bottom edge and the target's bottom edge
     */
    public final static void pressImage(String pressImg, String targetImg,
            int x, int y) {
        try {
            BufferedImage image = loadAsRgb(targetImg);
            int wideth = image.getWidth();
            int height = image.getHeight();

            // Read the watermark before touching the target file, so a bad
            // watermark leaves the target unchanged.
            Image src_biao = readImage(pressImg);
            int wideth_biao = src_biao.getWidth(null);
            int height_biao = src_biao.getHeight(null);

            Graphics g = image.createGraphics();
            try {
                g.drawImage(src_biao, wideth - wideth_biao - x, height
                        - height_biao - y, wideth_biao, height_biao, null);
            } finally {
                g.dispose();
            }
            saveAsJpeg(image, targetImg);
        } catch (Exception e) {
            // Deliberate best-effort boundary: report and carry on.
            e.printStackTrace();
        }
    }

    /**
     * Draws a text watermark onto the target image and overwrites the target file.
     *
     * <p>The text baseline starts at {@code (width - fontSize - x,
     * height - fontSize / 2 - y)}.
     *
     * @param pressText the text to draw
     * @param targetImg path of the image file to stamp; it is overwritten with the result
     * @param fontName font family name
     * @param fontStyle font style, as accepted by the {@link Font} constructor
     * @param color text colour as a packed RGB value (red in bits 16-23, green in
     *        bits 8-15, blue in bits 0-7)
     * @param fontSize font size in points
     * @param x horizontal offset subtracted from the drawing position
     * @param y vertical offset subtracted from the drawing position
     */
    public static void pressText(String pressText, String targetImg,
            String fontName, int fontStyle, int color, int fontSize, int x,
            int y) {
        try {
            BufferedImage image = loadAsRgb(targetImg);
            int wideth = image.getWidth();
            int height = image.getHeight();

            Graphics g = image.createGraphics();
            try {
                // Honour the caller-supplied colour instead of a fixed red.
                g.setColor(new Color(color));
                g.setFont(new Font(fontName, fontStyle, fontSize));
                g.drawString(pressText, wideth - fontSize - x, height - fontSize
                        / 2 - y);
            } finally {
                g.dispose();
            }
            saveAsJpeg(image, targetImg);
        } catch (Exception e) {
            // Deliberate best-effort boundary: report and carry on.
            e.printStackTrace();
        }
    }

    /** Decodes the file at {@code path}, failing with a clear message when no decoder accepts it. */
    private static BufferedImage readImage(String path) throws java.io.IOException {
        BufferedImage decoded = ImageIO.read(new File(path));
        if (decoded == null) {
            // ImageIO.read returns null (rather than throwing) for unknown formats.
            throw new java.io.IOException("Unsupported or unreadable image: " + path);
        }
        return decoded;
    }

    /** Loads the file at {@code path} and redraws it onto a new opaque RGB canvas of the same size. */
    private static BufferedImage loadAsRgb(String path) throws java.io.IOException {
        Image src = readImage(path);
        int width = src.getWidth(null);
        int height = src.getHeight(null);
        BufferedImage canvas = new BufferedImage(width, height,
                BufferedImage.TYPE_INT_RGB);
        Graphics g = canvas.createGraphics();
        try {
            g.drawImage(src, 0, 0, width, height, null);
        } finally {
            g.dispose();
        }
        return canvas;
    }

    /** Writes {@code image} to {@code path} as JPEG; ImageIO opens and closes the file itself. */
    private static void saveAsJpeg(BufferedImage image, String path)
            throws java.io.IOException {
        if (!ImageIO.write(image, OUTPUT_FORMAT, new File(path))) {
            throw new java.io.IOException("No JPEG writer available for: " + path);
        }
    }

    public static void main(String[] args) {
        pressImage("d:/soufun/4000_watermark.png", "d://2011043000251446.jpeg", 5, 5);
    }
}


package soufun;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Fetches the map coordinates that SouFun's new-house map iframe embeds in its
 * inline JavaScript.
 */
public class MapABC {
    /** Text that precedes the x value once whitespace, ':' and ';' have been stripped. */
    private static final String X_MARKER = "varcityx";
    /** Text that precedes the y value once whitespace, ':' and ';' have been stripped. */
    private static final String Y_MARKER = "varcityy";
    /** Text that follows the y value once whitespace, ':' and ';' have been stripped. */
    private static final String END_MARKER = "varmapsize";
    /**
     * Characters skipped from the start of a marker to reach its value: the
     * eight marker characters plus one more (presumably the '=' sign).
     */
    private static final int VALUE_OFFSET = 9;

    public static void main(String[] args) throws ParserException, IOException {
        String[] a = getMapABCXY("2810956964", "深圳");
        System.out.println(a[0]);
        System.out.println(a[1]);
    }

    /**
     * Downloads the map iframe page for a listing and extracts its coordinates.
     *
     * <p>Every {@code <script>} element is scanned; when several contain the
     * coordinate markers, the last one wins. Scripts that lack the full marker
     * sequence are skipped rather than causing an index error.
     *
     * @param new_house_code listing code sent as the {@code newcode} query parameter
     * @param city_name city name sent as the {@code city} query parameter; it is
     *        appended as-is (NOTE(review): not URL-encoded; confirm the charset
     *        the server expects before adding encoding)
     * @return a two-element array {@code {x, y}} holding the raw text found after
     *         each marker; both elements are {@code null} when no script
     *         contains the markers
     * @throws ParserException if the page cannot be parsed
     * @throws IOException if the connection cannot be opened
     */
    public static String[] getMapABCXY(String new_house_code, String city_name)
            throws ParserException, IOException {
        String mapabc_x = null;
        String mapabc_y = null;
        URL server = new URL(
                "http://smap.soufun.com/house/iframe/newhouse_map_iframe.php?newcode="
                        + new_house_code + "&city=" + city_name);
        HttpURLConnection connection = (HttpURLConnection) server.openConnection();
        try {
            Parser parser = new Parser(connection);
            NodeList n_list = parser.extractAllNodesThatMatch(new TagNameFilter("script"));
            for (int i = 0; i < n_list.size(); i++) {
                String script = replaceString(n_list.elementAt(i).toHtml())
                        .replace(";", "");
                String[] xy = extractCoordinates(script);
                if (xy != null) {
                    mapabc_x = xy[0];
                    mapabc_y = xy[1];
                }
            }
        } finally {
            // Release the connection even when parsing fails.
            connection.disconnect();
        }
        return new String[] { mapabc_x, mapabc_y };
    }

    /**
     * Pulls the x and y values out of one stripped script body.
     *
     * @return {@code {x, y}}, or {@code null} when the x, y and end markers are
     *         not all present in that order
     */
    private static String[] extractCoordinates(String script) {
        int xMarker = script.indexOf(X_MARKER);
        int endMarker = script.indexOf(END_MARKER);
        if (xMarker < 0 || endMarker < 0 || xMarker + VALUE_OFFSET > endMarker) {
            return null;
        }
        // Everything between the x marker and the end marker: "<x>varcityy=<y>".
        String span = script.substring(xMarker + VALUE_OFFSET, endMarker);
        int yMarker = span.indexOf(Y_MARKER);
        if (yMarker < 0 || yMarker + VALUE_OFFSET > span.length()) {
            return null;
        }
        return new String[] {
                span.substring(0, yMarker),
                span.substring(yMarker + VALUE_OFFSET) };
    }

    /**
     * Removes line breaks, blanks and colons from a string.
     *
     * <p>Besides the ASCII space, the tab, no-break space (U+00A0) and
     * ideographic space (U+3000) are removed too, so that differently encoded
     * blanks in a page cannot hide the markers searched for afterwards.
     *
     * @param str the text to clean; must not be {@code null}
     * @return {@code str} without the characters listed above
     */
    public static String replaceString(String str) {
        StringBuilder cleaned = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            boolean dropped = ch == '\n' || ch == '\r' || ch == ' ' || ch == '\t'
                    || ch == '\u00a0' || ch == '\u3000' || ch == ':';
            if (!dropped) {
                cleaned.append(ch);
            }
        }
        return cleaned.toString();
    }

}


package soufun;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;

/**
 * Recursively collects image files below a directory; {@link #main} then stamps
 * a watermark on each file found.
 *
 * @author tiwson 2010-06-02
 */
public class FileSearcher {

    /** Lower-case wildcard patterns a file name must match to be collected. */
    private static final String[] IMAGE_PATTERNS = { "*.jpg", "*.jpeg", "*.gif", "*.png" };

    /**
     * Recursively searches a directory tree for image files.
     *
     * <p>Each entry of the directory is inspected in turn: sub-directories are
     * searched recursively, and plain files whose name matches one of
     * {@code *.jpg}, {@code *.jpeg}, {@code *.gif} or {@code *.png}
     * (case-insensitively) are appended to {@code fileList} as absolute
     * {@link File} objects. Child paths are built with {@link File#File(File, String)},
     * so the search works with any platform's path separator.
     *
     * @param baseDirName path of the directory to search; when it is not an
     *        existing directory a message is printed and nothing is added
     * @param targetFileName currently unused for matching; it is only passed on
     *        to the recursive calls
     * @param fileList receives the matching files
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static void findFiles(String baseDirName, String targetFileName,
            List fileList) {
        File baseDir = new File(baseDirName);
        if (!baseDir.exists() || !baseDir.isDirectory()) {
            System.out.println("文件查找失败:" + baseDirName + "不是一个目录!");
            return;
        }
        String[] filelist = baseDir.list();
        if (filelist == null) {
            // list() returns null when the directory cannot be read.
            return;
        }
        for (int i = 0; i < filelist.length; i++) {
            File readfile = new File(baseDir, filelist[i]);
            if (readfile.isDirectory()) {
                findFiles(readfile.getPath(), targetFileName, fileList);
            } else if (isImageName(readfile.getName())) {
                fileList.add(readfile.getAbsoluteFile());
            }
        }
    }

    /** Returns whether {@code name} matches any of the image patterns. */
    private static boolean isImageName(String name) {
        for (int i = 0; i < IMAGE_PATTERNS.length; i++) {
            if (wildcardMatch(IMAGE_PATTERNS[i], name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Matches a string against a wildcard pattern.
     *
     * <p>{@code *} matches any run of characters and {@code ?} matches exactly
     * one character. The string is lower-cased before matching; the pattern is
     * used as given, so it should be written in lower case.
     *
     * @param pattern the wildcard pattern
     * @param str the string to test
     * @return {@code true} if the whole string matches the pattern
     */
    private static boolean wildcardMatch(String pattern, String str) {
        // Locale.ROOT keeps the result independent of the default locale
        // (e.g. the Turkish dotless i); measure the length after lower-casing.
        str = str.toLowerCase(java.util.Locale.ROOT);
        int patternLength = pattern.length();
        int strLength = str.length();
        int strIndex = 0;
        char ch;
        for (int patternIndex = 0; patternIndex < patternLength; patternIndex++) {
            ch = pattern.charAt(patternIndex);
            if (ch == '*') {
                // Try the rest of the pattern against every remaining suffix.
                while (strIndex < strLength) {
                    if (wildcardMatch(pattern.substring(patternIndex + 1),
                            str.substring(strIndex))) {
                        return true;
                    }
                    strIndex++;
                }
            } else if (ch == '?') {
                // Consume exactly one character.
                strIndex++;
                if (strIndex > strLength) {
                    // No character left for '?' to match.
                    return false;
                }
            } else {
                if ((strIndex >= strLength) || (ch != str.charAt(strIndex))) {
                    return false;
                }
                strIndex++;
            }
        }
        return (strIndex == strLength);
    }

    public static void main(String[] paramert) {
        // Directory to search.
        String baseDIR = "d:/soufun/soufun_houses_img/";
        // Passed to findFiles, which does not use it for matching: jpg, jpeg,
        // gif and png files are all collected.
        String fileName = "*.jpg";
        List resultList = new ArrayList();
        FileSearcher.findFiles(baseDIR, fileName, resultList);
        if (resultList.size() == 0) {
            System.out.println("No File Fount.");
        } else {
            for (int i = 0; i < resultList.size(); i++) {
                // Print each hit, then stamp the watermark onto it in place.
                System.out.println(resultList.get(i));
                ImageUtils.pressImage("d:/soufun/4000_watermark.png",
                        resultList.get(i).toString(), 5, 5);
            }
        }
    }
}


package soufun;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Provides a single pause point, {@link #setThreadSleep()}, whose duration is
 * defined in one place.
 */
public class ThreadSleep {
    /** Length of the pause in milliseconds; 0 requests no deliberate delay. */
    private static final long SLEEP_MILLIS = 0;

    public static void main(String[] args) throws ParserException, IOException {
        // Intentionally empty.
    }

    /**
     * Pauses the calling thread for {@code SLEEP_MILLIS} milliseconds.
     *
     * <p>If the thread is interrupted, the stack trace is printed and the
     * thread's interrupt flag is set again so that callers further up can still
     * observe the interruption.
     */
    public static void setThreadSleep() {
        try {
            Thread.sleep(SLEEP_MILLIS);
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the flag; restore it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值