又一采集器,呵呵
JDBC.java
package com.baoruan;
import java.sql.*;
/**
 * Opens JDBC connections for the crawler.
 *
 * <p>The no-argument {@link #getConnection()} keeps the original hard-coded
 * local MySQL settings; {@link #getConnection(String, String, String)} lets
 * callers supply their own URL and credentials.
 */
public class JDBC {
    /** Driver class loaded explicitly before each connection attempt. */
    private static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";

    /** Default JDBC URL used by {@link #getConnection()}. */
    private static final String DEFAULT_URL =
            "jdbc:mysql://localhost/baoruan?useUnicode=true&characterEncoding=UTF-8";

    /** Default database user used by {@link #getConnection()}. */
    private static final String DEFAULT_USER = "root";

    // NOTE(review): the password is committed in source; prefer passing real
    // credentials through getConnection(url, user, password).
    private static final String DEFAULT_PASSWORD = "123456";

    /** Most recently opened connection; stays null until a connection attempt succeeds. */
    Connection con;

    /**
     * Opens a connection using the built-in default URL and credentials.
     *
     * @return the value of {@link #con} after the attempt (see
     *         {@link #getConnection(String, String, String)})
     */
    public Connection getConnection() {
        return getConnection(DEFAULT_URL, DEFAULT_USER, DEFAULT_PASSWORD);
    }

    /**
     * Opens a connection to the given database and remembers it in {@link #con}.
     *
     * <p>Failures are reported with a stack trace and are not rethrown. When the
     * attempt fails, {@link #con} is left untouched, so the return value is
     * whatever connection this instance opened before, or {@code null} if it
     * never opened one. Callers must therefore check for {@code null}.
     *
     * @param url      JDBC URL of the database
     * @param user     database user name
     * @param password database password
     * @return the value of {@link #con} after the attempt
     */
    public Connection getConnection(String url, String user, String password) {
        try {
            Class.forName(DRIVER_CLASS);
            System.out.println("数据库驱动加载成功!");
        } catch (ClassNotFoundException e) {
            // Still try to connect below: DriverManager may know a suitable
            // driver even though this class name could not be loaded.
            e.printStackTrace();
        }
        try {
            con = DriverManager.getConnection(url, user, password);
            System.out.println("数据库连接成功!");
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return con;
    }

    /** Manual smoke test: attempts one connection with the default settings. */
    public static void main(String[] args) {
        new JDBC().getConnection();
    }
}
JDBCConnect.java
package com.baoruan;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
/**
 * Thin helper that runs raw SQL strings over one shared connection.
 *
 * <p>The connection and the most recently prepared statement are kept in
 * static fields, so every instance shares (and each constructor call
 * replaces) the same connection.
 *
 * <p>NOTE(review): both methods execute the SQL text exactly as given; callers
 * are responsible for escaping any values they concatenate into it.
 */
public class JDBCConnect {
    /** Connection shared by all instances; replaced by every constructor call. */
    static Connection con;

    /** The statement most recently prepared by {@link #read} or {@link #update}. */
    static PreparedStatement sql;

    /** Opens a new connection through {@link JDBC#getConnection()} and stores it in {@link #con}. */
    public JDBCConnect() {
        JDBC conn = new JDBC();
        con = conn.getConnection();
    }

    /**
     * Runs a query and returns its result set.
     *
     * <p>The statement is created as scroll-insensitive and read-only so that
     * callers may use {@code last()} / {@code getRow()} on the result; those
     * calls are not permitted on the forward-only result sets the plain
     * {@code prepareStatement(String)} overload produces.
     *
     * <p>The statement has to stay open while the caller reads the result.
     * Callers should release both when done, for example with
     * {@code res.getStatement().close()}.
     *
     * @param sqlworld complete SQL query text
     * @return the open result set, or {@code null} if the query failed
     *         (the failure is printed, not rethrown)
     */
    public ResultSet read(String sqlworld) {
        ResultSet res = null;
        PreparedStatement statement = null;
        try {
            statement = con.prepareStatement(sqlworld,
                    ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
            sql = statement;
            res = statement.executeQuery();
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("数据库读取异常");
            // No result set will ever be handed out for this statement, so
            // nobody else could close it.
            closeQuietly(statement);
        }
        return res;
    }

    /**
     * Runs an INSERT/UPDATE/DELETE statement.
     *
     * <p>Failures are printed and not rethrown. The statement is always closed,
     * including when execution fails.
     *
     * @param sqlworld complete SQL statement text
     */
    public void update(String sqlworld) {
        PreparedStatement statement = null;
        try {
            statement = con.prepareStatement(sqlworld);
            sql = statement;
            statement.executeUpdate();
            System.out.println("更新成功!");
        } catch (Exception e) {
            System.out.println("更新异常!");
            e.printStackTrace();
        } finally {
            closeQuietly(statement);
        }
    }

    /** Closes the statement if it is non-null, ignoring any error raised by close(). */
    private static void closeQuietly(PreparedStatement statement) {
        if (statement == null) {
            return;
        }
        try {
            statement.close();
        } catch (Exception ignored) {
            // Best-effort cleanup; the primary failure (if any) was already reported.
        }
    }
}
SavePackageThread.java
package com.baoruan;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Runnable that downloads one HTTP resource to a local file.
 *
 * <p>Intended to be run on its own thread: {@link #run()} reports failures on
 * stderr instead of throwing.
 */
public class SavePackageThread implements Runnable{
    /** Address of the resource to download. */
    private String url;

    /** Local path the resource is written to. */
    private String savePath;

    /** Creates an instance with no URL and no target; {@link #run()} will then do nothing. */
    public SavePackageThread(){
    }

    /**
     * @param url      HTTP address of the resource to download
     * @param savePath local file path to write the resource to
     */
    public SavePackageThread(String url, String savePath){
        this.url = url;
        this.savePath = savePath;
    }

    /**
     * Ensures the target file and its parent directories exist.
     *
     * @throws Exception if the file cannot be created
     */
    public void checkFile() throws Exception{
        File file = new File(savePath);
        File parentFile = file.getParentFile();
        // getParentFile() is null for a bare file name such as "a.apk".
        if (parentFile != null && !parentFile.isDirectory()) {
            parentFile.mkdirs();
        }
        if (!file.exists()) {
            file.createNewFile();
        }
    }

    /**
     * Opens a buffered stream on the configured URL.
     *
     * @return the response stream, or {@code null} when no URL is configured
     *         or the connection fails (the failure is printed)
     */
    public InputStream getInputStream(){
        if (this.url == null || "".equals(this.url)) {
            return null;
        }
        try{
            URL address = new URL(this.url);
            HttpURLConnection httpConn = (HttpURLConnection) address.openConnection();
            httpConn.setRequestProperty("Connection", "Keep-Alive"); // ask for a persistent connection
            httpConn.setConnectTimeout(60 * 1000 * 5); // connect timeout: 5 minutes
            // The original comment said "connect with GET" while the code sent a
            // body-less POST; a plain download is a GET.
            httpConn.setRequestMethod("GET");
            httpConn.setAllowUserInteraction(true);
            return new BufferedInputStream(httpConn.getInputStream(), 1024 * 8);
        }catch(Exception ex){
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Downloads the URL to {@code savePath}. Only HTTP(S) URLs are supported.
     *
     * <p>Nothing is created on disk when no URL or target is configured. If the
     * transfer fails after the output file was opened, the partial file is
     * deleted so a truncated download is never mistaken for a finished one.
     */
    @Override
    public void run() {
        if (this.url == null || "".equals(this.url) || this.savePath == null) {
            System.err.println("SavePackageThread: nothing to download (url=" + this.url
                    + ", savePath=" + this.savePath + ")");
            return;
        }
        File target = new File(this.savePath);
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory()) {
            parent.mkdirs();
        }
        InputStream in = null;
        FileOutputStream out = null;
        boolean completed = false;
        try {
            HttpURLConnection connection = (HttpURLConnection) new URL(this.url).openConnection();
            // Open the response first so a failed request never leaves a file behind.
            in = connection.getInputStream();
            out = new FileOutputStream(target);
            byte[] buffer = new byte[4096];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            out.close();
            completed = true;
        } catch (Exception e) {
            System.err.println("SavePackageThread: download failed: " + this.url + " -> " + this.savePath);
            e.printStackTrace();
        } finally {
            closeQuietly(in);
            closeQuietly(out);
            // Only remove what this run started writing; a file that was never
            // opened here (e.g. an earlier good download) is left alone.
            if (!completed && out != null) {
                target.delete();
            }
        }
    }

    /** Closes the resource if it is non-null, ignoring any error raised by close(). */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception ignored) {
            // Best-effort cleanup; the primary failure (if any) was already reported.
        }
    }
}
Client.java
package com.baoruan;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
/**
 * Crawler for game pages on baoruan.com.
 *
 * <p>{@link #main} walks a fixed range of game ids. For each page it extracts
 * the game's metadata, starts background downloads of the package and the
 * screenshots into {@code C:/upload/}, and records the game in the
 * {@code baoruan} table through {@link JDBCConnect}.
 */
public class Client {
    private static final int BUFFER_SIZE = 1024*80;

    /** Site root prepended to the relative links found in pages. */
    private static final String SITE_ROOT = "http://baoruan.com";

    /** Markup that precedes / follows the relative link to a game's download page. */
    private static final String DOWNLOAD_LINK_HEAD =
            "<img src=\"http://baoruan.com/images/down.gif\" alt='' /> <a href=\"";
    private static final String DOWNLOAD_LINK_FOOT = "\">下载安装";

    /** An element that contains no nested element; group 1 is its text. */
    private static final Pattern INNERMOST_TAG = Pattern.compile("<[^>]+>([^<]*)</[^>]+>");

    /** A "v" directly followed by a digit, which starts the version part of a title. */
    private static final Pattern VERSION_MARKER = Pattern.compile("v\\d");

    /** Local paths handed out for this game's screenshots, in download order. */
    private List<String> imageUrlList = new ArrayList<String>();

    public List<String> getImageUrlList() {
        return imageUrlList;
    }

    public void setImageUrlList(List<String> imageUrlList) {
        this.imageUrlList = imageUrlList;
    }

    /**
     * Crawls game ids 1105572 (inclusive) to 1109420 (exclusive), using a fresh
     * {@code Client} per game so each game gets its own screenshot list.
     *
     * @throws Exception if reading a database result fails
     */
    public static void main(String[] args) throws Exception {
        JDBCConnect connect = new JDBCConnect();
        String baseLinkLeft = "http://baoruan.com/download/downpage/freegamedown/gid/"; // ids seen: 1100279..1109419
        String baseLinkRight = "/nopage/1/mid/12418";
        for (int i = 1105572; i < 1109420; i++) {
            String urlAddress = baseLinkLeft + i + baseLinkRight;
            System.out.println("urlAddress---------------->" + urlAddress);
            new Client().processGame(connect, urlAddress, i);
        }
    }

    /**
     * Handles one game page. If the game is already in the database, only
     * (re)downloads its package and updates the version; otherwise collects
     * all metadata and inserts a new row.
     */
    private void processGame(JDBCConnect connect, String urlAddress, int gameId)
            throws java.sql.SQLException {
        String webContent = httpclient(urlAddress);
        // A failed fetch yields null; very short pages are skipped as not being game pages.
        if (webContent == null || webContent.length() < 1000) {
            return;
        }

        String title = indexOfContent(webContent, "<card id=\"main\" title=\"", "-宝软网");
        if (title == null || "".equals(title)) {
            return;
        }
        String gameName = title;
        String version = "";
        // Cut at the first "v<digit>" that is not at the very start. Splitting on
        // a bare "v" (as before) broke any name that itself contains a "v".
        Matcher versionMarker = VERSION_MARKER.matcher(title);
        if (versionMarker.find(1)) {
            gameName = title.substring(0, versionMarker.start());
            version = title.substring(versionMarker.start() + 1);
        }

        ResultSet gameIsExist = connect.read(
                "select * from baoruan where gameName = '" + escapeSql(gameName) + "'");
        if (gameIsExist == null) {
            // Without a lookup we cannot tell new from existing; skip rather than insert a duplicate.
            System.err.println("lookup failed, skipping: " + urlAddress);
            return;
        }
        String id = "";
        boolean exists = false;
        try {
            // Track existence while iterating; last()/getRow() are not allowed
            // on forward-only result sets.
            while (gameIsExist.next()) {
                exists = true;
                id = gameIsExist.getString("id");
            }
        } finally {
            closeQuery(gameIsExist);
        }

        String remoteApkurl = resolvePackageUrl(webContent);

        if (exists) { // game already recorded: just fetch the package
            startDownload(remoteApkurl, "C:/upload/" + id + "/" + id + ".apk");
            if (!"".equals(version)) {
                connect.update("update baoruan set version = '" + escapeSql(version)
                        + "' where id = " + id);
            }
            return;
        }

        String description = indexOfContent(webContent, "介绍:",
                "<img src=\"http://baoruan.com/images/down.gif\" alt='' />");
        String model = indexOfContent(webContent, "当前适配机型:", "<br />");
        String categoryCopy = indexOfContent(webContent, "分类:", "<a href=\"/download/cutpic/show");

        String apkurl = "C:/upload/" + gameId + "/" + gameId + ".apk";
        startDownload(remoteApkurl, apkurl);
        String category = gameTypeindexof(categoryCopy);

        // Screenshots: find the gallery link, then download every image it lists.
        String gallerySpan = indexOfContent(webContent, "分类:", "截图</a>");
        String galleryPath = indexOfContent(gallerySpan, "</a><a href=\"", "\">");
        if (galleryPath != null) {
            String galleryUrl = SITE_ROOT + galleryPath;
            System.out.println("imageurl---------->" + galleryUrl);
            getImageUrlFromURLFirst(galleryUrl, "C:/upload/" + gameId + "/");
        }

        // Paths are stored relative to the C:/ root.
        String imageurl = getStringFromList(",").replaceAll("C:/", "");
        apkurl = apkurl.replaceAll("C:/", "");
        description = description == null ? "" : description.replaceAll("'", "‘");

        System.out.println("gameName------------->" + gameName);
        System.out.println("version------------->" + version);
        System.out.println("description------------->" + description);
        System.out.println("category------------->" + category);
        System.out.println("apkurl------------->" + apkurl);
        System.out.println("imageurl------------->" + getStringFromList(","));

        String sql = "insert into baoruan(gameName, category, imageurl, description, version, apkurl, model)values('"
                + escapeSql(gameName) + "','" + escapeSql(category) + "','" + escapeSql(imageurl) + "','"
                + escapeSql(description) + "','" + escapeSql(version) + "','" + escapeSql(apkurl) + "','"
                + escapeSql(model) + "')";
        System.out.println("sql_----------------->" + sql);
        connect.update(sql);
    }

    /**
     * Follows the "download" link of a game page and extracts the real package
     * address from the {@code ontimer} attribute of the page it leads to.
     *
     * @return the package address, or {@code null} if any step fails
     */
    private String resolvePackageUrl(String gamePage) {
        String downloadPath = indexOfContent(gamePage, DOWNLOAD_LINK_HEAD, DOWNLOAD_LINK_FOOT);
        if (downloadPath == null) {
            return null;
        }
        System.out.println("apkurlCopy------------->" + downloadPath);
        String downloadPageUrl = SITE_ROOT + downloadPath;
        System.out.println("apkurlCopy---------------->" + downloadPageUrl);
        String gamePackagePage = httpclient(downloadPageUrl);
        System.out.println("gamePackagePage----------------->" + gamePackagePage);
        return indexOfContent(gamePackagePage, "ontimer=\"", "\"");
    }

    /** Starts a background download unless no remote address was found. */
    private static void startDownload(String remoteUrl, String localPath) {
        if (remoteUrl == null || "".equals(remoteUrl)) {
            System.err.println("no package url found for " + localPath);
            return;
        }
        new Thread(new SavePackageThread(remoteUrl, localPath)).start();
    }

    /**
     * Escapes a value for use inside a single-quoted SQL string literal;
     * {@code null} becomes the empty string.
     *
     * <p>NOTE(review): backslashes are doubled on the assumption that the MySQL
     * server runs with backslash escapes enabled (its default) - confirm.
     * Bound parameters would be preferable, but {@link JDBCConnect} only
     * accepts complete SQL strings.
     */
    private static String escapeSql(String value) {
        if (value == null) {
            return "";
        }
        return value.replace("\\", "\\\\").replace("'", "''");
    }

    /** Closes a result set and the statement that produced it, ignoring close errors. */
    private static void closeQuery(ResultSet rs) {
        java.sql.Statement owner = null;
        try {
            owner = rs.getStatement();
        } catch (java.sql.SQLException ignored) {
            // Best-effort cleanup.
        }
        try {
            rs.close();
        } catch (java.sql.SQLException ignored) {
            // Best-effort cleanup.
        }
        if (owner != null) {
            try {
                owner.close();
            } catch (java.sql.SQLException ignored) {
                // Best-effort cleanup.
            }
        }
    }

    /**
     * Strips HTML elements from a fragment, keeping only their text.
     *
     * <p>Innermost elements are unwrapped repeatedly until none is left, so
     * nested markup such as {@code <a><b>x</b></a>} is reduced to {@code x}.
     *
     * @param content HTML fragment; may be {@code null} or empty
     * @return the fragment without paired tags; {@code null}/empty input is
     *         returned unchanged
     */
    public String gameTypeindexof(String content) {
        if (content == null || "".equals(content)) {
            return content;
        }
        Matcher m = INNERMOST_TAG.matcher(content);
        while (m.find()) {
            // quoteReplacement: the text may contain '$' or '\', which would
            // otherwise be read as group references. Each pass shortens the
            // string, so the loop terminates.
            content = m.replaceFirst(Matcher.quoteReplacement(m.group(1)));
            m = INNERMOST_TAG.matcher(content);
        }
        return content;
    }

    /**
     * Returns the text between the first occurrence of {@code head} and the
     * next occurrence of {@code footer} after it (plain substring search).
     *
     * @return the enclosed text, or {@code null} if either marker is missing
     *         or any argument is {@code null}
     */
    public String indexOfContent(String content, String head, String footer) {
        if (content == null || head == null || footer == null) {
            return null;
        }
        int headAt = content.indexOf(head);
        if (headAt < 0) {
            return null;
        }
        int bodyStart = headAt + head.length();
        int footerAt = content.indexOf(footer, bodyStart);
        if (footerAt < 0) {
            return null;
        }
        return content.substring(bodyStart, footerAt);
    }

    /**
     * Fetches a page with commons-httpclient and returns its body decoded as
     * UTF-8, with line terminators removed.
     *
     * @param url address of the page
     * @return the page text, or {@code null} if the request fails or the
     *         status is not 200 (the failure is printed)
     */
    public String httpclient(String url) {
        GetMethod getmethod = null;
        try {
            HttpClient httpclient = new HttpClient();
            httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT,"Mozilla/5.0 (X11; U; Linux i686; zh-CN; rv:1.9.1.2) Gecko/20090803 Fedora/3.5.2-2.fc11 Firefox/3.5.2");
            getmethod = new GetMethod(url);
            // Use the library's default recovery policy for failed requests.
            getmethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                    new DefaultHttpMethodRetryHandler());
            getmethod.getParams().setParameter(
                    HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF-8");
            int code = httpclient.executeMethod(getmethod);
            if (code != HttpStatus.SC_OK) {
                System.err.println("网页读取失败! HTTP " + code + " <- " + url);
                return null;
            }
            BufferedReader reader = new BufferedReader(new InputStreamReader(
                    getmethod.getResponseBodyAsStream(), "UTF-8"));
            StringBuilder page = new StringBuilder();
            String line;
            // Lines are joined without separators; the marker searches in this
            // class run against that single-line form.
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
            return page.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Release on every path, not only on success.
            if (getmethod != null) {
                getmethod.releaseConnection();
            }
        }
    }

    /**
     * Opens a buffered GET stream on the given address.
     *
     * @return the response stream, or {@code null} if the connection fails
     */
    public BufferedInputStream getInputStream(String urlAddress){
        try{
            URL url = new URL(urlAddress);
            HttpURLConnection httpConn = (HttpURLConnection)url.openConnection();
            httpConn.setRequestProperty("Connection", "Keep-Alive"); // ask for a persistent connection
            httpConn.setConnectTimeout(60 * 1000 * 5); // connect timeout: 5 minutes
            httpConn.setRequestMethod("GET");
            httpConn.setAllowUserInteraction(true);
            return new BufferedInputStream(httpConn.getInputStream(), Client.BUFFER_SIZE);
        }catch(Exception ex){
            return null;
        }
    }

    /**
     * Reads a reader to the end, dropping line terminators.
     *
     * @param reader source; any {@code Reader} is accepted
     * @return everything read before the end or before an I/O error (which is
     *         printed), or {@code null} if {@code reader} is {@code null}
     */
    public String getStringFromReader(Reader reader){
        if (reader == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder();
        try{
            // Wrap instead of casting so non-buffered readers work too.
            BufferedReader br = reader instanceof BufferedReader
                    ? (BufferedReader) reader
                    : new BufferedReader(reader);
            String str;
            while ((str = br.readLine()) != null) {
                sb.append(str);
            }
        }catch(Exception ex){
            ex.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Reads a screenshot gallery page and processes every screenshot page it
     * links to via {@link #getImageUrlFromURL}.
     *
     * @param urlAddress address of the gallery page
     * @param savePath   directory prefix (ending in "/") for the saved images
     */
    public void getImageUrlFromURLFirst(String urlAddress,String savePath){
        if (urlAddress == null || "".equals(urlAddress)) {
            return;
        }
        String content = httpclient(urlAddress);
        if (content == null || "".equals(content)) {
            return;
        }
        System.out.println("content------------>" + content);
        String urlSpan = indexOfContent(content,".jpg\" /><br />","图片经过压缩");
        if (urlSpan == null || "".equals(urlSpan)) {
            return;
        }
        String[] urlList = urlSpan.split("<a href = \"");
        if (urlList.length <= 1) {
            return;
        }
        for (String url : urlList) {
            // Skips the text before the first link and other short fragments.
            if (url.length() < 10) {
                continue;
            }
            String pagePath = url.split("\">")[0];
            System.out.println(pagePath);
            getImageUrlFromURL(SITE_ROOT + pagePath, savePath);
        }
    }

    /**
     * Reads one screenshot page, starts a background download of its image and
     * records the chosen local path in the image list. Only addresses that
     * contain ".jpg" are downloaded.
     *
     * @param urlAddress address of the screenshot page
     * @param savePath   directory prefix (ending in "/") for the saved image
     */
    public void getImageUrlFromURL(String urlAddress,String savePath){
        if (urlAddress == null || "".equals(urlAddress)) {
            return;
        }
        String content = httpclient(urlAddress);
        if (content == null || "".equals(content)) {
            return;
        }
        System.out.println("content------------>" + content);
        String hotaddress = indexOfContent(content,"-=截图欣赏=-<br /><img src=\"","\" />");
        if (hotaddress == null || "".equals(hotaddress)) {
            return;
        }
        if (hotaddress.indexOf(".jpg") > 0) {
            String imageUrl = getImageName(savePath);
            new Thread(new SavePackageThread(hotaddress, imageUrl)).start();
            this.imageUrlList.add(imageUrl);
        }
    }

    /**
     * Picks the first {@code <savePath><n>.jpg} (n = 1, 2, ...) that neither
     * exists on disk nor has already been handed out by this client.
     *
     * <p>The second check matters because downloads run in the background: the
     * previous image's file may not exist yet when the next name is chosen.
     */
    public String getImageName(String savePath){
        int i = 1;
        String candidate = savePath + i + ".jpg";
        while (new File(candidate).exists() || this.imageUrlList.contains(candidate)) {
            i++;
            candidate = savePath + i + ".jpg";
        }
        return candidate;
    }

    /**
     * Concatenates the recorded image paths, appending {@code joinSign} after
     * every entry (including the last one).
     */
    public String getStringFromList(String joinSign){
        StringBuilder sb = new StringBuilder();
        for (String str : this.imageUrlList) {
            sb.append(str).append(joinSign);
        }
        return sb.toString();
    }
}