学习笔记仅供参考
1.xiancheng.class
package com.example.util;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.*;
import java.util.ArrayList;
import java.util.Random;
/**
 * Crawls one result page of the journal search at qikan.chaoxing.com and the detail page of
 * every article listed on it, collecting one {@link LW} per article.
 *
 * <p>Each call to {@link #run()} requests the result page whose number is the current value of
 * {@code q}, then increments {@code q}. Finished articles are appended both to {@link #lws} and to
 * the static {@code LW.lws} list.
 *
 * <p>Proxies are selected by writing the JVM-wide {@code http.proxyHost} / {@code http.proxyPort}
 * system properties, so concurrent runs overwrite each other's proxy settings.
 */
public class xiancheng implements Runnable {
    /** Scheme and host prepended to the relative article links found on a result page. */
    private static final String SITE_ROOT = "http://qikan.chaoxing.com";
    /** Upper bound on articles read from one result page (the search URL asks for size=50). */
    private static final int PAGE_SIZE = 50;
    /** Number of keyword slots available on {@link LW} (setKey, setKey1 .. setKey7). */
    private static final int MAX_KEYWORDS = 8;
    /** A new proxy address is requested before every this-many detail pages. */
    private static final int PROXY_ROTATION_INTERVAL = 10;
    /** Timeout handed to Jsoup for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;
    /** User-Agent strings; one is picked at random per run. */
    private static final String[] USER_AGENTS = {
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
        // The original entry was truncated with an ellipsis character; restored to a full string.
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0"
    };

    /** Proxy list handed to the two-argument constructor; shared by all instances. */
    static ArrayList<String> ips = new ArrayList<String>();
    /** Articles collected by this instance's {@link #run()} calls. */
    public ArrayList<LW> lws = new ArrayList<>();
    /** Shared default instance, kept for existing callers. */
    static xiancheng xian = new xiancheng();
    static String name = "";
    /** Number of the result page the next {@link #run()} will request. */
    int q = 0;

    /** Creates a crawler that starts at result page 0. */
    public xiancheng() {
    }

    /**
     * Creates a crawler for one result page.
     *
     * @param ipss  proxy list; stored in the static {@link #ips} field, replacing the previous one
     * @param pages number of the result page to request
     */
    public xiancheng(ArrayList<String> ipss, int pages) {
        ips = ipss;
        q = pages;
    }

    /** @return the live list of articles collected so far (not a copy) */
    public ArrayList<LW> getlws() {
        return lws;
    }

    /**
     * Requests one proxy address from the proxy API via {@code pachong.ips1()}.
     *
     * <p>Before the request, the JVM-wide HTTP proxy is set to 113.140.84.97:80, so the API call
     * itself goes through that proxy. The original code misspelled the host property key, which
     * left the host unchanged and only replaced the port.
     *
     * @return the first address returned by the API, expected in {@code host:port} form
     * @throws IllegalStateException if the API returned no address at all
     * @throws InterruptedException  never thrown by this implementation; kept for callers that
     *                               already catch it
     */
    public String getip() throws InterruptedException {
        System.setProperty("http.proxyHost", "113.140.84.97");
        System.setProperty("http.proxyPort", "80");
        ArrayList<String> fetched = new pachong().ips1();
        if (fetched.isEmpty()) {
            throw new IllegalStateException("proxy API returned no address");
        }
        String ip = fetched.get(0);
        System.out.println("IP是:" + ip);
        return ip;
    }

    /**
     * Crawls result page {@code q}, increments {@code q}, and appends one {@link LW} per article
     * to {@link #lws} and {@code LW.lws}.
     *
     * <p>The run stops early, keeping the articles collected so far, when a page cannot be
     * downloaded or no usable proxy address can be obtained.
     */
    @Override
    public void run() {
        String listUrl = buildListUrl(q);
        q++;
        String userAgent = USER_AGENTS[new Random().nextInt(USER_AGENTS.length)];

        Document listPage;
        try {
            listPage = fetch(listUrl, userAgent);
        } catch (IOException e) {
            System.err.println("Could not load result page " + listUrl + ": " + e);
            return;
        }

        Elements rows = listPage.select("#liebiaoDivId tr");
        // Row 0 is skipped, as in the original indexing (presumably a header row - TODO confirm).
        // Bounding by the real row count avoids an IndexOutOfBoundsException on short pages.
        int articleCount = Math.min(PAGE_SIZE, rows.size() - 1);
        for (int i = 0; i < articleCount; i++) {
            if (i % PROXY_ROTATION_INTERVAL == 0 && !switchProxy()) {
                return;
            }
            Element row = rows.get(i + 1);
            String detailUrl = SITE_ROOT + row.child(1).child(0).attr("href").trim();

            Document detailPage;
            try {
                detailPage = fetch(detailUrl, userAgent);
            } catch (IOException e) {
                System.err.println("Could not load article page " + detailUrl + ": " + e);
                return;
            }
            System.out.println(i + 1 + ": " + detailUrl);

            LW lw = buildArticle(row, detailPage);
            lws.add(lw);
            LW.lws.add(lw);
        }
    }

    /** Builds the search URL for the given result page number. */
    private static String buildListUrl(int page) {
        try {
            return SITE_ROOT + "/searchjour?sw=" + URLEncoder.encode("西北农林科技大学", "utf-8")
                    + "&stryear=17&nosim=1&size=50&x=0_900&pages=" + page;
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 encoding is not available", e);
        }
    }

    /** Downloads and parses one page; HTTP error statuses are not treated as failures. */
    private static Document fetch(String url, String userAgent) throws IOException {
        return Jsoup.connect(url)
                .timeout(TIMEOUT_MILLIS)
                .ignoreHttpErrors(true)
                .userAgent(userAgent)
                .get();
    }

    /**
     * Obtains a fresh proxy address and installs it as the JVM-wide HTTP proxy.
     *
     * @return {@code false} if no usable {@code host:port} address could be obtained
     */
    private boolean switchProxy() {
        String address;
        try {
            address = getip();
        } catch (InterruptedException e) {
            // Restore the flag so the owner of this thread can still observe the interruption.
            Thread.currentThread().interrupt();
            System.err.println("Interrupted while requesting a proxy address");
            return false;
        }
        System.out.println(address);
        String[] hostAndPort = address.split(":");
        if (hostAndPort.length < 2) {
            System.err.println("Malformed proxy address: " + address);
            return false;
        }
        System.setProperty("http.proxyHost", hostAndPort[0]);
        System.setProperty("http.proxyPort", hostAndPort[1]);
        System.err.println(hostAndPort[0] + ":" + hostAndPort[1]);
        return true;
    }

    /**
     * Converts the issue text of a result row into the stored publish-time string.
     *
     * <ul>
     *   <li>Length 8 with a digit at index 6: first four characters + "-0" + that digit.</li>
     *   <li>Longer text with digits at indexes 6 and 7: first four characters + "-" + half of
     *       that two-digit number (integer division), as written in the original formula.</li>
     *   <li>Anything else: {@code "1"}.</li>
     * </ul>
     *
     * <p>The original compared the character code, not the digit value, against 0..9, so the
     * second case could never match; it also threw NumberFormatException on non-digit input.
     */
    private static String parsePublishTime(String text) {
        if (text.length() == 8 && Character.isDigit(text.charAt(6))) {
            return text.substring(0, 4) + "-0" + Integer.parseInt(String.valueOf(text.charAt(6)));
        }
        if (text.length() > 8 && Character.isDigit(text.charAt(6)) && Character.isDigit(text.charAt(7))) {
            return text.substring(0, 4) + "-" + Integer.parseInt(text.substring(6, 8)) / 2;
        }
        return "1";
    }

    /**
     * Combines the cells of a result row with the labelled rows of the article's detail page.
     *
     * @param row        result-page table row of the article
     * @param detailPage parsed detail page of the same article
     * @return a populated {@link LW}; keywords, fund and unit default to "" when absent, the
     *         abstract is left {@code null} when absent
     */
    private static LW buildArticle(Element row, Document detailPage) {
        String[] keywords = new String[MAX_KEYWORDS];
        String fund = null;
        String unit = null;
        String summary = null;

        for (Element detailRow : detailPage.select(".Fmian1 table tbody tr")) {
            if (detailRow.children().size() < 2) {
                continue; // no label/value pair in this row
            }
            String label = detailRow.child(0).text();
            Element value = detailRow.child(1);
            if (label.equals("【关 键 词】")) {
                Elements items = value.children();
                for (int z = 0; z < items.size() && z < MAX_KEYWORDS; z++) {
                    keywords[z] = items.get(z).text();
                }
            } else if (label.equals("【基 金】")) {
                fund = value.text();
            } else if (label.equals("【作者机构】")) {
                // Rows without a child element fall back to this fixed institution name.
                unit = value.children().size() == 0
                        ? "西北农林科技大学黄土高原土壤侵蚀与旱地农业国家重点实验室"
                        : value.child(0).text();
            } else if (label.equals("【摘 要】")) {
                summary = value.text();
            }
        }

        LW lw = new LW();
        lw.setSubject(row.child(1).text());
        lw.setAuthor(row.child(2).text());
        lw.setType("cssci");
        lw.setQuote_num(row.child(6).child(0).text());
        lw.setDownload_num(row.child(7).child(0).text());
        lw.setPeriodical(row.child(3).child(0).text());
        lw.setPublish_time(parsePublishTime(row.child(5).child(0).text()));
        lw.setKey(orEmpty(keywords[0]));
        lw.setKey1(orEmpty(keywords[1]));
        lw.setKey2(orEmpty(keywords[2]));
        lw.setKey3(orEmpty(keywords[3]));
        lw.setKey4(orEmpty(keywords[4]));
        lw.setKey5(orEmpty(keywords[5]));
        lw.setKey6(orEmpty(keywords[6]));
        lw.setKey7(orEmpty(keywords[7]));
        lw.setFund(orEmpty(fund));
        lw.setUnit(orEmpty(unit));
        lw.setAbstract1(summary);
        return lw;
    }

    /** Returns {@code value}, or the empty string when it is {@code null}. */
    private static String orEmpty(String value) {
        return value != null ? value : "";
    }
}
2.pachong.class
package com.example.util;
import com.example.mapper.bookmapper;
import com.example.service.bookservice;
import com.example.service.serviceimpl.bookserviceimpl;
import org.apache.xmlbeans.impl.xb.xsdschema.Public;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Component;
import org.springframework.stereotype.Controller;
import org.springframework.stereotype.Service;
import org.springframework.test.context.junit4.SpringRunner;
import javax.annotation.PostConstruct;
import javax.annotation.Resource;
import java.io.*;
import java.net.*;
import java.security.PublicKey;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Spring service that crawls the journal search at qikan.chaoxing.com, obtains proxy addresses
 * from two HTTP proxy APIs, and stores crawled articles through the injected {@code bookmapper}.
 *
 * <p>Proxies are selected by writing the JVM-wide {@code http.proxyHost} / {@code http.proxyPort}
 * system properties, so concurrent crawls overwrite each other's proxy settings.
 */
@Service
public class pachong {
    /** Scheme and host prepended to the relative article links found on a result page. */
    private static final String SITE_ROOT = "http://qikan.chaoxing.com";
    /** Upper bound on articles read from one result page (the search URL asks for size=50). */
    private static final int PAGE_SIZE = 50;
    /** Number of keyword slots available on {@link LW} (setKey, setKey1 .. setKey7). */
    private static final int MAX_KEYWORDS = 8;
    /** A new proxy address is taken before every this-many detail pages. */
    private static final int PROXY_ROTATION_INTERVAL = 10;
    /** Timeout handed to Jsoup for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;
    /** Number of result pages visited by {@link #xiancheng()}. */
    private static final int PAGES_TO_CRAWL = 1500;
    /** Address returned by {@link #ips1()} for most failures. */
    private static final String FALLBACK_PROXY = "1.1.1.1:80";
    /** User-Agent strings; one is picked at random per crawl. */
    private static final String[] USER_AGENTS = {
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",
        // The original entry was truncated with an ellipsis character; restored to a full string.
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0"
    };

    /** Persistence mapper, injected by Spring. */
    @Autowired
    bookmapper bm;

    /** Intentionally empty entry point. */
    public static void main(String[] args) throws IOException, InterruptedException {
    }

    /**
     * Crawls one result page and the detail page of every article on it.
     *
     * <p>The result is all-or-nothing, as in the original implementation: if the result page, any
     * detail page, or a usable proxy address cannot be obtained, an empty list is returned.
     *
     * @param q number of the result page to request
     * @return one {@link LW} per article on the page, or an empty list on failure
     */
    public ArrayList<LW> palunwen(Integer q) {
        ArrayList<LW> lws = new ArrayList<>();
        String listUrl = buildListUrl(q);
        String userAgent = USER_AGENTS[new Random().nextInt(USER_AGENTS.length)];

        Document listPage;
        try {
            listPage = fetch(listUrl, userAgent);
        } catch (IOException e) {
            System.err.println("Could not load result page " + listUrl + ": " + e);
            return lws;
        }

        Elements rows = listPage.select("#liebiaoDivId tr");
        // Row 0 is skipped, as in the original indexing (presumably a header row - TODO confirm).
        // Bounding by the real row count avoids an IndexOutOfBoundsException on short pages.
        int articleCount = Math.min(PAGE_SIZE, rows.size() - 1);

        ArrayList<String> proxyPool = new ArrayList<>();
        int nextProxy = 0;
        for (int i = 0; i < articleCount; i++) {
            if (i % PROXY_ROTATION_INTERVAL == 0) {
                // Refill as soon as the pool is used up. The original only refilled past index
                // 195 and never reset the index, which overran a short pool.
                if (nextProxy >= proxyPool.size()) {
                    proxyPool = ips1();
                    nextProxy = 0;
                }
                if (proxyPool.isEmpty() || !useProxy(proxyPool.get(nextProxy++))) {
                    return new ArrayList<>();
                }
            }
            Element row = rows.get(i + 1);
            String detailUrl = SITE_ROOT + row.child(1).child(0).attr("href").trim();

            Document detailPage;
            try {
                detailPage = fetch(detailUrl, userAgent);
            } catch (IOException e) {
                System.err.println("Could not load article page " + detailUrl + ": " + e);
                return new ArrayList<>();
            }
            System.out.println(i + 1 + ": " + detailUrl);
            lws.add(buildArticle(row, detailPage));
        }
        return lws;
    }

    /**
     * Downloads a proxy list from api.xiequ.cn, one address per response line.
     *
     * @return the non-blank response lines, trimmed, in response order
     * @throws IOException if the request fails or the response cannot be read
     */
    public ArrayList<String> ips() throws IOException {
        String path = "http://api.xiequ.cn/VAD/GetIp.aspx?act=get&num=100&time=30&plat=1&re=1&type=0&so=1&ow=1&spl=1&addr=&db=1";
        HttpURLConnection conn = (HttpURLConnection) new URL(path).openConnection();
        conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
        conn.setRequestProperty("contentType", "GBK");
        conn.setConnectTimeout(5 * 1000);
        conn.setRequestMethod("GET");
        return readProxyLines(conn, "GBK");
    }

    /**
     * Downloads proxy addresses from the API at 119.45.8.232 (the URL asks for {@code num=1}).
     *
     * <p>This method never throws for I/O problems: on any failure, or when the response contains
     * no address, it returns a one-element list holding a fixed fallback address.
     *
     * @return the non-blank response lines, trimmed, or a single fallback address
     */
    public ArrayList<String> ips1() {
        // NOTE(review): the API key is embedded in this URL; consider moving it to configuration.
        String path = "http://119.45.8.232/Api/?k=NRAX9VIN451I35QJP36SEM&num=1&type=1&f=1&repeat=1&respone=0&ptn=1";
        URL url;
        try {
            url = new URL(path);
        } catch (MalformedURLException e) {
            return singleProxy(FALLBACK_PROXY);
        }
        HttpURLConnection conn;
        try {
            conn = (HttpURLConnection) url.openConnection();
        } catch (IOException e) {
            // The original used a different fallback address for this one failure; preserved.
            return singleProxy("58.218.200.248:22069");
        }
        conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
        conn.setRequestProperty("contentType", "utf-8");
        conn.setConnectTimeout(5 * 1000);
        try {
            conn.setRequestMethod("GET");
            ArrayList<String> ipp = readProxyLines(conn, "utf-8");
            // An empty response would make callers fail on get(0); treat it like other failures.
            return ipp.isEmpty() ? singleProxy(FALLBACK_PROXY) : ipp;
        } catch (IOException e) {
            return singleProxy(FALLBACK_PROXY);
        }
    }

    /**
     * Runs a {@code xiancheng} crawl for result pages 0 to 1499 in sequence on the calling thread
     * and passes every collected article to {@code bm.insertlw}.
     *
     * @throws IOException          if the initial proxy list cannot be downloaded
     * @throws InterruptedException never thrown by this implementation; kept for existing callers
     */
    public void xiancheng() throws IOException, InterruptedException {
        ArrayList<String> ips = ips();
        for (int i = 0; i < PAGES_TO_CRAWL; i++) {
            xiancheng xian = new xiancheng(ips, i);
            xian.run();
            for (LW lw : xian.lws) {
                if (!bm.searchlw(lw.getSubject())) {
                    if (bm.lwcount() == null) {
                        lw.setId("1");
                    } else {
                        lw.setId(String.valueOf(Integer.valueOf(bm.lwcount()) + 1));
                    }
                }
                // NOTE(review): insertlw runs even when searchlw returned true, in which case no
                // id was assigned above. If searchlw means "already stored", this insert probably
                // belongs inside the if - confirm against bookmapper before changing.
                bm.insertlw(lw);
                System.out.println(lw.toString());
            }
        }
    }

    /** Builds the search URL for the given result page number. */
    private static String buildListUrl(Integer page) {
        try {
            return SITE_ROOT + "/searchjour?sw=" + URLEncoder.encode("西北农林科技大学", "utf-8")
                    + "&stryear=13&nosim=1&size=50&x=0_900&pages=" + String.valueOf(page);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 encoding is not available", e);
        }
    }

    /** Downloads and parses one page; HTTP error statuses are not treated as failures. */
    private static Document fetch(String url, String userAgent) throws IOException {
        return Jsoup.connect(url)
                .timeout(TIMEOUT_MILLIS)
                .ignoreHttpErrors(true)
                .userAgent(userAgent)
                .get();
    }

    /**
     * Installs {@code address} as the JVM-wide HTTP proxy.
     *
     * @return {@code false} if the address is not in {@code host:port} form
     */
    private static boolean useProxy(String address) {
        System.out.println(address);
        String[] hostAndPort = address.split(":");
        if (hostAndPort.length < 2) {
            System.err.println("Malformed proxy address: " + address);
            return false;
        }
        System.setProperty("http.proxyHost", hostAndPort[0]);
        System.setProperty("http.proxyPort", hostAndPort[1]);
        System.err.println(hostAndPort[0] + ":" + hostAndPort[1]);
        return true;
    }

    /** Wraps one address in a new mutable list. */
    private static ArrayList<String> singleProxy(String address) {
        ArrayList<String> s = new ArrayList<>();
        s.add(address);
        return s;
    }

    /**
     * Reads the response body line by line, echoing each line to stdout, and always closes the
     * reader and disconnects the connection.
     *
     * @return the non-blank lines, trimmed
     */
    private static ArrayList<String> readProxyLines(HttpURLConnection conn, String charset) throws IOException {
        ArrayList<String> addresses = new ArrayList<String>();
        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
                String address = line.trim();
                if (!address.isEmpty()) {
                    addresses.add(address);
                }
            }
        } finally {
            conn.disconnect();
        }
        return addresses;
    }

    /**
     * Converts the issue text of a result row into the stored publish-time string.
     *
     * <ul>
     *   <li>Length 8 with a digit at index 6: first four characters + "-0" + that digit.</li>
     *   <li>Longer text with digits at indexes 6 and 7: first four characters + "-" + half of
     *       that two-digit number (integer division), as written in the original formula.</li>
     *   <li>Anything else: {@code "1"}.</li>
     * </ul>
     *
     * <p>The original compared the character code, not the digit value, against 0..9, so the
     * second case could never match; it also threw NumberFormatException on non-digit input.
     */
    private static String parsePublishTime(String text) {
        if (text.length() == 8 && Character.isDigit(text.charAt(6))) {
            return text.substring(0, 4) + "-0" + Integer.parseInt(String.valueOf(text.charAt(6)));
        }
        if (text.length() > 8 && Character.isDigit(text.charAt(6)) && Character.isDigit(text.charAt(7))) {
            return text.substring(0, 4) + "-" + Integer.parseInt(text.substring(6, 8)) / 2;
        }
        return "1";
    }

    /**
     * Combines the cells of a result row with the labelled rows of the article's detail page.
     *
     * @param row        result-page table row of the article
     * @param detailPage parsed detail page of the same article
     * @return a populated {@link LW}; keywords, fund and unit default to "" when absent, the
     *         abstract is left {@code null} when absent
     */
    private static LW buildArticle(Element row, Document detailPage) {
        String[] keywords = new String[MAX_KEYWORDS];
        String fund = null;
        String unit = null;
        String summary = null;

        for (Element detailRow : detailPage.select(".Fmian1 table tbody tr")) {
            if (detailRow.children().size() < 2) {
                continue; // no label/value pair in this row
            }
            String label = detailRow.child(0).text();
            Element value = detailRow.child(1);
            if (label.equals("【关 键 词】")) {
                Elements items = value.children();
                for (int z = 0; z < items.size() && z < MAX_KEYWORDS; z++) {
                    keywords[z] = items.get(z).text();
                }
            } else if (label.equals("【基 金】")) {
                fund = value.text();
            } else if (label.equals("【作者机构】")) {
                // Rows without a child element fall back to this fixed institution name.
                unit = value.children().size() == 0
                        ? "西北农林科技大学黄土高原土壤侵蚀与旱地农业国家重点实验室"
                        : value.child(0).text();
            } else if (label.equals("【摘 要】")) {
                summary = value.text();
            }
        }

        LW lw = new LW();
        lw.setSubject(row.child(1).text());
        lw.setAuthor(row.child(2).text());
        lw.setType("cssci");
        lw.setQuote_num(row.child(6).child(0).text());
        lw.setDownload_num(row.child(7).child(0).text());
        lw.setPeriodical(row.child(3).child(0).text());
        lw.setPublish_time(parsePublishTime(row.child(5).child(0).text()));
        lw.setKey(orEmpty(keywords[0]));
        lw.setKey1(orEmpty(keywords[1]));
        lw.setKey2(orEmpty(keywords[2]));
        lw.setKey3(orEmpty(keywords[3]));
        lw.setKey4(orEmpty(keywords[4]));
        lw.setKey5(orEmpty(keywords[5]));
        lw.setKey6(orEmpty(keywords[6]));
        lw.setKey7(orEmpty(keywords[7]));
        lw.setFund(orEmpty(fund));
        lw.setUnit(orEmpty(unit));
        lw.setAbstract1(summary);
        return lw;
    }

    /** Returns {@code value}, or the empty string when it is {@code null}. */
    private static String orEmpty(String value) {
        return value != null ? value : "";
    }
}