import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.ysccc.tools.Emptys;
/**
 * Command-line scraper for the company directory at waimao.mingluji.com.
 *
 * <p>The run has two phases:
 * <ol>
 *   <li>walk the paginated listing and store every company detail-page URL in
 *       {@code d:\address.txt}, one URL per line;</li>
 *   <li>read that file back, fetch each detail page and store the text of its field
 *       values in {@code d:\merchant.txt}, one tab-separated line per company.</li>
 * </ol>
 */
public class DataBugger {

    /** Charset used for every file read and write. */
    private static final Charset UTF_8 = Charset.forName("utf-8");

    /** Scheme and host that relative detail-page links are resolved against. */
    private static final String SITE_ROOT = "https://waimao.mingluji.com";

    /** Listing URL without the page number, which is appended per request. */
    private static final String LISTING_URL = SITE_ROOT + "/%E5%9B%9B%E5%B7%9D?page=";

    /** Number of listing pages to walk (pages 0 .. LISTING_PAGES - 1). */
    private static final int LISTING_PAGES = 16;

    /** Pause before each request, in milliseconds. */
    private static final long REQUEST_DELAY_MILLIS = 1000L;

    private static final File ADDRESS_FILE = new File("d:\\address.txt");
    private static final File MERCHANT_FILE = new File("d:\\merchant.txt");

    /**
     * Trusts every site so that HTTPS pages can be fetched regardless of their certificate.
     *
     * <p>Installs, as JVM-wide defaults for {@link HttpsURLConnection}, a hostname verifier
     * that accepts every host and a socket factory whose trust manager accepts every
     * certificate chain.
     *
     * <p><b>Security warning:</b> this disables TLS server authentication for every
     * {@code HttpsURLConnection} that relies on the defaults, which makes those connections
     * open to man-in-the-middle attacks. Do not use it for anything but throw-away scraping.
     *
     * <p>The method is best-effort and never throws: a failure is reported on
     * {@code System.err} and whatever defaults were in place before remain active.
     */
    public static void trustEveryone() {
        try {
            HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
                @Override
                public boolean verify(String hostname, SSLSession session) {
                    return true;
                }
            });
            SSLContext context = SSLContext.getInstance("TLS");
            context.init(null, new X509TrustManager[] { new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] chain, String authType)
                        throws CertificateException {
                    // Accept everything: returning normally means "trusted".
                }

                @Override
                public void checkServerTrusted(X509Certificate[] chain, String authType)
                        throws CertificateException {
                    // Accept everything: returning normally means "trusted".
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[0];
                }
            } }, new SecureRandom());
            HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
        } catch (Exception e) {
            // Deliberately broad and non-fatal so callers never see an exception from here,
            // but the failure must be visible instead of being swallowed silently.
            System.err.println("trustEveryone: could not install trust-all TLS defaults: " + e);
        }
    }

    /**
     * Runs both scraping phases.
     *
     * @param args ignored
     * @throws IOException if one of the output files cannot be written or read back
     * @throws InterruptedException if the thread is interrupted while pausing between requests
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        trustEveryone();
        Map<String, String> headers = browserHeaders();
        collectDetailLinks(headers);
        exportCompanyDetails(headers);
    }

    /** Builds the browser-like request headers sent with every page request. */
    private static Map<String, String> browserHeaders() {
        Map<String, String> headers = new LinkedHashMap<>();
        headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        // Only advertise gzip: the body is decoded by jsoup, which does not decode Brotli
        // ("br"), so offering it risks receiving a response that cannot be parsed.
        headers.put("Accept-Encoding", "gzip");
        headers.put("Accept-Language", "zh-CN,zh;q=0.9");
        headers.put("Cache-Control", "max-age=0");
        headers.put("Connection", "keep-alive");
        // No explicit Host header: the HTTP client derives it from the request URL, and a
        // hard-coded value would be wrong as soon as it differs from the requested host.
        headers.put("Upgrade-Insecure-Requests", "1");
        headers.put("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36");
        return headers;
    }

    /**
     * Walks the listing pages and writes all detail-page URLs found to the address file.
     * A page that cannot be fetched is reported on {@code System.err} and skipped.
     */
    private static void collectDetailLinks(Map<String, String> headers)
            throws IOException, InterruptedException {
        StringBuilder links = new StringBuilder();
        for (int page = 0; page < LISTING_PAGES; page++) {
            System.out.println("当前页码" + page);
            Thread.sleep(REQUEST_DELAY_MILLIS);
            try {
                Document doc = Jsoup.connect(LISTING_URL + page).headers(headers).get();
                Elements anchors = doc.select("div.view-content span.field-content a");
                for (Element anchor : anchors) {
                    links.append(SITE_ROOT).append(anchor.attr("href")).append("\r\n");
                }
            } catch (IOException e) {
                System.err.println("Skipping listing page " + page + ": " + e);
            }
            // Rewritten after every page so the links gathered so far are on disk even if
            // the run stops on a later page.
            FileUtils.write(ADDRESS_FILE, links.toString(), UTF_8);
        }
    }

    /**
     * Reads the address file, fetches every line that contains {@code https://} and writes
     * one tab-separated line of field texts per fetched page to the merchant file.
     * A page that cannot be fetched is reported on {@code System.err} and skipped, so a
     * single failing request does not discard the rows already collected.
     */
    private static void exportCompanyDetails(Map<String, String> headers)
            throws IOException, InterruptedException {
        List<String> urls = FileUtils.readLines(ADDRESS_FILE, UTF_8);
        StringBuilder rows = new StringBuilder();
        int index = 0;
        int total = urls.size();
        for (String url : urls) {
            System.out.println("当前企业:" + (index++) + ",企业总数:" + total);
            Thread.sleep(REQUEST_DELAY_MILLIS);
            if (!(Emptys.isNotEmpty(url) && StringUtils.contains(url, "https://"))) {
                continue;
            }
            try {
                Document doc = Jsoup.connect(url).headers(headers).get();
                Elements fields = doc.select("div.content fieldset span.field-item span");
                for (Element field : fields) {
                    rows.append(field.text()).append("\t");
                }
                rows.append("\r\n");
            } catch (IOException e) {
                System.err.println("Skipping " + url + ": " + e);
            }
        }
        FileUtils.write(MERCHANT_FILE, rows.toString(), UTF_8);
    }
}
// Reposted from: https://my.oschina.net/u/1261213/blog/2243441