Java爬取接口的数据
首先让大家看看爬取的数据结果 46884 条数据
这是正在爬取文件的过程
这是收获
废话不多说直接上代码
因为要先有数据才能爬取文件,所以先来一段爬取数据的代码
pom文件添加的依赖包
只添加一下关键的包
<!-- commons (commons-lang3 is declared once; a repeated declaration triggers a Maven duplicate-dependency warning) -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.5</version>
</dependency>
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.5</version>
</dependency>
<!-- MybatisPlus -->
<dependency>
    <groupId>com.baomidou</groupId>
    <artifactId>mybatis-plus-boot-starter</artifactId>
    <version>3.1.1</version>
</dependency>
<!-- Gson -->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.5</version>
</dependency>
<!-- okhttp -->
<dependency>
    <groupId>com.squareup.okhttp3</groupId>
    <artifactId>okhttp</artifactId>
    <version>3.14.2</version>
</dependency>
<!-- hutool -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>4.5.16</version>
</dependency>
关键代码
/**
*
* @param param1 参数1
* @param param2 参数2
* @param param3 参数3
*
*/
private void getDataToLocalDataBase(String param1 , String param2 , String param3) {
HttpParam httpParam = new HttpParam();
httpParam.setApiUrl("爬取的网站");
httpParam.setApiPath("接口地址");
Map<String, String> parms = new HashMap<>();
parms.put("param1 ", param1 );
parms.put("param2 ", param2 );
parms.put("param3 ", param3 );
//....更多参数...
/* parms.put("strCustName","");*/
//创建格式化参数
Gson paramGson = new GsonBuilder().create();
String requestParam = paramGson.toJson(parms);
try {
//post请求
HttpResult postResult = HttpUtil.post(httpParam, requestParam);
String result = postResult.getResult();
int status = postResult.getStatus();
Gson gson = new Gson();
if (status == 200) {
if (!StringUtils.isEmpty(result)) {
JsonObject jsonObject = (JsonObject) new JsonParser().parse(result);
JsonElement jsonElement = jsonObject.get("result");
String newResult = jsonElement.toString();
//xxData 与接口值返回相同的实体类 List<xxData>这里也可也是其他类型 按需去做
List<xxData> list = gson.fromJson(newResult, new TypeToken<List<xxData>>() {
}.getType());
log.info("数据有:{}",list.size());
if (list != null && list.size() > 0) {
//业务代码...把数据插入到本地数据库
} else {
log.info("无数据");
}
} else {
log.error("错误数据{}", result );
}
} catch (Exception e) {
e.printStackTrace();
}
}
上面用的工具类
- HttpParam
import okhttp3.MediaType;
/**
 * Holds the settings for a single HTTP call: target URL, path, timeouts and the
 * media type of the request body.
 */
public class HttpParam {

    /** JSON media type with UTF-8 charset; used as the default request body type. */
    public static final MediaType MEDIA_TYPE_JSON = MediaType.parse("application/json; charset=utf-8");

    private static final int DEFAULT_READ_TIMEOUT = 30 * 1000;
    private static final int DEFAULT_WRITE_TIMEOUT = 30 * 1000;
    private static final int DEFAULT_CONNECT_TIMEOUT = 2 * 1000;

    /** Base URL of the API. */
    private String apiUrl;

    /** Path of the endpoint, appended to the base URL. */
    private String apiPath;

    /** Read timeout (HttpUtil passes it to OkHttp as milliseconds). */
    private int readTimeout = DEFAULT_READ_TIMEOUT;

    /** Write timeout (HttpUtil passes it to OkHttp as milliseconds). */
    private int writeTimeout = DEFAULT_WRITE_TIMEOUT;

    /** Connect timeout (HttpUtil passes it to OkHttp as milliseconds). */
    private int connectTimeout = DEFAULT_CONNECT_TIMEOUT;

    /** Media type of the request body. */
    private MediaType mediaType = MEDIA_TYPE_JSON;

    /** @return the base URL of the API */
    public String getApiUrl() {
        return this.apiUrl;
    }

    /** @param apiUrl the base URL of the API */
    public void setApiUrl(String apiUrl) {
        this.apiUrl = apiUrl;
    }

    /** @return the endpoint path */
    public String getApiPath() {
        return this.apiPath;
    }

    /** @param apiPath the endpoint path */
    public void setApiPath(String apiPath) {
        this.apiPath = apiPath;
    }

    /** @return the read timeout */
    public int getReadTimeout() {
        return this.readTimeout;
    }

    /** @param readTimeout the read timeout */
    public void setReadTimeout(int readTimeout) {
        this.readTimeout = readTimeout;
    }

    /** @return the write timeout */
    public int getWriteTimeout() {
        return this.writeTimeout;
    }

    /** @param writeTimeout the write timeout */
    public void setWriteTimeout(int writeTimeout) {
        this.writeTimeout = writeTimeout;
    }

    /** @return the connect timeout */
    public int getConnectTimeout() {
        return this.connectTimeout;
    }

    /** @param connectTimeout the connect timeout */
    public void setConnectTimeout(int connectTimeout) {
        this.connectTimeout = connectTimeout;
    }

    /** @return the media type of the request body */
    public MediaType getMediaType() {
        return this.mediaType;
    }

    /** @param mediaType the media type of the request body */
    public void setMediaType(MediaType mediaType) {
        this.mediaType = mediaType;
    }
}
- HttpResult 这个用于接收结果的类,大家可根据需要自定义
/**
 * Simple container for the outcome of an HTTP call: a status code, the result
 * text, and an optional typed result object.
 *
 * @param <T> type of the typed result object
 */
public class HttpResult<T> {

    /** Status code of the call. */
    private int status;

    /** Result as text. */
    private String result;

    /** Typed result object; may be {@code null}. */
    private T resultObject;

    /** Creates an empty result: status {@code 0}, no text and no object. */
    public HttpResult() {
        this(0, null, null);
    }

    /**
     * Creates a fully populated result.
     *
     * @param status       status code
     * @param result       result text
     * @param resultObject typed result object, may be {@code null}
     */
    public HttpResult(int status, String result, T resultObject) {
        this.status = status;
        this.result = result;
        this.resultObject = resultObject;
    }

    /** @return the status code */
    public int getStatus() {
        return this.status;
    }

    /** @param status the status code */
    public void setStatus(int status) {
        this.status = status;
    }

    /** @return the result text */
    public String getResult() {
        return this.result;
    }

    /** @param result the result text */
    public void setResult(String result) {
        this.result = result;
    }

    /** @return the typed result object, possibly {@code null} */
    public T getResultObject() {
        return this.resultObject;
    }

    /** @param resultObject the typed result object */
    public void setResultObject(T resultObject) {
        this.resultObject = resultObject;
    }
}
重点来了
- HttpUtil
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@Slf4j
public class HttpUtil {
private static Gson gson = new GsonBuilder().serializeNulls().disableHtmlEscaping().create();
/**
* get请求
*/
public static String get(HttpParam restParam) throws Exception {
String url = restParam.getApiUrl();
if (restParam.getApiPath() != null) {
url = url+restParam.getApiPath();
}
Request request = new Request.Builder()
.url(url)
.get()
.build();
return exec(restParam, request).getResult();
}
/**
* get请求
*/
public static <T> HttpResult<T> get(HttpParam restParam, Class<T> tClass) throws Exception {
String url = restParam.getApiUrl();
if (restParam.getApiPath() != null) {
url = url+restParam.getApiPath();
}
Request request = new Request.Builder()
.url(url)
.get()
.build();
return exec(restParam, request, tClass);
}
/**
* POST请求json数据
*/
public static <T> HttpResult<T> post(HttpParam restParam, Class<T> tClass) throws Exception {
String url = restParam.getApiUrl();
if (restParam.getApiPath() != null) {
url = url + restParam.getApiPath();
}
Request request = new Request.Builder().url(url).build();
return exec(restParam, request, tClass);
}
/**
* POST请求json数据
*/
public static <T> HttpResult<T> post(HttpParam restParam, String reqJsonData, Class<T> tClass) throws Exception {
String url = restParam.getApiUrl();
if (restParam.getApiPath() != null) {
url = url+restParam.getApiPath();
}
RequestBody body = RequestBody.create(restParam.getMediaType(), reqJsonData);
Request request = new Request.Builder()
.url(url).post(body).build();
return exec(restParam, request, tClass);
}
/**
* POST请求map数据
*/
public static <T> HttpResult<T> post(HttpParam restParam, Map<String, String> parms, Class<T> tClass) throws Exception {
String url = restParam.getApiUrl();
if (restParam.getApiPath() != null) {
url = url+restParam.getApiPath();
}
FormBody.Builder builder = new FormBody.Builder();
if (parms != null) {
for (Map.Entry<String, String> entry : parms.entrySet()) {
builder.add(entry.getKey(), entry.getValue());
}
}
FormBody body = builder.build();
Request request = new Request.Builder()
.url(url)
.post(body)
.build();
return exec(restParam, request, tClass);
}
/**
* POST请求map数据 返回结果
*/
public static <T> HttpResult<T> post(HttpParam restParam, String reqJsonData) throws Exception {
String url = restParam.getApiUrl();
if (restParam.getApiPath() != null) {
url = url+restParam.getApiPath();
}
RequestBody body = RequestBody.create(restParam.getMediaType(), reqJsonData);
Request request = new Request.Builder()
.url(url).post(body).build();
return exec(restParam, request);
}
/**
* 返回值封装成对象
*/
private static <T> HttpResult<T> exec(
HttpParam restParam,
Request request,
Class<T> tClass) throws Exception {
HttpResult clientResult = exec(restParam, request);
String result = clientResult.getResult();
int status = clientResult.getStatus();
T t = null;
if (status == 200) {
if (result != null && "".equalsIgnoreCase(result)) {
t = gson.fromJson(result, tClass);
}
} else {
try {
result = gson.fromJson(result, String.class);
} catch (Exception ex) {
ex.printStackTrace();
}
}
return new HttpResult<>(clientResult.getStatus(), result, t);
}
/**
* 执行方法
*/
private static HttpResult exec(
HttpParam restParam,
Request request) throws Exception {
HttpResult result = null;
okhttp3.OkHttpClient client = null;
ResponseBody responseBody = null;
try {
client = new okhttp3.OkHttpClient();
client.newBuilder()
.connectTimeout(restParam.getConnectTimeout(), TimeUnit.MILLISECONDS)
.readTimeout(restParam.getReadTimeout(), TimeUnit.MILLISECONDS)
.writeTimeout(restParam.getWriteTimeout(), TimeUnit.MILLISECONDS);
Response response = client.newCall(request).execute();
if (response.isSuccessful()) {
responseBody = response.body();
if (responseBody != null) {
String responseString = responseBody.string();
result = new HttpResult<>(response.code(), responseString, null);
}
} else {
throw new Exception(response.message());
}
} catch (Exception ex) {
throw new Exception(ex.getMessage());
} finally {
if (responseBody != null) {
responseBody.close();
}
if (client != null) {
client.dispatcher().executorService().shutdown(); //清除并关闭线程池
client.connectionPool().evictAll(); //清除并关闭连接池
try {
if (client.cache() != null) {
client.cache().close(); //清除cache
}
} catch (IOException e) {
throw new Exception(e.getMessage());
}
}
}
return result;
}
}
Java爬取文件
爬取数据的就告一段落了 接下来 就是拿这些数据去爬文件了
其实在网上找了很多爬取文件的文章,都不靠谱,最后得到了协助,三行代码就搞定了
在这之前还是要导入关键的工具包滴,就是最上面的pom文件包
// Download the remote file to the local destination.
// Note: despite its name, "folder" must point to the target FILE, not to a directory.
URL url = new URL("文件地址");
File folder = new File("本地存储文件地址");
// Use the overload with connect/read timeouts (milliseconds): the two-argument
// copyURLToFile sets no timeout and can block forever on an unresponsive server.
FileUtils.copyURLToFile(url, folder, 10 * 1000, 60 * 1000);
github地址
看完有用就点个赞吧