JAVA爬取json数据
爬取码市上的项目并生成 Excel 文档,方便查看。
打开码市的项目列表页可以发现,这些项目并不是随页面直接加载出来的:通过浏览器开发者工具中的 XHR 请求可以看到,页面是再次请求接口、以 JSON 的形式获取项目数据的;从请求地址中还可以看出,分页是通过地址末尾的数字参数(page)来区分的。
![](https://img-blog.csdnimg.cn/20200719134811483.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3p6aF9wcmlkZQ==,size_16,color_FFFFFF,t_70)
代码如下
package Test.Write;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.eclipse.jetty.util.StringUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
/**
 * Crawls project listings from the Codemart (码市) JSON API and exports them to an
 * Excel workbook so they can be browsed offline.
 *
 * <p>The flow is: download a fixed number of listing pages as JSON text, convert every
 * entry of each page's {@code rewards} array into a {@link Job}, then write all jobs
 * into a single-sheet workbook.
 *
 * @author Zzh
 */
public class CatchJobs {

    /** Listing endpoint; the page number is appended to select a page. */
    private static final String PROJECT_API = "https://codemart.com/api/project?page=";

    /** Number of listing pages to download (pages 1..PAGE_COUNT). */
    private static final int PAGE_COUNT = 10;

    /** Connect/read timeout so a stalled server cannot hang the program forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Output file. HSSFWorkbook produces the binary Excel 97-2003 format, so the
     * extension must be {@code .xls}; naming it {@code .xlsx} makes Excel reject or
     * warn about the file because content and extension do not match.
     */
    private static final String OUTPUT_PATH = "d://码市.xls";

    /**
     * Program entry point: downloads the listing pages, parses them and exports the
     * result to Excel.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Raw JSON text of every downloaded page
        List<String> jsons = new ArrayList<String>();
        for (int page = 1; page <= PAGE_COUNT; page++) {
            jsons.add(getJson(PROJECT_API + page));
        }
        List<Job> jobList = parseJson(jsons);
        exportExcel(jobList);
    }

    /**
     * Downloads the response body of the given URL as UTF-8 text, requesting JSON.
     *
     * <p>This is best-effort: on a malformed URL or any I/O failure the error is
     * reported on stderr and an empty string is returned, so callers can skip the
     * page instead of receiving a truncated, unparsable body.
     *
     * @param path the URL to request
     * @return the response body with line breaks removed, or {@code ""} on failure
     */
    private static String getJson(String path) {
        StringBuilder body = new StringBuilder();
        try {
            URLConnection connection = new URL(path).openConnection();
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            // Ask the server for the JSON representation
            connection.addRequestProperty("Accept", "application/json");
            // The raw stream is declared as its own resource so it is closed even if
            // constructing the reader fails.
            try (InputStream input = connection.getInputStream();
                    BufferedReader reader = new BufferedReader(new InputStreamReader(input, "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    body.append(line);
                }
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL: " + path);
            e.printStackTrace();
            return "";
        } catch (IOException e) {
            System.err.println("Failed to download: " + path);
            e.printStackTrace();
            return "";
        }
        return body.toString();
    }

    /**
     * Converts the downloaded pages into {@link Job} objects.
     *
     * <p>Pages that are blank (for example because the download failed), that parse to
     * a JSON null object, or that have no {@code rewards} array are skipped. Missing or
     * blank fields of an entry are stored as empty strings.
     *
     * @param jobJsons raw JSON text, one element per listing page
     * @return all jobs found, in page order
     */
    private static List<Job> parseJson(List<String> jobJsons) {
        List<Job> jobList = new ArrayList<Job>();
        // One iteration per downloaded page
        for (String jobJson : jobJsons) {
            if (!StringUtil.isNotBlank(jobJson)) {
                continue; // nothing was downloaded for this page
            }
            JSONObject json = JSONObject.fromObject(jobJson);
            if (json.isNullObject()) {
                continue;
            }
            // optJSONArray yields null instead of failing when the key is absent
            JSONArray rewardsJson = json.optJSONArray("rewards");
            if (rewardsJson == null) {
                continue;
            }
            for (int i = 0; i < rewardsJson.size(); i++) {
                JSONObject reward = rewardsJson.optJSONObject(i);
                if (reward == null || reward.isNullObject()) {
                    continue; // not a usable JSON object
                }
                Job job = new Job();
                job.setId(textOf(reward, "id"));
                job.setName(textOf(reward, "name"));
                job.setPrice(textOf(reward, "price"));
                job.setStatusText(textOf(reward, "statusText"));
                job.setDescription(textOf(reward, "description"));
                job.setDuration(textOf(reward, "duration"));
                job.setRoles(textOf(reward, "roles"));
                jobList.add(job);
            }
        }
        return jobList;
    }

    /**
     * Reads a field as text, mapping a missing or blank value to an empty string.
     *
     * <p>Uses {@code optString} rather than {@code getString}, because the latter
     * throws when the key is absent and would make the blank-check fallback useless.
     *
     * @param source the JSON object to read from
     * @param key the field name
     * @return the field's text, or {@code ""} when missing or blank
     */
    private static String textOf(JSONObject source, String key) {
        String value = source.optString(key, "");
        return StringUtil.isNotBlank(value) ? value : "";
    }

    /**
     * Writes the jobs into a single-sheet Excel workbook at {@link #OUTPUT_PATH}.
     *
     * <p>Row 0 holds the column headers; each job occupies one following row. I/O
     * failures while writing are reported via the stack trace and otherwise ignored.
     *
     * @param jobs the jobs to export
     */
    private static void exportExcel(List<Job> jobs) {
        // Column headers
        String[] title = { "项目编号", "名字", "价格", "状态", "介绍", "周期", "分类" };
        HSSFWorkbook workbook = new HSSFWorkbook();
        HSSFSheet sheet = workbook.createSheet();

        // Header row
        HSSFRow headerRow = sheet.createRow(0);
        for (int col = 0; col < title.length; col++) {
            HSSFCell cell = headerRow.createCell(col);
            cell.setCellValue(title[col]);
        }

        // Data rows start right below the header
        int rowIndex = 1;
        for (Job job : jobs) {
            HSSFRow row = sheet.createRow(rowIndex++);
            row.createCell(0).setCellValue(job.getId());
            row.createCell(1).setCellValue(job.getName());
            row.createCell(2).setCellValue(job.getPrice());
            row.createCell(3).setCellValue(job.getStatusText());
            row.createCell(4).setCellValue(job.getDescription());
            row.createCell(5).setCellValue(job.getDuration());
            row.createCell(6).setCellValue(job.getRoles());
        }

        File file = new File(OUTPUT_PATH);
        try {
            // openOutputStream creates the file (and parent directories) if needed;
            // try-with-resources closes the stream even when write() fails.
            try (FileOutputStream stream = FileUtils.openOutputStream(file)) {
                workbook.write(stream);
            }
            // Only report success after the stream has been closed without error
            System.out.println("出力好了");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
这样就可以通过生成的文件直接查看啦