day02 Java定时任务,爬取纽约共享单车数据
文章目录
一、JAVA的定时任务方法
1. 创建运行器
/**
 * Inner class (shown standalone here) implementing the unit of work to run.
 * Each call to {@link #run()} prints a single placeholder line.
 */
class MyRunnable implements Runnable {
    @Override
    public void run() {
        // Placeholder standing in for the real work.
        final String placeholder = "此处为需要执行的东西...";
        System.out.println(placeholder);
    }
}
2. 创建服务并设定开启
/**
 * Component that starts a recurring background task as soon as it is constructed.
 *
 * <p>The task runs on a dedicated single-thread scheduler, first immediately and then
 * every 20 seconds. The scheduler's worker thread is not a daemon thread, so it must be
 * stopped explicitly via {@link #close()}.
 *
 * <p>NOTE(review): Spring is expected to call {@code close()} on singleton beans that
 * implement {@link AutoCloseable} when the application context shuts down — confirm for
 * the Spring version in use.
 */
@Component
public class GetStationStatus implements AutoCloseable {
    // Scheduling service backed by a thread pool (here: exactly one worker thread).
    final ScheduledExecutorService scheduledExecutorService = Executors.newSingleThreadScheduledExecutor();

    /**
     * Creates the component and schedules the periodic task.
     */
    public GetStationStatus() {
        MyRunnable myRunnable = new MyRunnable();
        // Arguments: the periodic task, delay before the first run, interval between
        // runs, and the time unit of both values.
        scheduledExecutorService.scheduleAtFixedRate(myRunnable, 0, 20, TimeUnit.SECONDS);
    }

    /**
     * Stops the scheduler: no further runs are started, while a run that is already in
     * progress is allowed to finish.
     */
    @Override
    public void close() {
        scheduledExecutorService.shutdown();
    }

    /**
     * Inner task class executed by the scheduler.
     *
     * <p>Per the {@code scheduleAtFixedRate} contract, an exception escaping
     * {@link #run()} suppresses all subsequent executions, so real work placed here
     * should catch and report its own failures.
     */
    class MyRunnable implements Runnable {
        @Override
        public void run() {
            System.out.println("此处为需要执行的东西...");
        }
    }
}
二、HttpClient 整合使用
1. 依赖包
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
2. 简单使用
使用HttpClient发送请求、接收响应很简单,一般需要如下几步即可。
-
创建HttpClient对象。
-
创建请求方法的实例,并指定请求URL。如果需要发送GET请求,创建HttpGet对象;如果需要发送POST请求,创建HttpPost对象。
-
如果需要发送请求参数:GET请求可以用URIBuilder把参数拼接到URL上(见下文);POST请求可调用setEntity(HttpEntity entity)方法把参数放入请求体。注意:旧教程中常见的setParams(HttpParams params)自HttpClient 4.3起已被标记为废弃,不建议再使用。
-
调用HttpClient对象的execute(HttpUriRequest request)发送请求,该方法返回一个HttpResponse。
-
调用HttpResponse的getAllHeaders()、getHeaders(String name)等方法可获取服务器的响应头;调用HttpResponse的getEntity()方法可获取HttpEntity对象,该对象包装了服务器的响应内容。程序可通过该对象获取服务器的响应内容。
-
释放连接。无论执行方法是否成功,都必须释放连接
3. 简单的get
/**
 * Sends a plain GET request and prints the response body decoded as UTF-8.
 *
 * <p>Both the client and the response are opened in a try-with-resources statement so
 * the connection is released even when the request or the body decoding fails.
 *
 * @throws IOException if the request cannot be executed or the body cannot be read
 */
@Test
public void TestGet() throws IOException {
    String urlTest = "http://baidu.com";
    // 1. Create the GET request for the target URL.
    HttpGet httpGetTest1 = new HttpGet(urlTest);
    // 2. Create the client and execute the request; both are closed automatically.
    try (CloseableHttpClient httpclient = HttpClients.createDefault();
         CloseableHttpResponse response = httpclient.execute(httpGetTest1)) {
        // 3. Read the response entity and print it as text.
        HttpEntity entityTest = response.getEntity();
        System.out.println(EntityUtils.toString(entityTest, "utf-8"));
    }
}
HttpClient 提供了用于拼接 URI 及查询参数的工具类 URIBuilder(以下方法均属于 URIBuilder,而不是 URI):
- new URIBuilder() 创建构建器
- setScheme 设置协议,如 http / https
- setHost 设置主机名
- setPath 设置路径
- setParameter 设置查询参数
- build() 最后调用,生成 java.net.URI 对象
/**
 * Builds a search URL with {@code URIBuilder} and prints the URI carried by the
 * resulting GET request.
 *
 * <p>A malformed URI now fails the test instead of being printed and ignored, which
 * previously let the test continue with a {@code null} URI.
 *
 * @throws URISyntaxException if the assembled components do not form a valid URI
 */
@Test
public void Test1() throws URISyntaxException {
    URI uri = new URIBuilder()
            .setScheme("http")
            .setHost("www.google.com")
            .setPath("/search")
            .setParameter("q", "httpclient")
            .setParameter("btnG", "Google Search")
            .setParameter("aq", "f")
            .setParameter("oq", "")
            .build();
    HttpGet httpget = new HttpGet(uri);
    System.out.println(httpget.getURI());
}
4. 忽略ssl的封装HttpClientUtils
因为目前要爬取的接口需要通过https访问,为了简化示例,这里让客户端信任所有证书(仅适合学习和测试,生产环境不应跳过证书校验)。
1 创建工具类
package com.tgm.bigdata.utils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.ssl.TrustStrategy;
import org.apache.http.util.EntityUtils;
import javax.net.ssl.SSLContext;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
public class HttpClientUtils {
/*
Http协议GET请求
*/
public static String httpGet(String url) throws Exception{
//初始化HttpClient
CloseableHttpClient httpClient = HttpClients.createDefault();
//创建HttpGet
HttpGet httpGet = new HttpGet(url);
//发起请求,获取response对象
CloseableHttpResponse response = httpClient.execute(httpGet);
//获取请求状态码
//response.getStatusLine().getStatusCode();
//获取返回数据实体对象
HttpEntity entity = response.getEntity();
//转为字符串
String result = EntityUtils.toString(entity,"UTF-8");
return result;
}
/*
Http协议Post请求
*/
public static String httpPost (String url,String json) throws Exception{
//初始HttpClient
CloseableHttpClient httpClient = HttpClients.createDefault();
//创建Post对象
HttpPost httpPost = new HttpPost(url);
//设置Content-Type
httpPost.setHeader("Content-Type","application/json");
//写入JSON数据
httpPost.setEntity(new StringEntity(json));
//发起请求,获取response对象
CloseableHttpResponse response = httpClient.execute(httpPost);
//获取请求码
//response.getStatusLine().getStatusCode();
//获取返回数据实体对象
HttpEntity entity = response.getEntity();
//转为字符串
String result = EntityUtils.toString(entity,"UTF-8");
return result;
}
public static String httpsGet(String url) throws Exception{
CloseableHttpClient hp = createSSLClientDefault();
HttpGet hg = new HttpGet(url);
CloseableHttpResponse response = hp.execute(hg);
HttpEntity entity = response.getEntity();
String content = EntityUtils.toString(entity,"UTF-8");
hp.close();
return content;
}
public static String httpsPost(String url, String json) throws Exception{
CloseableHttpClient hp = createSSLClientDefault();
HttpPost httpPost = new HttpPost(url);
httpPost.setHeader("Content-Type","application/json");
httpPost.setEntity(new StringEntity(json));
CloseableHttpResponse response = hp.execute(httpPost);
HttpEntity entity = response.getEntity();
String content = EntityUtils.toString(entity,"UTF-8");
hp.close();
return content;
}
public static CloseableHttpClient createSSLClientDefault() throws Exception{
SSLContext sslContext = new SSLContextBuilder().loadTrustMaterial(null, new TrustStrategy(){
//信任所有
public boolean isTrusted(X509Certificate[] chain, String authType) throws CertificateException {
return true;
}
}).build();
SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(sslContext);
return HttpClients.custom().setSSLSocketFactory(sslsf).build();
}
}
5. 测试调用
/**
 * Fetches the station status feed over HTTPS and prints the response body.
 *
 * <p>Failures propagate to the test runner; catching and printing them would let the
 * test pass even when the request failed.
 *
 * @throws Exception if the request fails or the body cannot be read
 */
@Test
public void TestGetHttps() throws Exception {
    System.out.println(HttpClientUtils.httpsGet("https://gbfs.citibikenyc.com/gbfs/en/station_status.json"));
}
成功返回数据
三、Java创建文件并以时间命名
1. 时间工具类封装
package com.tgm.bigdata.utils;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Locale;
/**
 * Static helpers for formatting the current time and converting between
 * {@link Date} values and date strings.
 *
 * <p>All formatting and parsing uses {@link SimpleDateFormat} with the JVM's default
 * time zone and locale. A new formatter is created for every call, so the methods
 * share no formatter state.
 */
public class MyTimeUtils {
    /** Pattern for a full timestamp, e.g. {@code 2022-07-09 07:51:20}. */
    private static final String PATTERN_LONG = "yyyy-MM-dd HH:mm:ss";
    /** Pattern for a date without a time part, e.g. {@code 2022-07-09}. */
    private static final String PATTERN_SHORT = "yyyy-MM-dd";

    /**
     * Returns the current time truncated to whole seconds.
     *
     * <p>The value is produced by formatting "now" as {@code yyyy-MM-dd HH:mm:ss} and
     * parsing that text back. Parsing starts at offset 0; starting at offset 8, as the
     * previous version did, made the parse fail and return {@code null}.
     *
     * @return the current time with the milliseconds dropped
     */
    public static Date getNowDate() {
        return parseFromStart(PATTERN_LONG, formatNow(PATTERN_LONG));
    }

    /**
     * Returns today's date with the time part dropped.
     *
     * <p>Produced by formatting "now" as {@code yyyy-MM-dd} and parsing that text back
     * from offset 0 (the previous offset of 8 always yielded {@code null}).
     *
     * @return today's date with no time-of-day component
     */
    public static Date getNowDateShort() {
        return parseFromStart(PATTERN_SHORT, formatNow(PATTERN_SHORT));
    }

    /**
     * Returns the current time as text.
     *
     * @return the current time formatted as {@code yyyy-MM-dd HH:mm:ss}
     */
    public static String getStringDate() {
        return formatNow(PATTERN_LONG);
    }

    /**
     * Returns today's date as text.
     *
     * @return the current date formatted as {@code yyyy-MM-dd}
     */
    public static String getStringDateShort() {
        return formatNow(PATTERN_SHORT);
    }

    /**
     * Returns the current time of day as text.
     *
     * @return the current time formatted as {@code HH:mm:ss}
     */
    public static String getTimeShort() {
        return formatNow("HH:mm:ss");
    }

    /**
     * Parses a full timestamp string.
     *
     * @param strDate text starting with a {@code yyyy-MM-dd HH:mm:ss} timestamp
     * @return the parsed time, or {@code null} if the text cannot be parsed
     */
    public static Date strToDateLong(String strDate) {
        return parseFromStart(PATTERN_LONG, strDate);
    }

    /**
     * Formats a time as a full timestamp string.
     *
     * @param dateDate the time to format
     * @return the time formatted as {@code yyyy-MM-dd HH:mm:ss}
     */
    public static String dateToStrLong(java.util.Date dateDate) {
        return new SimpleDateFormat(PATTERN_LONG).format(dateDate);
    }

    /**
     * Formats a time as a date-only string.
     *
     * @param dateDate the time to format
     * @return the time formatted as {@code yyyy-MM-dd}
     */
    public static String dateToStr(java.util.Date dateDate) {
        return new SimpleDateFormat(PATTERN_SHORT).format(dateDate);
    }

    /**
     * Parses a date-only string.
     *
     * @param strDate text starting with a {@code yyyy-MM-dd} date
     * @return the parsed date, or {@code null} if the text cannot be parsed
     */
    public static Date strToDate(String strDate) {
        return parseFromStart(PATTERN_SHORT, strDate);
    }

    /**
     * Returns the current time.
     *
     * @return a new {@link Date} representing now
     */
    public static Date getNow() {
        return new Date();
    }

    /**
     * Returns the current time as compact text.
     *
     * @return the current time formatted as {@code yyyyMMdd HHmmss}
     */
    public static String getStringToday() {
        return formatNow("yyyyMMdd HHmmss");
    }

    /**
     * Returns the current hour of day.
     *
     * @return the two-digit hour on the 24-hour clock ({@code 00}-{@code 23})
     */
    public static String getHour() {
        return formatNow("HH");
    }

    /**
     * Returns the current minute of the hour.
     *
     * @return the two-digit minute ({@code 00}-{@code 59})
     */
    public static String getTime() {
        return formatNow("mm");
    }

    /**
     * Formats the current time with a caller-supplied pattern.
     *
     * <p>Pattern letters are case-sensitive: use lowercase {@code y} for the calendar
     * year, and {@code HH} for the 24-hour clock. {@code hh} is the 12-hour clock and,
     * without an AM/PM marker, gives the same text for morning and afternoon.
     *
     * @param sformat a {@link SimpleDateFormat} pattern, e.g. {@code yyyyMMddHHmmss}
     * @return the current time formatted with {@code sformat}
     */
    public static String getUserDate(String sformat) {
        return formatNow(sformat);
    }

    /** Formats the current time with the given pattern. */
    private static String formatNow(String pattern) {
        return new SimpleDateFormat(pattern).format(new Date());
    }

    /** Parses {@code text} from its first character; returns {@code null} when parsing fails. */
    private static Date parseFromStart(String pattern, String text) {
        return new SimpleDateFormat(pattern).parse(text, new ParsePosition(0));
    }
}
2. 时间工具类基础测试
/**
 * Prints the result of several MyTimeUtils helpers. The comment under each call is the
 * output recorded in the author's run on 2022-07-09.
 */
@Test
public void test(){
    System.out.println(MyTimeUtils.getNowDate());
    // null (as printed in the recorded run)
    System.out.println(MyTimeUtils.getNow());
    // Sat Jul 09 07:51:20 CST 2022
    System.out.println(MyTimeUtils.getNowDateShort());
    // null (as printed in the recorded run)
    System.out.println(MyTimeUtils.getStringDate());
    // 2022-07-09 07:51:20
    System.out.println(MyTimeUtils.getStringDateShort());
    // 2022-07-09
    System.out.println(MyTimeUtils.getStringToday());
    // 20220709 075120
    // HH is the 24-hour clock. The 12-hour "hh" has no AM/PM marker in this pattern,
    // so it would print the same text for 07:51:20 and 19:51:20.
    System.out.println(MyTimeUtils.getUserDate("yyyyMMddHHmmss"));
    // 20220709075120
}
从测试结果来看,getStringDateShort 返回 yyyy-MM-dd 形式的日期字符串,可以用来命名文件夹;getUserDate 可以自定义时间格式,可以用来生成文件名。
3. 新建文件夹及文件名
// Content to write.
String Obj_str = "tgm";
// Target file; any missing parent directories are created below.
File file = new File("路径");
// Parent directory of the target; null when the path has no parent component.
File fileParent = file.getParentFile();
// Create the parent directories if they do not exist yet.
if (fileParent != null && !fileParent.exists()) {
    fileParent.mkdirs();
}
// Opening a FileOutputStream creates the file when it is missing, so no separate
// createNewFile()/exists() step is needed. try-with-resources closes the stream
// even if the write fails.
try (FileOutputStream fos = new FileOutputStream(file)) {
    // Write the content as UTF-8 bytes.
    fos.write(Obj_str.getBytes(StandardCharsets.UTF_8));
    System.out.println("成功!!!");
    fos.flush();
}
注意:目标文件的父目录不存在时,会先通过 mkdirs() 自动创建。
四、组合:定时爬取数据,按日期建立文件夹,每次爬取保存为一个以时间命名的文件
/**
 * Downloads the station status feed once and stores it under
 * {@code data/<yyyy-MM-dd>/<yyyyMMddHHmmss>.json}.
 *
 * <p>Failures are reported and swallowed on purpose: {@code Runnable.run()} cannot
 * propagate checked exceptions, and when this task runs under
 * {@code scheduleAtFixedRate} an escaping exception would suppress every later run.
 */
@Override
public void run() {
    // Target URL.
    String urlStr = "https://gbfs.citibikenyc.com/gbfs/en/station_status.json";
    // Folder name: the current date.
    String pathDateStr = MyTimeUtils.getStringDateShort();
    // File name: HH is the 24-hour clock. With the 12-hour "hh", a file written at
    // 07:51:20 and one written at 19:51:20 would get the same name.
    String pathFileStr = MyTimeUtils.getUserDate("yyyyMMddHHmmss") + ".json";
    try {
        // Fetch the feed.
        // NOTE(review): MyHttpClientUtils and MyFileUtils are not shown here; this
        // assumes httpsGet may throw, as HttpClientUtils.httpsGet does — confirm.
        String content = MyHttpClientUtils.httpsGet(urlStr);
        // Store it under the dated folder.
        MyFileUtils.saveFile("data/" + pathDateStr + "/" + pathFileStr, content);
    } catch (Exception e) {
        // Report the failure and let the next scheduled run try again.
        e.printStackTrace();
    }
}