Java爬币种汇率
核心jar包(下文代码除 jsoup 外,还用到了 Apache HttpClient 以及 Spring 的 HttpStatus,需要另行引入对应依赖)
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
所需实体类:包含爬虫返回数据的所有字段,各字段的具体含义可参考爬虫 URL 对应页面中的表头。
/**
 * One row of exchange-rate data scraped from the bank's rate table.
 *
 * <p>Every value is stored as the raw cell text taken from the page; nothing is
 * parsed into numbers or dates here. The crawler fills the fields from table
 * columns 0..6 in declaration order (currency name, buying rate, cash buying
 * rate, selling rate, cash selling rate, middle rate, publish time).
 */
public class CrawlerCurrencyDTO {
    /** Text of the "currency name" column. */
    private String currencyName;
    /** Text of the "buying rate" column. */
    private String buyingRate;
    /** Text of the "cash buying rate" column. */
    private String cashBuyingRate;
    /** Text of the "selling rate" column. */
    private String sellingRate;
    /** Text of the "cash selling rate" column. */
    private String cashSellingRate;
    /** Text of the "middle rate" column. */
    private String middleRate;
    /** Text of the "publish time" column. */
    private String pubTime;

    public String getCurrencyName() {
        return currencyName;
    }

    public void setCurrencyName(String currencyName) {
        this.currencyName = currencyName;
    }

    public String getBuyingRate() {
        return buyingRate;
    }

    public void setBuyingRate(String buyingRate) {
        this.buyingRate = buyingRate;
    }

    public String getCashBuyingRate() {
        return cashBuyingRate;
    }

    public void setCashBuyingRate(String cashBuyingRate) {
        this.cashBuyingRate = cashBuyingRate;
    }

    public String getSellingRate() {
        return sellingRate;
    }

    public void setSellingRate(String sellingRate) {
        this.sellingRate = sellingRate;
    }

    public String getCashSellingRate() {
        return cashSellingRate;
    }

    public void setCashSellingRate(String cashSellingRate) {
        this.cashSellingRate = cashSellingRate;
    }

    public String getMiddleRate() {
        return middleRate;
    }

    public void setMiddleRate(String middleRate) {
        this.middleRate = middleRate;
    }

    public String getPubTime() {
        return pubTime;
    }

    public void setPubTime(String pubTime) {
        this.pubTime = pubTime;
    }

    @Override
    public String toString() {
        return "CrawlerCurrencyDTO{"
                + "currencyName='" + currencyName + '\''
                + ", buyingRate='" + buyingRate + '\''
                + ", cashBuyingRate='" + cashBuyingRate + '\''
                + ", sellingRate='" + sellingRate + '\''
                + ", cashSellingRate='" + cashSellingRate + '\''
                + ", middleRate='" + middleRate + '\''
                + ", pubTime='" + pubTime + '\''
                + '}';
    }
}
核心代码
/**
 * Scrapes the Bank of China English exchange-rate search page and extracts the
 * first data row of the rate table for each requested currency.
 */
public class CrawlerUtil {

    /** Map key under which {@link #assembleObjByHtml} stores the parsed rate row. */
    private static final String DTO_KEY = "crawlerCurrencyDTO";

    /** Number of cells read from a rate row (columns 0..6). */
    private static final int RATE_COLUMN_COUNT = 7;

    /**
     * Fetches the current rate row for every currency code in the given list.
     *
     * <p>The result keeps the same order and length as the input: when no rate row
     * could be extracted for a currency (request failed, table missing, or the row
     * has too few cells) the corresponding entry is {@code null}.
     *
     * @param scourceCurrencyList currency codes to look up; {@code null} is treated as empty
     * @return one entry per input code, possibly {@code null}; never {@code null} itself
     */
    public List<CrawlerCurrencyDTO> execute(List<String> scourceCurrencyList) {
        List<CrawlerCurrencyDTO> list = new ArrayList<>();
        if (scourceCurrencyList == null) {
            return list;
        }
        for (String currency : scourceCurrencyList) {
            list.add(getExchangeRate(currency));
        }
        return list;
    }

    /**
     * Fetches today's rate information for a single currency.
     *
     * @param sourceCurrency currency code passed to the search page as {@code pjname}
     * @return the parsed rate row, or {@code null} when none could be extracted
     */
    private CrawlerCurrencyDTO getExchangeRate(String sourceCurrency) {
        // The formatter is created per call, so it is never shared between threads.
        String lsToday = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
        // Download the first result page for today's date and the given currency.
        String searchEnHtml = getSearchEnHtml(lsToday, sourceCurrency);
        // Parse the rate table out of the returned HTML.
        Map<String, Object> map = assembleObjByHtml(searchEnHtml, sourceCurrency, lsToday);
        return (CrawlerCurrencyDTO) map.get(DTO_KEY);
    }

    /**
     * Downloads the raw HTML of the search result page.
     *
     * @param lsToday          date ({@code yyyy-MM-dd}) used for both date query parameters
     * @param lsSourceCurrency currency code sent as {@code pjname}
     * @return the response body on HTTP 200; an empty string on any other status or on
     *         an {@link IOException}
     */
    private String getSearchEnHtml(String lsToday, String lsSourceCurrency) {
        StringBuilder url = new StringBuilder("https://srh.bankofchina.com/search/whpj/searchen.jsp?");
        url.append("erectDate=").append(lsToday);
        // The original literal was "¬hing=", i.e. "&nothing=" after an HTML renderer
        // turned "&not" into the "¬" entity. Without the '&' separator the second
        // parameter was swallowed into the erectDate value.
        url.append("&nothing=").append(lsToday);
        url.append("&pjname=").append(lsSourceCurrency);
        // Only the first result page is requested.
        url.append("&page=").append(1);
        System.out.println("拼接好的url:" + url);

        HttpPost httpPost = new HttpPost(url.toString());
        httpPost.addHeader("Content-Type", "application/x-www-form-urlencoded;charset=utf-8");
        // The header value must not repeat the header name.
        httpPost.setHeader("Accept", "text/plain, */*");
        httpPost.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3724.8 Safari/537.36");
        httpPost.addHeader("x-amazon-user-agent", "AmazonJavascriptScratchpad/1.0 (Language=Javascript)");
        httpPost.addHeader("X-Requested-With", "XMLHttpRequest");

        String html = "";
        // try-with-resources closes the response first, then the client, even on failure.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            if (response.getStatusLine().getStatusCode() == HttpStatus.OK.value()) {
                html = EntityUtils.toString(response.getEntity(), "utf-8");
            } else {
                // Non-200: dump the body for diagnosis and fall through with an empty result.
                System.out.println(EntityUtils.toString(response.getEntity(), "utf-8"));
            }
        } catch (IOException e) {
            // Best effort: a failed request yields an empty page, which parses to "no data".
            e.printStackTrace();
        }
        return html;
    }

    /**
     * Parses the downloaded page and extracts the first data row of the rate table.
     *
     * <p>The returned map always contains {@code "page"} (value of the hidden
     * {@code page} input, empty string if absent). It contains
     * {@code "crawlerCurrencyDTO"} only when a table whose text includes
     * "Currency Name" exists, has a row after the header, and that row has at
     * least seven cells.
     *
     * @param html             HTML to parse
     * @param lsSourceCurrency currency code (currently unused by the parser)
     * @param lsToday          query date (currently unused by the parser)
     * @return map holding the page number and, when found, the parsed rate row
     */
    private Map<String, Object> assembleObjByHtml(String html, String lsSourceCurrency, String lsToday) {
        Map<String, Object> map = new HashMap<>(5);
        Document document = Jsoup.parse(html);

        // Current page number is kept in a hidden input named "page".
        Elements pageItem = document.getElementsByAttributeValue("name", "page");
        map.put("page", pageItem.select("input[name=page]").val());

        // The page contains several tables; pick the first one whose text mentions the header.
        Element rateTable = null;
        for (Element table : document.getElementsByTag("table")) {
            if (table.text().contains("Currency Name")) {
                rateTable = table;
                break;
            }
        }
        if (rateTable == null) {
            return map;
        }

        // Row 0 is the header; row 1 is the first data row, the only one that is read.
        Elements trs = rateTable.select("tr");
        if (trs.size() < 2) {
            return map;
        }
        Elements tds = trs.get(1).select("td");
        if (tds.size() < RATE_COLUMN_COUNT) {
            // A row with fewer cells than expected is not a rate row.
            return map;
        }

        CrawlerCurrencyDTO crawlerCurrencyDTO = new CrawlerCurrencyDTO();
        crawlerCurrencyDTO.setCurrencyName(tds.get(0).text());
        crawlerCurrencyDTO.setBuyingRate(tds.get(1).text());
        crawlerCurrencyDTO.setCashBuyingRate(tds.get(2).text());
        crawlerCurrencyDTO.setSellingRate(tds.get(3).text());
        crawlerCurrencyDTO.setCashSellingRate(tds.get(4).text());
        crawlerCurrencyDTO.setMiddleRate(tds.get(5).text());
        crawlerCurrencyDTO.setPubTime(tds.get(6).text());
        map.put(DTO_KEY, crawlerCurrencyDTO);
        return map;
    }
}
该工具类以币种 code 集合作为入参,抓取以人民币为基准的实时汇率数据并返回;具体实现可根据需求自行调整。如有问题,欢迎在下方评论,我每天都会为大家一一解答。
希望能帮助到大家。