UDTF调用接口得到xml,并解析xml文件
UDTF实现的是返回多条数据:UDTF(User-Defined Table-Generating Functions)用来解决输入一行、输出多行(One-to-many mapping)的需求。
UDTF需要实现三个方法:initialize(定义返回数据的类型、个数),close(清理),process(真正的处理过程,每调用一次forward就会产生一行数据)
例子:调用接口(参数为xml)获取到xml数据,并将xml数据解析出来,经过UDTF来输出多行数据
首先调用接口用的HttpClient
package com.saic.utils;
import java.io.IOException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections.MapUtils;
import org.apache.http.Consts;
import org.apache.http.HeaderIterator;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustStrategy;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.util.EntityUtils;
/**
 * HTTP/HTTPS POST helpers built on Apache HttpClient.
 *
 * <p>The shared HTTPS socket factory trusts every certificate and disables host-name
 * verification, so connections obtained through {@link #getHttpClient()} are NOT
 * authenticated. That is the stated purpose of this class (POST while ignoring
 * certificates); do not use it against endpoints where server identity matters.
 */
public class HttpClientUtils {
    private static final String HTTP = "http";
    private static final String HTTPS = "https";
    private static SSLConnectionSocketFactory sslsf = null;
    private static PoolingHttpClientConnectionManager cm = null;
    private static SSLContextBuilder builder = null;

    static {
        try {
            builder = new SSLContextBuilder();
            // Trust every certificate chain: no identity verification is performed.
            builder.loadTrustMaterial(null, new TrustStrategy() {
                @Override
                public boolean isTrusted(X509Certificate[] x509Certificates, String s) throws CertificateException {
                    return true;
                }
            });
            // NOTE(review): SSLv2Hello/SSLv3/TLSv1 are legacy protocols and TLSv1.1 is absent;
            // the list is kept as-is so existing endpoints keep working -- confirm whether it can be narrowed.
            sslsf = new SSLConnectionSocketFactory(builder.build(),
                    new String[]{"SSLv2Hello", "SSLv3", "TLSv1", "TLSv1.2"}, null, NoopHostnameVerifier.INSTANCE);
            Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
                    .register(HTTP, new PlainConnectionSocketFactory()).register(HTTPS, sslsf).build();
            cm = new PoolingHttpClientConnectionManager(registry);
            cm.setMaxTotal(200); // maximum total pooled connections
        } catch (Exception e) {
            // Best effort: on failure the static fields stay null and the error is only printed.
            e.printStackTrace();
        }
    }

    /**
     * Sends an HTTP POST request.
     *
     * @param url    request URL
     * @param header request headers; may be null or empty
     * @param param  form parameters, sent URL-encoded as UTF-8; may be null or empty
     * @param entity request body for JSON/XML submissions; when non-null it replaces the form body
     * @return the response body when the status is 200; an empty string otherwise
     *         (including a 200 response without a body)
     * @throws Exception if building or executing the request fails
     */
    public static String post(String url, Map<String, String> header, Map<String, String> param, StringEntity entity)
            throws Exception {
        HttpPost httpPost = new HttpPost(url);
        // Request headers
        if (MapUtils.isNotEmpty(header)) {
            for (Map.Entry<String, String> entry : header.entrySet()) {
                httpPost.addHeader(entry.getKey(), entry.getValue());
            }
        }
        // Form parameters
        if (MapUtils.isNotEmpty(param)) {
            List<NameValuePair> formparams = new ArrayList<NameValuePair>();
            for (Map.Entry<String, String> entry : param.entrySet()) {
                formparams.add(new BasicNameValuePair(entry.getKey(), entry.getValue()));
            }
            httpPost.setEntity(new UrlEncodedFormEntity(formparams, Consts.UTF_8));
        }
        // An explicit entity takes precedence over the form body
        if (entity != null) {
            // Default to text/xml only when the caller did not supply a Content-Type,
            // so a request never carries two conflicting Content-Type headers.
            if (!httpPost.containsHeader("Content-Type")) {
                httpPost.addHeader("Content-Type", "text/xml");
            }
            httpPost.setEntity(entity);
        }
        // Resources are closed in reverse order: the response first, then the client.
        try (CloseableHttpClient httpClient = getHttpClient();
             CloseableHttpResponse httpResponse = httpClient.execute(httpPost)) {
            int statusCode = httpResponse.getStatusLine().getStatusCode();
            System.out.println("状态码:" + statusCode);
            if (statusCode == HttpStatus.SC_OK) {
                HttpEntity resEntity = httpResponse.getEntity();
                // UTF-8 is only the fallback used when the response declares no charset.
                return resEntity == null ? "" : EntityUtils.toString(resEntity, Consts.UTF_8);
            }
            // Surface the failure details instead of discarding them.
            System.err.println(readHttpResponse(httpResponse));
            return "";
        }
    }

    /**
     * Posts an XML document as {@code text/xml; charset=UTF-8} using a default client.
     *
     * @param url request URL
     * @param xml XML payload
     * @return the response body decoded as UTF-8, or {@code null} if the request failed
     */
    public static String postXML(String url, String xml) {
        CloseableHttpClient client = null;
        CloseableHttpResponse resp = null;
        try {
            HttpPost httpPost = new HttpPost(url);
            httpPost.setHeader("Content-Type", "text/xml; charset=UTF-8");
            httpPost.setEntity(new StringEntity(xml, "utf-8"));
            // Create exactly one client so that none is left unclosed.
            client = HttpClients.createDefault();
            resp = client.execute(httpPost);
            return EntityUtils.toString(resp.getEntity(), "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Response first, then client; each close is attempted independently.
            closeQuietly(resp);
            closeQuietly(client);
        }
    }

    /**
     * Builds a client backed by the shared connection pool and the trust-all SSL factory.
     * The connection manager is marked as shared, so closing the returned client does not
     * shut the pool down.
     *
     * @return a new client instance
     * @throws Exception declared for backward compatibility with existing callers
     */
    public static CloseableHttpClient getHttpClient() throws Exception {
        CloseableHttpClient httpClient = HttpClients.custom().setSSLSocketFactory(sslsf).setConnectionManager(cm)
                .setConnectionManagerShared(true).build();
        return httpClient;
    }

    /**
     * Renders the status line, headers and body of a response as one diagnostic string.
     *
     * @param httpResponse response to describe
     * @return the diagnostic text
     * @throws ParseException if the response body cannot be parsed
     * @throws IOException    if reading the response body fails
     */
    public static String readHttpResponse(HttpResponse httpResponse) throws ParseException, IOException {
        StringBuilder report = new StringBuilder();
        HttpEntity entity = httpResponse.getEntity();
        // Status line
        report.append("status:").append(httpResponse.getStatusLine());
        report.append("headers:");
        HeaderIterator iterator = httpResponse.headerIterator();
        while (iterator.hasNext()) {
            report.append("\t").append(iterator.next());
        }
        // The body is optional
        if (entity != null) {
            String responseString = EntityUtils.toString(entity, Consts.UTF_8);
            report.append("response length:").append(responseString.length());
            report.append("response content:").append(responseString.replace("\r\n", ""));
        }
        return report.toString();
    }

    /** Closes a resource if present, printing (not propagating) any failure. */
    private static void closeQuietly(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
解析xml数据并封装起来
package com.saic.utils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import java.io.ByteArrayInputStream;
import java.util.*;
/**
 * Parses the XML returned by the remote interface into a list of records.
 */
public class AlarmHisXmlFileParse {

    /** Name of the child element whose children are the individual records. */
    private static final String RECORD_CONTAINER = "ET_PSNDOC";

    /**
     * Parses {@code interfaceData} with dom4j. For every child of the root element that has
     * an {@code ET_PSNDOC} child, each child of that {@code ET_PSNDOC} element becomes one
     * map of (tag name, text value) built from its own children.
     *
     * @param interfaceData XML text; may be null or empty
     * @return the parsed records, in document order; an empty list when the input is
     *         null/empty or is not well-formed XML (never {@code null})
     */
    public static ArrayList<Map<String, String>> dom4jXml(String interfaceData) {
        ArrayList<Map<String, String>> workerList = new ArrayList<>();
        if (interfaceData == null || interfaceData.isEmpty()) {
            return workerList;
        }

        Element root;
        try {
            // NOTE(review): if interfaceData can come from an untrusted source, disable
            // DTDs/external entities on this reader to prevent XXE.
            SAXReader sr = new SAXReader();
            // Read from characters rather than platform-charset bytes so that
            // non-ASCII text is not corrupted by an encode/decode round trip.
            Document document = sr.read(new java.io.StringReader(interfaceData));
            root = document.getRootElement();
        } catch (DocumentException e) {
            e.printStackTrace();
            // Malformed XML yields no records instead of a NullPointerException later on.
            return workerList;
        }

        List<Element> elementList = root.elements();
        for (Element e : elementList) {
            // Descend into the record container one level below
            Element container = e.element(RECORD_CONTAINER);
            if (container == null) {
                continue;
            }
            List<Element> items = container.elements();
            for (Element item : items) {
                Map<String, String> map = new HashMap<>();
                List<Element> perData = item.elements();
                for (Element field : perData) {
                    map.put(field.getName(), field.getStringValue());
                }
                workerList.add(map);
            }
        }
        return workerList;
    }
}
然后就是实现udtf的步骤
1.继承GenericUDTF
/**
 * Step 1: the UDTF class, declared as a subclass of GenericUDTF.
 */
public class UDTFofWorker extends GenericUDTF {
}
2.实现initialize()
/**
 * Declares the output schema: 35 string columns named {@code col1} .. {@code col35}.
 *
 * @param args inspectors of the call arguments (not examined here)
 * @return a standard struct inspector describing the output columns
 * @throws UDFArgumentException declared by the overridden method
 */
@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
    // Declare as many columns as each row has values; the two counts should be equal.
    final int columnCount = 35;
    ArrayList<String> columnNames = new ArrayList<String>(columnCount);
    ArrayList<ObjectInspector> columnInspectors = new ArrayList<ObjectInspector>(columnCount);
    for (int index = 1; index <= columnCount; index++) {
        columnNames.add("col" + index);
        columnInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnInspectors);
}
3.实现process()
@Override
public void process(Object[] args) throws HiveException {
String url = "调用的接口";
String xml = "传入的参数";
//调用接口
String responseContent = postXML(url, xml);
//去除获取到的xml多余标签
String s1 = responseContent.replace("<soap-env:Header/><soap-env:Body>", "").replace("</soap-env:Body>", "");
//解析XML数据
ArrayList<Map<String, String>> workers = AlarmHisXmlFileParse.dom4jXml(s1);
List<ArrayList<String>> list = new ArrayList<>();
for (Map<String, String> worker : workers) {
ArrayList<String> list1 = new ArrayList<>();
//我这里是为了保证数据顺序,用了最笨的方法
list1.add(worker.get("ZHRYGH"));
......
......
list.add(list1);
}
//一个forward是一行数据
forward(list);
}
4.最后实现close()
/**
 * Cleanup hook; this implementation does nothing.
 *
 * @throws HiveException declared by the overridden method; never thrown here
 */
@Override
public void close() throws HiveException {
}
仅代表个人所学经验,经验有限,有不足的地方还请指出来,共同学习