Salesforce数据写入HDFS

概述

访问Salesforce,将数据写入到HDFS中。稍加改动就可以写入其他数据源。

详细

概述

通过代码抽取Salesforce数据,以文本格式写入至HDFS中。

项目结构

├─dataTransfer(项目根节点)

│  ├─com.example.sfdctohdfs  项目主包

│  │  ├─com.example.sfdctohdfs.hdfs    hdfs工具包

│  │  │  ├─com.example.sfdctohdfs.hdfs.util    工具类

│  │  ├─com.example.sfdctohdfs.sfdc    sfdc处理包

详情

  • 配置

配置Salesforce、HDFS的配置信息。配置内容存放于项目中resources目录下的application.yml文件中,如下所示

#salesforce 配置
salesforce:
  client_id: 3MVG99OxTyEMCQ3jLn42JgU9JP4qJYx5cZgYGcAq4CdAqWd.R6A4L2KHUonNl0schJV07gNk.OahPYvlGpHaq
  client_secret: 13CA01010ED07DE764DE083D0C802FB7F69CC2CA23AFB89450626FBE1A8C3095
  username: username
  password: test1234
  url: https://login.salesforce.com/services/oauth2/token?grant_type=password
  //是否使用代理
  isProxy: true
  proxyUrl: proxy.xx.com
  proxyPort: 80
hdfs:
  url: hdfs://10.1.1.1:9000
  user: root
SalesforceDataUtil工具类
package com.example.sfdctohdfs.sfdc;
 
import cn.hutool.core.date.DateUnit;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.text.csv.CsvWriter;
import cn.hutool.core.util.NumberUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.example.sfdctohdfs.hdfs.HdfsClient;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
 
 
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.math.BigDecimal;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URISyntaxException;
import java.util.*;
 
 
@Component
@Slf4j
public class SalesforceDataUtil {
 
    @Value("${salesforce.client_id}")
    private String client_id;
    @Value("${salesforce.client_secret}")
    private String client_secret;
    @Value("${salesforce.username}")
    private String username;
    @Value("${salesforce.password}")
    private String password;
    @Value("${salesforce.url}")
    private String url;
    @Value("${salesforce.isProxy}")
    private Boolean isProxy;
    @Value("${salesforce.proxyUrl}")
    private String proxyUrl;
    @Value("${salesforce.proxyPort}")
    private Integer proxyPort;
 
    @Autowired
    private HdfsClient hdfsClient;
    /**
     * 登录salesforce
     * @return
     */
    public  JSONObject login(){
        HashMap<String, Object> paramMap = new HashMap<>();
        paramMap.put("client_id", client_id);
        paramMap.put("client_secret", client_secret);
        paramMap.put("username", username);
        paramMap.put("password", password);
        log.info(paramMap.toString());
        if(!isProxy){
            String result= HttpUtil.post(url, paramMap);
            JSONObject json= JSONUtil.parseObj(result);
            return json;
        }else{
            String result=HttpRequest.post(url)
                    .setProxy(new Proxy(Proxy.Type.HTTP,new InetSocketAddress(proxyUrl,proxyPort)))
                    .form(paramMap)
                    .execute().body();
            JSONObject json= JSONUtil.parseObj(result);
            return json;
        }
    }
 
    //处理sfdc sql语句  select id,name,LastModifiedDate,CreatedDate from Account
    //select from 必须小写
    public String getQuerySalesforceSql(String sfdcSql) {
        return sfdcSql.split("select\\+")[1].split("\\+from")[0];
    }
 
    /**
     * 对数据进行简单的处理,去掉一些特殊字符
     * @param str
     * @return
     */
    public static List<String> getInsertSqlStr(String str,String field) {
        List<String> list = new ArrayList<>();
        JSONArray jsonArray = JSONUtil.parseArray(JSONUtil.parseObj(str).get("records").toString());
        //处理查询字段
        String[] fields = field.split(",");
        //查询字段不为null 根据参数中的字段来查询
        //list.add(fields);
        for (int i = 0; i < jsonArray.size(); i++) {
            StringBuffer strings = new StringBuffer();
            Integer count=0;
            for (int j = 0; j < fields.length; j++) {
                JSONObject o = JSONUtil.parseObj(jsonArray.get(i).toString());
                Object value = o.get(fields[j]);
                if(j==0){
                    if (value == null) {
                        strings.append("\\N");
                    } else {
                        strings.append(value.toString()
                                .replace("\n"," ")
                                .replace("\r"," ")
                                .replace("|","-")
                        );
                    }
                }else{
                    if (value == null) {
                        strings.append("|"+"\\N");
                    } else {
                        strings.append("|"+value.toString()
                                .replace("\n"," ")
                                .replace("\r"," ")
                                .replace("|","-")
                        );
                    }
                }
            }
            list.add(strings.toString());
        }
        return list;
    }
 
    public void writeString(OutputStreamWriter writer,List<String> list) throws IOException {
        for(int i=0;i<list.size();i++){
            writer.write(list.get(i)+System.getProperty("line.separator"));
        }
    }
 
 
    public void executeQuery(String querySql,String filePath) throws URISyntaxException, IOException, InterruptedException {
        String field=getQuerySalesforceSql(querySql);
        OutputStreamWriter writer=hdfsClient.createCsvWriter(filePath,querySql.split("from\\+")[1].split("\\+")[0]);
        Proxy proxy = new Proxy(Proxy.Type.HTTP,new InetSocketAddress(proxyUrl,proxyPort));
        try {
            TimeInterval timer = DateUtil.timer();
            JSONObject json=login();
            log.info(JSONUtil.toJsonStr(json));
            String queryUrl=json.get("instance_url")+"/services/data/v52.0/query/?q=";
            log.info("查询语句:"+querySql);
            HttpRequest httpRequest=HttpRequest.get(queryUrl+querySql)
                    .header("Authorization","Bearer " +json.get("access_token"));
            if(isProxy){
                httpRequest.setProxy(proxy);
            }
            String queryBody=httpRequest.execute().body();
            if(!JSONUtil.isJsonObj(queryBody)){
                log.error(queryBody);
            }
            String totalSize= JSONUtil.parseObj(queryBody).get("totalSize").toString();
            if("0".equals(totalSize)){
                log.info("未查询到数据");
            }
            BigDecimal size=null;
            String nextRecordsUrl=null;
            if(JSONUtil.parseObj(queryBody).containsKey("nextRecordsUrl")){
                size= new BigDecimal(JSONUtil.parseObj(queryBody).get("nextRecordsUrl").toString().split("-")[1]);
                nextRecordsUrl=json.get("instance_url")+JSONUtil.parseObj(queryBody).get("nextRecordsUrl").toString();
                log.info(nextRecordsUrl);
            }else{
                size=new BigDecimal(JSONUtil.parseObj(queryBody).get("totalSize").toString());
            }
            //获取表头
            String head= field;
            writer.write(head+System.getProperty("line.separator"));
            writer.flush();
            writeString(writer,getInsertSqlStr(queryBody,field));
            writer.flush();
            log.info("第一次读取数据:"+size+"条,花费时间:"+timer.interval()+"毫秒。");
            Long temlpTime=System.currentTimeMillis();
            while (JSONUtil.parseObj(queryBody).containsKey("nextRecordsUrl")) {
                long currentTimeMillis8 = System.currentTimeMillis();
                //token有效期大于一个小时50分钟重新登录!
                if(DateUtil.between(new Date(temlpTime), new Date(currentTimeMillis8), DateUnit.MINUTE)>110){
                    json=login();
                    temlpTime=currentTimeMillis8;
                    log.info("token过期-重新登录!");
                }
                httpRequest=HttpRequest.get(nextRecordsUrl)
                        .header("Authorization","Bearer " +json.get("access_token"));
                if(isProxy){
                    httpRequest.setProxy(proxy);
                }
                String queryBodytlmp=httpRequest.execute().body();
                writeString(writer,getInsertSqlStr(queryBodytlmp,field));
                writer.flush();
                if(!JSONUtil.parseObj(queryBodytlmp).containsKey("nextRecordsUrl")){
                    size=NumberUtil.add(size,new BigDecimal((JSONUtil.parseArray(JSONUtil.parseObj(queryBodytlmp).get("records").toString()).size())));
                    log.info("总读取数据:"+size+",花费时间:"+(timer.interval())+"毫秒。");
                    break;
                }
                nextRecordsUrl=json.get("instance_url")+JSONUtil.parseObj(queryBodytlmp).get("nextRecordsUrl").toString();
                size=new BigDecimal(JSONUtil.parseObj(queryBodytlmp).get("nextRecordsUrl").toString().split("-")[1]);
                log.info(nextRecordsUrl);
                log.info("总读取数据:"+size+",花费时间:"+(timer.interval())+"毫秒。");
            }
        }catch (Exception e){
            throw e;
        }finally {
            writer.close();
        }
 
    }
}
  • 程序主入口SfdctohdfsApplication

本demo需求为jar包运行,故需要通过主程序入口调用方法。最终调用方式为 java -jar jarname args[0] args[1]。若要本地直接运行,可将代码中自定义日志名称和地址的三行代码注释掉,并手动给参数赋值;

package com.example.sfdctohdfs;
 
import cn.hutool.core.date.DateUtil;
import com.example.sfdctohdfs.hdfs.util.SpringUtil;
import com.example.sfdctohdfs.sfdc.SalesforceDataUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.ApplicationContext;
 
import java.io.FileNotFoundException;
import java.io.PrintStream;
 
@SpringBootApplication
@Slf4j
public class SfdctohdfsApplication {
 
 
    public static void main(String[] args) throws FileNotFoundException {
        //自定义日志文件名称和地址
        String logName=args[0].split("from\\+")[1].split("\\+")[0]+"_"+DateUtil.format(DateUtil.date(),"yyyy-MM-dd_HH-mm-ss");
        PrintStream ps2=new PrintStream("./"+logName +".log");// 创建文件输出流2
        System.setOut(ps2);
        try {
            SpringApplication.run(SfdctohdfsApplication.class, args);
            ApplicationContext context = SpringUtil.getApplicationContext();
            SalesforceDataUtil salesforceDataUtil =context.getBean(SalesforceDataUtil.class);
            String querySql=args[0];
            //args[0]:  传入sql参数
            //String querySql="select+UCM_ID__c,UCM_Status__c,Name,Alternate_Account_Name__c,Alias__c,Facility_Type__c,Facility_Ownership_Type__c,Go_to_Market_P_L_del__c,China_Hospital_Category__c,Account_Segmentation__c,Sales_Channel__c,China_Hospital_Level__c,China_Hospital_Grade__c,Large_Agency__c,Country_Name_2__c,Billing_State__c,Primary_Billing_City__c,Primary_County__c,BillingAddress,BillingCity,BillingCountry,BillingGeocodeAccuracy,BillingLatitude,BillingLongitude,BillingPostalCode,BillingState,BillingStreet,Parent_Commercial_ID__c,Parentid,KYC_Done__c,GSP_Status__c,GE_Show_Site__c,China_Number_of_Outpatient__c,China_Number_of_bed__c,China_Annual_Revenue__c,CreatedDate,LastModifiedDate,Record_Status__c,Description,Comments__c,GE_Site_Free_Field_01__c,GE_Site_Free_Field_02__c,Service_Sales_Market_Segment__c,GD_Name__c,GD_UCMID__c,Parent__c,Ultimate_Parent__c,Active__c,China_Government_Level__c,China_GE_segmentation__c,Is_Top_X__c,Ascend__c,GE_Site_ID__c,GE_Site_Type__c,GE_Site_Abbreviated_Name__c,GE_Site_Service_Station_ID__c,GE_Site_Name_English__c,HUB_Commercial_Entity__c,Termination_Reason__c,Termination_Date__c,UltimateParent_HL__c,Independent_Procurement__c,Japan_Primary_Address_UCM__c,Secondary_Address__c,Phone__c,Alternate_Phone__c,Website,Fax__c,L1_Market_Revenue__c,L1_Position__c,L1_Type__c,L1_Segment__c,ePOP_Account_ID__c,China_Dealer_Id__c,ePOP_Account_Name__c,Data_Steward_Notes__c,MMI_ID__c,Hospital_Certificate_No__c,Hospital_Certificate_End_Date__c,Business_License_End_Date__c,Business_License_No__c,Medical_Device_Operation_License_No__c,Medical_Device_Op_License_End_Date__c,Medical_Device_Filing_LicenseNo_ClassII__c,Other_License_No__c,Other_License_End_Date__c,Region__c,Zone__c,CreatedByid,UCM_Address_ID__c,Site_ID__c,Commercial_ID__c,LastModifiedByid,Ownerid,DUNS__c,Salesforce_ID__c,Oracle_ERP_ID__c,ReadyToPublish__c,Japan_Memo_File__c,RecordTypeid,LS_Global_Region__c,Global_Region__c,Language__c,Test_Record__c,Account_Setting__c,HUB_Identifie
r__c,GE_Hub_Country__c,MedicalDevice_FilingLicense_ClassII_Date__c,ShippingAddress,ShippingCity,ShippingCountry,ShippingGeocodeAccuracy,ShippingLatitude,ShippingLongitude,ShippingPostalCode,ShippingState,ShippingStreet,L1_Market_Revenue_Sub_Segment__c,L1_Type_Sub_Category__c,L2_Org_Segment__c,Partner_Account_Status__c,Partner_Account_Status_Description__c,Termination_Category__c,Approval_Status__c,HCIT_PACS_IW_Service_Level__c,IsDataLoad__c+from+account+where+Global_Region__c='China'";
            String filePath=args[1];
            //args[1]: hdfs文件存放路径
            //String filePath="/wtest/";
            salesforceDataUtil.executeQuery(querySql,filePath);
            System.out.println("程序结束!");
            System.exit(0);
        }catch (Exception e){
            log.error("系统异常:",e);
            System.out.println("程序运行出现异常!");
            System.exit(1);
        }
    }
 
}
  • 总结

该程序主要是读取sfdc数据写入hdfs中,稍加改动即可写入其他数据源比如mysql等等。也可以集成到项目中,作为工具类来使用

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

西安未央

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值