Importing Hive Data into MySQL with Java


1. Dependencies required in pom.xml

    <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.38</version>
    </dependency>
   <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-hive -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_2.11</artifactId>
      <version>2.4.4</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.mybatis/mybatis -->
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis</artifactId>
      <version>3.4.6</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.alibaba/druid -->
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>druid</artifactId>
      <version>1.1.10</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>1.1.0</version>
    </dependency>
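The spark-hive artifact is not referenced by any of the code below and appears optional for this task; the two JDBC drivers, MyBatis, and Druid are what the import pipeline actually uses. As a quick optional check that both drivers resolved on the classpath, a sketch (note that Connector/J 5.1.x still uses the pre-8.0 class name com.mysql.jdbc.Driver; the class name DriverCheck is hypothetical):

package org.example;

// Optional smoke test: confirms both JDBC drivers declared above are on the classpath.
public class DriverCheck {
    public static void main(String[] args) throws ClassNotFoundException {
        Class.forName("com.mysql.jdbc.Driver");           // MySQL Connector/J 5.1.x
        Class.forName("org.apache.hive.jdbc.HiveDriver"); // hive-jdbc (HiveServer2)
        System.out.println("Both drivers loaded.");
    }
}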

2. XML configuration

mybatis-config.xml (two environments are defined: zjy for MySQL and zjy1 for Hive; DatabaseUtils in section 3 selects one by environment id):

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE configuration PUBLIC
        "-//mybatis.org//DTD Config 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-config.dtd">
<configuration>
    <typeAliases>
        <typeAlias type="org.example.DruidDataSourceFactory" alias="DRUID"></typeAlias>
        <typeAlias type="org.entry.Events" alias="event"></typeAlias>
    </typeAliases>
    <environments default="zjy">
        <environment id="zjy">
            <transactionManager type="JDBC"></transactionManager>
            <dataSource type="DRUID">
                <property name="driver" value="com.mysql.jdbc.Driver"/>
                <property name="url" value="jdbc:mysql://192.168.181.132:3306/ms_dm_intes"/>
                <property name="username" value="root"/>
                <property name="password" value="root"/>
            </dataSource>
        </environment>
        <environment id="zjy1">
            <transactionManager type="JDBC"></transactionManager>
            <dataSource type="DRUID">
                <property name="driver" value="org.apache.hive.jdbc.HiveDriver"/>
                <property name="url" value="jdbc:hive2://192.168.181.132:10000/dwd_intes"/>
                <property name="username" value=""/>
                <property name="password" value=""/>
            </dataSource>
        </environment>
    </environments>

    <mappers>
        <mapper resource="mapper/mysql-events.xml"></mapper>
        <mapper resource="mapper/hive-events.xml"></mapper>
    </mappers>
</configuration>

hive-events.xml:

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC
        "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.dao.HiveEventDAO">
    <select id="findAll" resultType="java.util.Map" parameterType="int">
        select eventid,userid,starttime,city,states,zip,country,lat,lng,features from dwd_intes.tmp where flag=#{flag}
    </select>
</mapper>
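The query above assumes a staging table dwd_intes.tmp whose flag column splits the rows into fixed-size groups so they can be pulled one group at a time (see the service layer below). The article does not show how tmp is built; a plausible sketch over hive-jdbc, assuming the groups come from a row_number() window (the source table dwd_intes.events, the ordering key, the class name, and the group size of 5,000 are all assumptions; the optimized version at the end raises the group size to 50,000):

package org.example;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// Hypothetical one-off job that rebuilds the staging table with a flag
// column: consecutive blocks of 5,000 rows share the same flag value.
public class BuildHiveStaging {
    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        try (Connection conn = DriverManager.getConnection(
                "jdbc:hive2://192.168.181.132:10000/dwd_intes", "", "");
             Statement stmt = conn.createStatement()) {
            stmt.execute("drop table if exists dwd_intes.tmp");
            stmt.execute(
                "create table dwd_intes.tmp as " +
                "select eventid,userid,starttime,city,states,zip,country,lat,lng,features, " +
                // flag = zero-based block index of the row; the ordering key is an assumption
                "cast(floor((row_number() over (order by eventid) - 1) / 5000) as int) as flag " +
                "from dwd_intes.events");
        }
    }
}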

mysql-events.xml:

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC
        "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.dao.MySQLEventDAO">
    <insert id="batchInsert" parameterType="java.util.List">
        insert into dm_events_bak1 values
        <foreach collection="list" item="eve" separator=",">
            (
            #{eve.eventid},#{eve.userid},#{eve.starttime},#{eve.city},
            #{eve.states},#{eve.zip},#{eve.country},#{eve.lat},#{eve.lng},#{eve.features}
            )
        </foreach>
    </insert>
</mapper>
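Because the INSERT names no columns, it relies on dm_events_bak1 having exactly these ten columns in this order. The article does not show the target table either; a minimal sketch of creating it over plain JDBC, assuming all-string columns to mirror the Events entity (the class name, column types, and lengths are assumptions):

package org.example;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// Hypothetical one-off setup for the MySQL target table. Column order must
// match the values list in mysql-events.xml.
public class CreateMySqlTarget {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://192.168.181.132:3306/ms_dm_intes", "root", "root");
             Statement stmt = conn.createStatement()) {
            stmt.execute(
                "create table if not exists dm_events_bak1 (" +
                "eventid varchar(64), userid varchar(64), starttime varchar(64), " +
                "city varchar(64), states varchar(64), zip varchar(32), " +
                "country varchar(64), lat varchar(32), lng varchar(32), " +
                "features text)");
        }
    }
}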

3. Java code

DAO layer:

HiveEventDAO:

package org.dao;

import org.entry.Events;

import java.util.List;

public interface HiveEventDAO {
    // the single int parameter binds to #{flag} in hive-events.xml
    public List<Events> findAll(int flag);
}

MySQLEventDAO:

package org.dao;

import org.entry.Events;

import java.util.List;

public interface MySQLEventDAO {
    // note: findAll has no matching statement in mysql-events.xml and is unused here
    public List<Events> findAll();
    public void batchInsert(List<Events> evs);
}

Entity layer (entry):

Events:

package org.entry;

public class Events {
    private String eventid;
    private String userid;
    private String starttime;
    private String city;
    private String states;
    private String zip;
    private String country;
    private String lat;
    private String lng;
    private String features;

    public String getEventid() {
        return eventid;
    }

    public void setEventid(String eventid) {
        this.eventid = eventid;
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getStarttime() {
        return starttime;
    }

    public void setStarttime(String starttime) {
        this.starttime = starttime;
    }

    public String getCity() {
        return city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public String getStates() {
        return states;
    }

    public void setStates(String states) {
        this.states = states;
    }

    public String getZip() {
        return zip;
    }

    public void setZip(String zip) {
        this.zip = zip;
    }

    public String getCountry() {
        return country;
    }

    public void setCountry(String country) {
        this.country = country;
    }

    public String getLat() {
        return lat;
    }

    public void setLat(String lat) {
        this.lat = lat;
    }

    public String getLng() {
        return lng;
    }

    public void setLng(String lng) {
        this.lng = lng;
    }

    public String getFeatures() {
        return features;
    }

    public void setFeatures(String features) {
        this.features = features;
    }
}

Data source (util) layer:

DruidDataSourceFactory:

package org.example;

import com.alibaba.druid.pool.DruidDataSource;
import org.apache.ibatis.datasource.DataSourceFactory;

import javax.sql.DataSource;
import java.sql.SQLException;
import java.util.Properties;

public class DruidDataSourceFactory implements DataSourceFactory
{
    private Properties prop;

    @Override
    public void setProperties(Properties properties) {
        // receives the <property> entries from the <dataSource type="DRUID"> element
        this.prop = properties;
    }

    @Override
    public DataSource getDataSource() {
        // build a Druid connection pool from the configured driver/url/credentials
        DruidDataSource druid = new DruidDataSource();
        druid.setDriverClassName(this.prop.getProperty("driver"));
        druid.setUrl(this.prop.getProperty("url"));
        druid.setUsername(this.prop.getProperty("username"));
        druid.setPassword(this.prop.getProperty("password"));
//        druid.setMaxActive(Integer.parseInt(this.prop.getProperty("maxactive")));
//        druid.setInitialSize(Integer.parseInt(this.prop.getProperty("initialsize")));
        try {
            druid.init();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return druid;
    }
}

DatabaseUtils:

package org.example;

import org.apache.ibatis.io.Resources;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;

import java.io.IOException;
import java.io.InputStream;

public class DatabaseUtils {
    private static final String configPath="mybatis-config.xml";

    public static SqlSession getSession(String db){
        SqlSession session = null;
        try {
            InputStream is = Resources.getResourceAsStream(configPath);
            SqlSessionFactory factory = new SqlSessionFactoryBuilder().build(is,db.equals("mysql")?"zjy":"zjy1");
            session = factory.openSession();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return session;

    }
    public static void close(SqlSession session){
        session.close();
    }
}
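Typical usage of DatabaseUtils: open a session for one backend, obtain its mapper, and close the session afterwards. A minimal sketch (the class name UsageDemo is hypothetical):

package org.example;

import org.apache.ibatis.session.SqlSession;
import org.dao.HiveEventDAO;
import org.entry.Events;

import java.util.List;

public class UsageDemo {
    public static void main(String[] args) {
        SqlSession hiveSession = DatabaseUtils.getSession("hive");
        try {
            HiveEventDAO dao = hiveSession.getMapper(HiveEventDAO.class);
            List<Events> firstGroup = dao.findAll(0); // rows with flag = 0
            System.out.println("fetched " + firstGroup.size() + " rows");
        } finally {
            DatabaseUtils.close(hiveSession);
        }
    }
}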

Service layer:

package org.services;

import org.apache.ibatis.session.SqlSession;
import org.dao.HiveEventDAO;
import org.dao.MySQLEventDAO;
import org.entry.Events;
import org.example.DatabaseUtils;
import java.util.List;


public class HiveToMySqlService {
    private List<Events> change(int page){
        // fetch one flag group of rows from Hive via HiveEventDAO
        SqlSession hiveSession = DatabaseUtils.getSession("hive");
        HiveEventDAO hivedao = hiveSession.getMapper(HiveEventDAO.class);

        List<Events> lst = hivedao.findAll(page);
        return lst;
    }

    public void fillMySql(){
        // prepare the MySQL session
        SqlSession session = DatabaseUtils.getSession("mysql");
        MySQLEventDAO medao = session.getMapper(MySQLEventDAO.class);

        // fetch each flag group (0-627) from Hive and batch-insert it into MySQL
        for (int i = 0; i <= 627; i++) {
            List<Events> eves = change(i);
            medao.batchInsert(eves);
            session.commit();
        }
    }
}
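Note that change() opens a new Hive SqlSession on every call and never closes it, so the loop above leaks one session (and its pooled connection) per iteration. A sketch of a safer variant of change() using try/finally (MyBatis fully materializes the result list before the session is closed):

    private List<Events> change(int page) {
        SqlSession hiveSession = DatabaseUtils.getSession("hive");
        try {
            HiveEventDAO hivedao = hiveSession.getMapper(HiveEventDAO.class);
            return hivedao.findAll(page);
        } finally {
            // release the Hive session so connections are not leaked
            DatabaseUtils.close(hiveSession);
        }
    }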

Entry point:

package org.example;

import org.services.HiveToMySqlService;


public class App 
{
    public static void main(String[] args)
    {
        HiveToMySqlService hts = new HiveToMySqlService();
        hts.fillMySql();
    }
}

In the service layer above, every iteration reads 5,000 rows from Hive and inserts those same 5,000 rows into MySQL. Since Hive can return far more than 5,000 rows per read, the class was reworked to read larger groups from Hive and buffer the MySQL inserts:

package org.services;

import org.apache.ibatis.session.SqlSession;
import org.dao.HiveEventDAO;
import org.dao.MySQLEventDAO;
import org.entry.Events;
import org.example.DatabaseUtils;

import java.util.ArrayList;
import java.util.List;

public class HiveToMysql {
    static HiveEventDAO hdao;
    static {
        // one shared Hive session for all reads
        SqlSession hiveSession = DatabaseUtils.getSession("hive");
        hdao = hiveSession.getMapper(HiveEventDAO.class);
    }

    public void fillMysql(){
        SqlSession mysqlSession = DatabaseUtils.getSession("mysql");
        MySQLEventDAO mdao = mysqlSession.getMapper(MySQLEventDAO.class);
        // The Hive data is numbered with a row_number() window and the flag
        // column is derived from the row number and the group size. The group
        // size used to be 5,000 (hence flags 0-627 in the previous class);
        // here it is raised to 50,000, leaving 63 groups (flags 0-62).
        for (int flag = 0; flag < 63; flag++) {
            List<Events> eves = hdao.findAll(flag);
            // buffer holding the current flag group's rows; cleared after each insert
            List<Events> tmpdata = new ArrayList<>();
            for (int i = 1; i <= eves.size(); i++) {
                tmpdata.add(eves.get(i - 1));
                // flush when the buffer reaches 8,000 rows, or on the last
                // (possibly partial) batch of this flag group
                if (i % 8000 == 0 || i == eves.size()) {
                    mdao.batchInsert(tmpdata);
                    mysqlSession.commit();
                    tmpdata.clear();
                }
            }
        }
    }
}
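Flushing every 8,000 rows keeps each generated multi-row INSERT comfortably under MySQL's max_allowed_packet limit while still amortizing network round trips. If inserts fail with packet-size errors, the server's limit can be checked like this (a sketch; the class name PacketCheck is hypothetical, and the URL and credentials match the config above):

package org.example;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Prints the MySQL server's max_allowed_packet so the batch size can be tuned against it.
public class PacketCheck {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://192.168.181.132:3306/ms_dm_intes", "root", "root");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "show variables like 'max_allowed_packet'")) {
            while (rs.next()) {
                System.out.println(rs.getString(1) + " = " + rs.getString(2));
            }
        }
    }
}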
