1.pom文件需要的依赖
<!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.38</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-hive -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_2.11</artifactId>
<version>2.4.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.mybatis/mybatis -->
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis</artifactId>
<version>3.4.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/druid -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.1.10</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.1.0</version>
</dependency>
2.xml文件配置
mybatis-config.xml(即 DatabaseUtils 中加载的配置文件 mybatis-config.xml):
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE configuration PUBLIC
"-//mybatis.org//DTD Config 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-config.dtd">
<configuration>
<!-- Aliases let mapper XML refer to classes by short name:
     "DRUID" = the custom Druid-backed DataSource factory below,
     "event" = the Events entity class. -->
<typeAliases>
<typeAlias type="org.example.DruidDataSourceFactory" alias="DRUID"></typeAlias>
<typeAlias type="org.entry.Events" alias="event"></typeAlias>
</typeAliases>
<!-- Two environments: "zjy" (default) is the MySQL sink, "zjy1" is the Hive
     source via HiveServer2 JDBC. DatabaseUtils selects one by id when it
     builds the SqlSessionFactory. -->
<environments default="zjy">
<!-- MySQL sink. NOTE(review): credentials are hard-coded in plain text;
     consider externalising them to a properties file. -->
<environment id="zjy">
<transactionManager type="JDBC"></transactionManager>
<dataSource type="DRUID">
<property name="driver" value="com.mysql.jdbc.Driver"/>
<property name="url" value="jdbc:mysql://192.168.181.132:3306/ms_dm_intes"/>
<property name="username" value="root"/>
<property name="password" value="root"/>
</dataSource>
</environment>
<!-- Hive source; this HiveServer2 accepts empty credentials. -->
<environment id="zjy1">
<transactionManager type="JDBC"></transactionManager>
<dataSource type="DRUID">
<property name="driver" value="org.apache.hive.jdbc.HiveDriver"/>
<property name="url" value="jdbc:hive2://192.168.181.132:10000/dwd_intes"/>
<property name="username" value=""/>
<property name="password" value=""/>
</dataSource>
</environment>
</environments>
<!-- Mapper XML resources; each file's namespace must match its DAO interface. -->
<mappers>
<mapper resource="mapper/mysql-events.xml"></mapper>
<mapper resource="mapper/hive-events.xml"></mapper>
</mappers>
</configuration>
hive-events.xml:
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC
"-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.dao.HiveEventDAO">
<!-- Reads one flag-partition of events from the Hive staging table.
     resultType uses the "event" alias (org.entry.Events, declared in
     mybatis-config.xml) so each row maps onto the entity the DAO interface
     actually returns; the previous resultType="java.util.Map" contradicted
     HiveEventDAO.findAll's List<Events> signature and only appeared to work
     because of generic type erasure. Column names match the Events fields. -->
<select id="findAll" resultType="event" parameterType="int">
select eventid,userid,starttime,city,states,zip,country,lat,lng,features from dwd_intes.tmp where flag=#{flag}
</select>
</mapper>
mysql-events.xml:
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC
"-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.dao.MySQLEventDAO">
<!-- Multi-row insert: expands to "values (...),(...),..." with one tuple per
     Events element; "list" is MyBatis's built-in name for a List parameter.
     NOTE(review): this relies on dm_events_bak1's physical column order
     matching the tuple order below. An explicit column list would be safer,
     but the table schema is not visible here - confirm before changing. -->
<insert id="batchInsert" parameterType="java.util.List">
insert into dm_events_bak1 values
<foreach collection="list" item="eve" separator=",">
(
#{eve.eventid},#{eve.userid},#{eve.starttime},#{eve.city},
#{eve.states},#{eve.zip},#{eve.country},#{eve.lat},#{eve.lng},#{eve.features}
)
</foreach>
</insert>
</mapper>
3.JAVA代码
dao层:
HiveEventDAO
package org.dao;
import org.entry.Events;
import java.util.List;
import java.util.Map;
/**
 * Read-side DAO bound to mapper/hive-events.xml (namespace org.dao.HiveEventDAO).
 */
public interface HiveEventDAO {
// Returns one flag-partition of events from dwd_intes.tmp (rows where flag = page).
// NOTE(review): the mapped <select> declares resultType="java.util.Map", not the
// Events entity this signature promises - it only works through type erasure;
// the mapper should use the "event" alias instead.
public List<Events>findAll(int page);
}
MySQLEventDAO:
package org.dao;
import org.entry.Events;
import java.util.List;
/**
 * Write-side DAO bound to mapper/mysql-events.xml (namespace org.dao.MySQLEventDAO).
 */
public interface MySQLEventDAO {
// NOTE(review): mysql-events.xml as shown contains no <select id="findAll">
// statement; calling this method would fail until one is added.
public List<Events> findAll();
// Multi-row insert into dm_events_bak1; the caller is responsible for
// committing the SqlSession afterwards.
public void batchInsert(List<Events>evs);
}
实体类entry层:
Events:
package org.entry;
/**
 * Plain data holder for one event row moved from Hive to MySQL.
 * All columns are carried as raw strings exactly as read from the source;
 * no parsing or validation happens here.
 */
public class Events {
    private String eventid;
    private String userid;
    private String starttime;
    private String city;
    private String states;
    private String zip;
    private String country;
    private String lat;
    private String lng;
    private String features;

    public String getEventid() { return eventid; }
    public void setEventid(String value) { eventid = value; }

    public String getUserid() { return userid; }
    public void setUserid(String value) { userid = value; }

    public String getStarttime() { return starttime; }
    public void setStarttime(String value) { starttime = value; }

    public String getCity() { return city; }
    public void setCity(String value) { city = value; }

    public String getStates() { return states; }
    public void setStates(String value) { states = value; }

    public String getZip() { return zip; }
    public void setZip(String value) { zip = value; }

    public String getCountry() { return country; }
    public void setCountry(String value) { country = value; }

    public String getLat() { return lat; }
    public void setLat(String value) { lat = value; }

    public String getLng() { return lng; }
    public void setLng(String value) { lng = value; }

    public String getFeatures() { return features; }
    public void setFeatures(String value) { features = value; }
}
数据源util层:
DruidDataSourceFactory:
package org.example;
import com.alibaba.druid.pool.DruidDataSource;
import org.apache.ibatis.datasource.DataSourceFactory;
import javax.sql.DataSource;
import java.sql.SQLException;
import java.util.Properties;
/**
 * MyBatis DataSourceFactory backed by an Alibaba Druid connection pool.
 * Registered in mybatis-config.xml under the alias "DRUID"; MyBatis calls
 * setProperties with the &lt;property&gt; values of the &lt;dataSource&gt;
 * element, then getDataSource to obtain the pool.
 */
public class DruidDataSourceFactory implements DataSourceFactory
{
    private Properties prop;

    /** Receives driver/url/username/password from the environment's config. */
    @Override
    public void setProperties(Properties properties) {
        this.prop = properties;
    }

    /**
     * Builds and eagerly initialises the Druid pool.
     *
     * @return an initialised pooled DataSource
     * @throws IllegalStateException if the pool cannot be initialised; the
     *         original code swallowed the SQLException and returned a broken
     *         pool, deferring a confusing failure to first use
     */
    @Override
    public DataSource getDataSource() {
        DruidDataSource druid = new DruidDataSource();
        druid.setDriverClassName(this.prop.getProperty("driver"));
        druid.setUrl(this.prop.getProperty("url"));
        druid.setUsername(this.prop.getProperty("username"));
        druid.setPassword(this.prop.getProperty("password"));
        // Pool sizing knobs, kept disabled as in the original config:
        // druid.setMaxActive(Integer.parseInt(this.prop.getProperty("maxactive")));
        // druid.setInitialSize(Integer.parseInt(this.prop.getProperty("initialsize")));
        try {
            // Fail fast: init() validates the driver and opens initial connections.
            druid.init();
        } catch (SQLException e) {
            druid.close(); // release anything the partial init acquired
            throw new IllegalStateException(
                "Failed to initialise Druid pool for " + this.prop.getProperty("url"), e);
        }
        return druid;
    }
}
DatabaseUtils:
package org.example;
import org.apache.ibatis.io.Resources;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;
import java.io.IOException;
import java.io.InputStream;
/**
 * Helper for opening MyBatis sessions against either the MySQL sink
 * (environment "zjy") or the Hive source (environment "zjy1"), both declared
 * in mybatis-config.xml.
 */
public class DatabaseUtils {
    private static final String configPath="mybatis-config.xml";

    /**
     * Opens a new SqlSession for the requested database.
     *
     * @param db "mysql" selects environment "zjy"; anything else (including
     *           null) selects the Hive environment "zjy1"
     * @return a fresh session; caller must close it (see {@link #close})
     * @throws IllegalStateException if the config cannot be read - the
     *         original code swallowed the IOException and returned null,
     *         guaranteeing an NPE at the first use of the session
     */
    public static SqlSession getSession(String db){
        // "mysql".equals(db) instead of db.equals("mysql"): null-safe.
        String envId = "mysql".equals(db) ? "zjy" : "zjy1";
        try (InputStream is = Resources.getResourceAsStream(configPath)) {
            SqlSessionFactory factory = new SqlSessionFactoryBuilder().build(is, envId);
            return factory.openSession();
        } catch (IOException e) {
            throw new IllegalStateException("Cannot load MyBatis config " + configPath, e);
        }
    }

    /** Closes the session; tolerates null so cleanup paths stay simple. */
    public static void close(SqlSession session){
        if (session != null) {
            session.close();
        }
    }
}
services层:
package org.services;
import org.apache.ibatis.session.SqlSession;
import org.dao.HiveEventDAO;
import org.dao.MySQLEventDAO;
import org.entry.Events;
import org.example.DatabaseUtils;
import java.util.List;
/**
 * Copies event rows from the Hive staging table into MySQL, one flag-page at
 * a time. Pages are pre-computed in Hive (row_number modulo a page size), so
 * each page is fetched with a single findAll(page) call and inserted with a
 * single multi-row insert.
 */
public class HiveToMySqlService {
    // Highest flag value present in the staging table (pages 0..627).
    // NOTE(review): derived from the Hive-side partitioning described below
    // at L283 of this write-up - confirm if the page size changes.
    private static final int LAST_PAGE = 627;

    /**
     * Fetches one page of events from Hive.
     * The Hive session is closed after the fetch; the original code leaked it.
     */
    private List<Events> change(int page){
        SqlSession hiveSession = DatabaseUtils.getSession("hive");
        try {
            HiveEventDAO hivedao = hiveSession.getMapper(HiveEventDAO.class);
            return hivedao.findAll(page);
        } finally {
            DatabaseUtils.close(hiveSession);
        }
    }

    /**
     * Streams every page from Hive into MySQL, committing after each page.
     * The MySQL session is closed when done; the original code leaked it.
     */
    public void fillMySql(){
        SqlSession session = DatabaseUtils.getSession("mysql");
        try {
            MySQLEventDAO medao = session.getMapper(MySQLEventDAO.class);
            for (int i = 0; i <= LAST_PAGE; i++) {
                List<Events> eves = change(i);
                medao.batchInsert(eves);
                session.commit();
            }
        } finally {
            DatabaseUtils.close(session);
        }
    }
}
运行层:
package org.example;
import org.services.HiveToMySqlService;
public class App
{
public static void main(String[] args)
{
HiveToMySqlService hts = new HiveToMySqlService();
hts.fillMySql();
}
}
由于 service 层中每次只从 hive 读取 5000 条数据,就往 mysql 里插入 5000 条,而 hive 单次能够读取的数据量远大于 5000 条,所以对这个类进行了如下优化:
package org.services;
import org.apache.ibatis.session.SqlSession;
import org.dao.HiveDao;
import org.dao.MysqlDao;
import org.entry.Events;
import org.util.DatabaseUtil;
import java.util.ArrayList;
import java.util.List;
/**
 * Optimised Hive-to-MySQL copier: reads larger flag-groups from Hive
 * (row_number modulo 50000 on the Hive side) and re-chunks each group into
 * batches of {@link #BATCH_SIZE} rows for the MySQL multi-row insert.
 */
public class HiveToMysql {
    /** Rows per MySQL batch insert (one commit per batch). */
    private static final int BATCH_SIZE = 8000;
    /** Number of flag partitions produced on the Hive side (flags 0..62). */
    private static final int FLAG_GROUPS = 63;

    static HiveDao hdao;
    static{
        // NOTE(review): this session is intentionally kept open for the life
        // of the JVM so the mapper stays usable, and is never closed - a
        // deliberate leak worth revisiting.
        SqlSession hiveSession = DatabaseUtil.getSession("hive");
        hdao = hiveSession.getMapper(HiveDao.class);
    }

    /**
     * Copies every flag-group into MySQL. Batching via subList replaces the
     * original element-by-element copy/modulo/clear bookkeeping but produces
     * the identical sequence of batches (full batches of 8000 plus a final
     * partial one). The MySQL session is closed when done; the original
     * code leaked it.
     */
    public void fillMysql(){
        SqlSession mysqlSession = DatabaseUtil.getSession("mysql");
        try {
            MysqlDao mdao = mysqlSession.getMapper(MysqlDao.class);
            for (int flag = 0; flag < FLAG_GROUPS; flag++) {
                List<Events> eves = hdao.findAll(flag);
                for (int from = 0; from < eves.size(); from += BATCH_SIZE) {
                    int to = Math.min(from + BATCH_SIZE, eves.size());
                    // Copy the view: the mapper's foreach expects a plain List
                    // and must not hold a window into a list we keep reusing.
                    mdao.batchInsert(new ArrayList<>(eves.subList(from, to)));
                    mysqlSession.commit();
                }
            }
        } finally {
            mysqlSession.close();
        }
    }
}