最近写了一个从数据库中读取文件并写在txt文件中(当数据过多的时候写在多个txt文件中)

本文介绍了一种从数据库读取数据并将其写入文件的方法。主要包括:通过配置文件加载数据库连接信息,执行SQL查询获取数据,使用Java进行数据处理,并将处理后的数据写入指定格式的文件。

一:连接数据库:

package com.files.readdata;


import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;


public class DbConnection {

public static Connection getConnection(){

Connection con = null;

Properties properties = new Properties();
InputStream in = Thread.currentThread().getClass().getResourceAsStream("/database.properties");
try {
properties.load(in);
String driver = properties.getProperty("driver");
if(driver != null){
System.setProperty("jdbc.drvers", driver);
}

String url = properties.getProperty("jdbc.url");
String username = properties.getProperty("jdbc.username");
String password = properties.getProperty("jdbc.password");
try {
con = DriverManager.getConnection(url,username,password);
} catch (SQLException e) {
e.printStackTrace();
Logger.getLogger(DbConnection.class.getName()).log(Level.SEVERE, null, e);;
}
} catch (IOException e1) {
e1.printStackTrace();
}

return con;
}


}

二:数据库操作:

package com.files.readdata;


import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;


public class CallDatabase {

private Connection con =null;

public  void selectData(List<InfoBean> retList) throws SQLException {

Properties properties = new Properties();
InputStream in = Thread.currentThread().getClass().getResourceAsStream("/database.properties");
String sql;
String  sqlSA;
String  sqlDW;
con = DbConnection.getConnection();
PreparedStatement pstmt;
ResultSet rs;
try {
properties.load(in);
} catch (IOException e) {

e.printStackTrace();
}
WriteFile writerFile  =  new WriteFile();
//承保对账NB
List<InfoBean> retListNB = new ArrayList<InfoBean>();
sql = "select seq_No,bill_No,policy_No,certi_No,amount,return_code,return_desc from t_pa_nb_list";

pstmt =  con.prepareStatement(sql);
rs = pstmt.executeQuery(sql);
Double sumAmountNB = 0D;
while (rs.next()) {
InfoBean infoBean = new InfoBean();
BigDecimal seqNo = rs.getBigDecimal(1); // 获取的表字段换成自己需要的
String billNo = rs.getString(2);
String policyNo = rs.getString(3);
String certiNo = rs.getString(4);
BigDecimal amount = rs.getBigDecimal(5);
String returnCode = rs.getString(6);
String returnDesc = rs.getString(7);
infoBean.setSeqNo(seqNo);
infoBean.setBillNo(billNo);
infoBean.setPolicyNo(policyNo);
infoBean.setCertiNo(certiNo);
infoBean.setAmount(amount);
infoBean.setReturnCode(returnCode);
infoBean.setReturnDesc(returnDesc);
BigDecimal amountNB = infoBean.getAmount();//获取金额
   Double amNB = amountNB.doubleValue();//转换成double型
sumAmountNB = amNB+sumAmountNB;
retListNB.add(infoBean);

}
System.out.println("数据库中总条数NB:"+retListNB.size());
//追加对账SA
sqlSA = "select seq_No,bill_No,policy_No,certi_No,amount,return_code,return_desc from t_cs_sa_list";
pstmt = con.prepareStatement(sqlSA);
rs = pstmt.executeQuery(sqlSA);

Double sumAmountSA = 0D;
List<InfoBean> retListSA = new ArrayList<InfoBean>();
while (rs.next()) {
InfoBean infoBeanSA = new InfoBean();
BigDecimal seqNo = rs.getBigDecimal(1); // 获取的表字段换成自己需要的
String billNo = rs.getString(2);
String policyNo = rs.getString(3);
String certiNo = rs.getString(4);
BigDecimal amount = rs.getBigDecimal(5);
String returnCode = rs.getString(6);
String returnDesc = rs.getString(7);
infoBeanSA.setSeqNo(seqNo);
infoBeanSA.setBillNo(billNo);
infoBeanSA.setPolicyNo(policyNo);
infoBeanSA.setCertiNo(certiNo);
infoBeanSA.setAmount(amount);
infoBeanSA.setReturnCode(returnCode);
infoBeanSA.setReturnDesc(returnDesc);
BigDecimal amountSA = infoBeanSA.getAmount();//获取金额
   Double amSA = amountSA.doubleValue();//转换成double型
sumAmountSA = amSA+sumAmountSA;
retListSA.add(infoBeanSA);
}

//领取对账DW
sqlDW = "select seq_No,bill_No,policy_No,certi_No,amount,return_code,return_desc from t_cs_dw_list";
pstmt = con.prepareStatement(sqlDW);
rs = pstmt.executeQuery(sqlDW);

Double sumAmountDW = 0D;
List<InfoBean> retListDW = new ArrayList<InfoBean>();
while (rs.next()) {
InfoBean infoBeanDW = new InfoBean();
BigDecimal seqNo = rs.getBigDecimal(1); // 获取的表字段换成自己需要的
String billNo = rs.getString(2);
String policyNo = rs.getString(3);
String certiNo = rs.getString(4);
BigDecimal amount = rs.getBigDecimal(5);
String returnCode = rs.getString(6);
String returnDesc = rs.getString(7);
infoBeanDW.setSeqNo(seqNo);
infoBeanDW.setBillNo(billNo);
infoBeanDW.setPolicyNo(policyNo);
infoBeanDW.setCertiNo(certiNo);
infoBeanDW.setAmount(amount);
infoBeanDW.setReturnCode(returnCode);
infoBeanDW.setReturnDesc(returnDesc);
BigDecimal amount2 = infoBeanDW.getAmount();//获取金额
   Double amDW = amount2.doubleValue();//转换成double型
sumAmountDW = amDW+sumAmountDW;
retListDW.add(infoBeanDW);
}
if (rs != null) {
try {
rs.close();
} catch (Exception e) {
e.printStackTrace();
}
}
if (pstmt != null) {
try {
pstmt.close();
} catch (Exception e) {
e.printStackTrace();
}
}
if (con != null) {
try {
con.close();
} catch (Exception e) {
e.printStackTrace();
}
}
Calendar calendar = Calendar.getInstance();// 此时打印它获取的是系统当前时间
calendar.add(Calendar.DATE, Integer.valueOf(properties.getProperty("indexday"))); // 得到某一天
String yestedayDate = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
String yestedayDay = new SimpleDateFormat("yyyyMMddHHmmss").format(calendar.getTime());
String path = properties.getProperty("path");
String filePath = path + yestedayDate;
String suffixs = properties.getProperty("suffixs");
String splict = properties.getProperty("splict");
String prefixion = properties.getProperty("prefixion");//NB
String prefix = properties.getProperty("prefix");//SA
String pref = properties.getProperty("pref");//DW
if(prefixion.equals("NB")){
String filePathName = filePath+splict+prefixion+yestedayDay+suffixs;
writerFile.writeFile(filePath,filePathName,retListNB,yestedayDate,sumAmountNB); 
}
if(prefix.equals("SA")){

String filePathName = filePath+splict+prefix+yestedayDay+suffixs;
writerFile.writeFileSA(filePath,filePathName,retListSA,yestedayDate,sumAmountSA);
}
if(pref.equals("DW")){
String filePathName = filePath+splict+pref+yestedayDay+suffixs;
writerFile.writeFileDW(filePath,filePathName,retListDW,yestedayDate,sumAmountDW);
}
}

/**
* 结息文件
* @param settlementBeanList
* @throws SQLException
*/
public void selectSettlementBean(List<SettlementBean> settlementBeanList) throws SQLException{

Properties properties = new Properties();

InputStream in = Thread.currentThread().getClass().getResourceAsStream("/database.properties");

con = DbConnection.getConnection();
PreparedStatement ptmt;
String sqlSM;
ResultSet rs;
try {
properties.load(in);
} catch (IOException e) {
e.printStackTrace();
}
WriteFile writeFileSM = new WriteFile();
List<SettlementBean> settlementBeanlist = new ArrayList<SettlementBean>(); 
sqlSM = "select seq_no,policy_no,account_type,interest_rate,settle_int_sum,start_capital,end_capital,start_balance,end_balance,day_bene_amount,settle_date from t_settlement_list";
   ptmt = con.prepareStatement(sqlSM);
   rs = ptmt.executeQuery(sqlSM);
   while(rs.next()){
    SettlementBean  settlement = new SettlementBean();
    BigDecimal seqNo = rs.getBigDecimal(1); // 获取的表字段换成自己需要的
String policyNo = rs.getString(2);
String accountType = rs.getString(3);
BigDecimal  interestRate = rs.getBigDecimal(4);
BigDecimal settleIntSum  = rs.getBigDecimal(5);
BigDecimal startCapital  = rs.getBigDecimal(6);
BigDecimal endCapital    = rs.getBigDecimal(7);
BigDecimal startBalance  = rs.getBigDecimal(8);
BigDecimal endBalance    = rs.getBigDecimal(9);
BigDecimal dayBeneAmount = rs.getBigDecimal(10);
Date settleDate        = rs.getDate(11);
settlement.setSeqNo(seqNo);
settlement.setPolicyNo(policyNo);
settlement.setAccountType(accountType);
settlement.setInterestRate(interestRate);
settlement.setSettleIntSum(settleIntSum);
settlement.setStartCapital(startCapital);
settlement.setEndCapital(endCapital);
settlement.setStartBalance(startBalance);
settlement.setEndBalance(endBalance);
settlement.setDayBeneAmount(dayBeneAmount);
settlement.setSettleDate(settleDate);
settlementBeanlist.add(settlement);
   }
   int countINV = settlementBeanlist.size();
   if(rs != null){
    try {
    rs.close();

} catch (Exception e) {
e.printStackTrace();
}
   }
      if(ptmt != null){
     try {

     ptmt.close();
} catch (Exception e) {
e.printStackTrace();
}
}
      if(con != null){
     try {
con.close();
} catch (Exception e) {
e.printStackTrace();
}
      }
   Calendar calendar = Calendar.getInstance();// 此时打印它获取的是系统当前时间
calendar.add(Calendar.DATE, Integer.valueOf(properties.getProperty("indexday"))); // 得到某一天
String yestedayDate = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
    String pathINV = properties.getProperty("pathINV");
String filePath =  pathINV + yestedayDate;
writeFileSM.writFileINV(countINV,settlementBeanlist,filePath);
        
}
 
}

三:写文件

package com.files.readdata;


import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.List;
import java.util.Properties;


public class WriteFile {
 
/**将数据内容写到txt文件中
 * NB领取对账
 * @param filePath
 * @param filePathName
 * @param retList1
 */


 public  void writeFile(String filePath,String filePathName,List<InfoBean> retListNB,String yestedayDate,Double sumAmountNB){



 File folder = new File(filePath); // 自定义文件路径
 File file = new File(filePathName); // 自定义文件路径
 
 if(!folder.exists() && !folder.isDirectory()){
 folder.mkdir();
 }else{
 System.out.println("文件夹已存在");
 }
 if (!file.exists()) {
 try {
 file.createNewFile();
 } catch (IOException e) {
 e.printStackTrace();
 }
 }else{
 System.out.println("文件已替换");
 }
   String  encoding = "UTF-8";
OutputStreamWriter write;
try {
write = new OutputStreamWriter(new FileOutputStream(file),encoding);
BufferedWriter writer = new BufferedWriter(write);
System.out.println("汇总金额:"+sumAmountNB);
String returnCode = "0";
String returnDesc = "成功";
System.out.println("交易日期:"+yestedayDate);
int account = retListNB.size();//总笔数
System.out.println("总笔数:"+account);
String strr = yestedayDate+"|"+sumAmountNB+"|"+ account+"|"+returnCode+"|"+returnDesc;
writer.write(strr+"\r\n");
System.out.println(strr);
for (InfoBean ib : retListNB) {
String str = ib.getSeqNo()+"|"+ib.getBillNo()+"|"+ib.getPolicyNo()+"|"+ib.getCertiNo()+"|"+ib.getAmount()+"|"+ib.getReturnCode()+"|"+ib.getReturnDesc();
System.out.println(str);
// writer.write(ib.toString()); // 输出流写入到文件中
writer.write(str+"\r\n");

}
writer.close();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
 }
 /**
  * 追加对账
  * @param filePath
  * @param filePathName
  * @param retListSA
  * @param yestedayDate
  * @param sumAmountSA
  */
 public  void writeFileSA(String filePath,String filePathName,List<InfoBean> retListSA,String yestedayDate,Double sumAmountSA){



 File folder = new File(filePath); // 自定义文件路径
 File file = new File(filePathName); // 自定义文件路径
 
 if(!folder.exists() && !folder.isDirectory()){
 
 folder.mkdir();
 }else{
 System.out.println("文件夹已存在");
 }
 if (!file.exists()) {
 try {
 file.createNewFile();
 } catch (IOException e) {
 e.printStackTrace();
 }
 }else{
 System.out.println("文件已替换");
 }
   String  encoding = "UTF-8";
OutputStreamWriter write;
try {
write = new OutputStreamWriter(new FileOutputStream(file),encoding);
BufferedWriter writer = new BufferedWriter(write);
System.out.println("SA汇总金额:"+sumAmountSA);
String returnCode = "0";
String returnDesc = "成功";
System.out.println("SA交易日期:"+yestedayDate);
int accountSA = retListSA.size();
System.out.println("SA总笔数:"+accountSA);
String strr = yestedayDate+"|"+sumAmountSA+"|"+ accountSA+"|"+returnCode+"|"+returnDesc;
writer.write(strr+"\r\n");
System.out.println(strr);
for (InfoBean ib : retListSA) {
String str = ib.getSeqNo()+"|"+ib.getBillNo()+"|"+ib.getPolicyNo()+"|"+ib.getCertiNo()+"|"+ib.getAmount()+"|"+ib.getReturnCode()+"|"+ib.getReturnDesc();
System.out.println(str);
// writer.write(ib.toString()); // 输出流写入到文件中
writer.write(str+"\r\n");

}
writer.close();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
 
 /**
  * 承保对账
  * @param filePath
  * @param filePathName
  * @param retListDW
  * @param yestedayDate
  * @param sumAmountDW
  */
 public  void writeFileDW(String filePath,String filePathName,List<InfoBean> retListDW,String yestedayDate,Double sumAmountDW){



 File folder = new File(filePath); // 自定义文件路径
 File file = new File(filePathName); // 自定义文件路径
 if(!folder.exists() && !folder.isDirectory()){
 
 folder.mkdir();
 }else{
 System.out.println("文件夹已存在");
 }
 if (!file.exists()) {
 try {
 file.createNewFile();
 } catch (IOException e) {
 e.printStackTrace();
 }
 }else{
 System.out.println("文件已替换");
 }
   String  encoding = "UTF-8";
OutputStreamWriter write;
try {
write = new OutputStreamWriter(new FileOutputStream(file),encoding);
BufferedWriter writer = new BufferedWriter(write);
System.out.println("SA汇总金额:"+sumAmountDW);
String returnCode = "0";
String returnDesc = "成功";
System.out.println("SA交易日期:"+yestedayDate);
int accountDW = retListDW.size();
System.out.println("SA总笔数:"+accountDW);
String strr = yestedayDate+"|"+sumAmountDW+"|"+ accountDW+"|"+returnCode+"|"+returnDesc;
writer.write(strr+"\r\n");
System.out.println(strr);
for (InfoBean ib : retListDW) {
String str = ib.getSeqNo()+"|"+ib.getBillNo()+"|"+ib.getPolicyNo()+"|"+ib.getCertiNo()+"|"+ib.getAmount()+"|"+ib.getReturnCode()+"|"+ib.getReturnDesc();
System.out.println(str);
writer.write(str+"\r\n");

}
writer.close();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
 * 写文件
 * @param filePath
 * @param filePathName
 * @param settlementBeanlist
 * @param yestedayDate
 * @param emptyFile
 * @return 
 */
 public void writFileINV(int countINV,List<SettlementBean> settlementBeanlist,String filePath){
   
//定义字符编码格式
String encoding = "UTF-8";
try {
int i = 0;
int y = 0;
File file = createFileINV(i);
File file1 = null;
OutputStreamWriter outWrite = new  OutputStreamWriter(new FileOutputStream(file),encoding);
for(SettlementBean stb: settlementBeanlist){//进行写文件
String settnew = stb.getSeqNo()+"|"+stb.getPolicyNo()+"|"+stb.getAccountType()+"|"+stb.getInterestRate()+"|"+stb.getSettleIntSum()+"|"+stb.getStartCapital()+"|"+stb.getEndCapital()+"|"+stb.getStartBalance()+"|"+stb.getEndBalance()+"|"+stb.getDayBeneAmount()+"|"+stb.getSettleDate();
i++;
if (y <= 4999) {
outWrite.write(settnew+"\r\n");
y++;
}
if (y%5000==0) {
outWrite.close();
file1 = createFileINV(i);
outWrite = new  OutputStreamWriter(new FileOutputStream(file1),encoding);
// outWrite.write(settnew+"\r\n");
y=0;
}
}
outWrite.close();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}
 }
 
 /**
  * 创建文件
  * @param countINV 
  * @return 
  */
 public File createFileINV(int i){
int mulriple= (i/5000)+1;
String strp=String.format("%03d",mulriple);
Properties properties = new Properties();
InputStream in = Thread.currentThread().getClass().getResourceAsStream("/database.properties");
try {
properties.load(in);
} catch (IOException e) {
e.printStackTrace();
}
 Calendar calendar = Calendar.getInstance();// 此时打印它获取的是系统当前时间
 calendar.add(Calendar.DATE, Integer.valueOf(properties.getProperty("indexday"))); 
 String yestedayDay = new SimpleDateFormat("yyyyMMddHHmmss").format(calendar.getTime());
 String suffixs = properties.getProperty("suffixs");
 String splict = properties.getProperty("splict");
 String pre = properties.getProperty("pre");//INV
 String suffix = properties.getProperty("suffix");//tmp
 String yestedayDate = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
      String pathINV = properties.getProperty("pathINV");
 String filePath =  pathINV + yestedayDate;
 String beforeFileName = filePath+splict+pre+yestedayDay+strp;
 String filePathName = beforeFileName+suffixs;
 String emptyFile = beforeFileName+suffix; 
 
    File folder = new File(filePath);
File file = new File(filePathName);
File file2 = new File(emptyFile);
if(!folder.exists() && !folder.isDirectory()){
folder.mkdir();
}else{
System.out.println("文件夹已存在");
}
if(!file.exists() && !file.isDirectory()){
try {
 
file.createNewFile();
} catch (IOException e) {
e.printStackTrace();
}
}else{
System.out.println("文件已替换");
}
if(!file2.exists() && !file2.isDirectory()){
try {
file2.createNewFile();
} catch (IOException e) {
e.printStackTrace();
}
}else{
System.out.println("文件已替换");
}
return file;

 }
 
}

四:主函数运行

package com.files.readdata;


import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;


/**
 * 主函数运行
 * 
 * @author Frank.dai
 *
 */
public class Execution {


public static void main(String[] args) {

CallDatabase callDatabase = new CallDatabase();
List<InfoBean> retList = new ArrayList<InfoBean>();
List<SettlementBean> settlementBeanList = new ArrayList<SettlementBean>();
try {
callDatabase.selectData(retList);
callDatabase.selectSettlementBean(settlementBeanList);
} catch (SQLException e) {
e.printStackTrace();
}


}
}



<think>好的,用户现在想了解如何在Hive中读取MySQL数据库数据或Excel文件。首先,我需要回顾之前用户的问题历史,之前他们询问过如何删除MySQL表,现在转向数据读取,特别是Hive与外部数据源的连接。这可能意味着用户在处理大数据环境,需要将不同数据源的数据整合到Hive中进行分析。 首先,用户的问题涉及两个部分:从MySQL读取和从Excel读取。我需要分别处理这两个部分,因为它们的实现方式不同。对于MySQL,Hive可以通过JDBC连接,使用外部表的方式直接查询。而Excel文件通常需要转换为CSV或文本格式,因为Hive本身不支持直接读取Excel。需要详细说明步骤,给出示例代码,确保用户能够按照指导操作。 接下来,考虑用户可能的背景。他们可能在使用Hive进行数据分析,但数据存储在MySQL或Excel中,需要将这些数据导入Hive进行处理。用户可能不太清楚Hive如何与这些数据源交互,或者遇到了连接问题。需要确保回答清晰,步骤详细,避免技术术语过多,同时提供实际例子。 对于MySQL部分,需要提到Hive的JDBC驱动,配置步骤,以及创建外部表的语法。可能用户对驱动程序的下载和配置不熟悉,需要给出具体的驱动名称和下载建议,比如建议使用MySQL官方驱动。此外,权限问题也很重要,需要提醒用户确保Hive有权限访问MySQL数据库。 关于Excel部分,用户可能需要转换文件格式,因为Hive无法直接读取Excel。需要建议使用工具或脚本将Excel转为CSV,处理可能的格式问题,比如逗号分隔、编码等。同时,创建Hive外部表时,需要指定正确的分隔符和存储格式,比如文本文件或ORC。可能用户需要处理多个Sheet或复杂格式,这时需要建议分Sheet处理或使用其他工具预处理。 还要考虑用户可能遇到的问题,比如驱动类找不到、连接字符串错误、文件路径不正确等。在注意事项中需要提到这些常见问题,给出解决方法,比如检查驱动路径、确认网络连通性等。此外,性能方面,直接通过JDBC查询大数据量可能较慢,建议先导入HDFS,或者使用批量处理。 最后,总结部分需要强调两种方法的不同,以及适用场景,帮助用户根据具体情况选择合适的方法。同时提醒数据预处理的重要性,特别是Excel文件的格式转换,确保数据能够正确导入Hive。</think>以下是 Hive 读取 MySQL 数据库和 Excel 文件数据的实现方法及步骤说明: --- ### 一、Hive 读取 MySQL 数据(通过 JDBC) #### 1. **核心原理** 使用 Hive 的 **JDBC 外部表**功能(需配置 MySQL 驱动),直接映射 MySQL 表到 Hive。 #### 2. **操作步骤** **步骤 1:上传 MySQL JDBC 驱动** 将 MySQL 驱动(如 `mysql-connector-java-8.0.28.jar`)上传到 Hive 的 `lib` 目录: ```bash cp mysql-connector-java-8.0.28.jar $HIVE_HOME/lib/ ``` **步骤 2:创建 Hive 外部表** ```sql CREATE EXTERNAL TABLE hive_mysql_table ( id INT, name STRING, age INT ) STORED BY 'org.apache.hadoop.hive.jdbc.storagehandler.JdbcStorageHandler' TBLPROPERTIES ( 'mapred.jdbc.driver.class' = 'com.mysql.cj.jdbc.Driver', 'mapred.jdbc.url' = 'jdbc:mysql://mysql_host:3306/db_name', 'mapred.jdbc.username' = 'root', 'mapred.jdbc.password' = 'password', 'mapred.jdbc.input.table.name' = 'mysql_table_name' ); ``` **示例**: 映射 MySQL 的 `users` 表到 Hive: ```sql CREATE EXTERNAL TABLE hive_users ( user_id INT, username STRING ) STORED BY 'org.apache.hadoop.hive.jdbc.storagehandler.JdbcStorageHandler' TBLPROPERTIES ( 'mapred.jdbc.driver.class' = 'com.mysql.cj.jdbc.Driver', 'mapred.jdbc.url' = 'jdbc:mysql://192.168.1.100:3306/test_db', 'mapred.jdbc.username' = 'admin', 'mapred.jdbc.password' = '123456', 'mapred.jdbc.input.table.name' = 'users' ); ``` **步骤 3:查询数据** ```sql SELECT * FROM hive_users LIMIT 10; ``` --- ### 二、Hive 读取 Excel 文件 #### 1. **核心原理** Hive 默认不支持直接读取 Excel 文件,需 **转换为文本格式(如 CSV)** 后加载。 #### 2. **操作步骤** **步骤 1:Excel 转 CSV** - 手动操作:在 Excel 中另存为 `.csv` 格式(注意编码为 UTF-8)。 - 脚本转换:使用 Python 的 `pandas` 库: ```python import pandas as pd df = pd.read_excel('data.xlsx', sheet_name='Sheet1') df.to_csv('data.csv', index=False, encoding='utf-8') ``` **步骤 2:上传 CSV 到 HDFS** ```bash hadoop fs -put data.csv /user/hive/input/ ``` **步骤 3:创建 Hive 表** ```sql CREATE EXTERNAL TABLE hive_excel_data ( column1 STRING, column2 INT, column3 DOUBLE ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/user/hive/input/'; ``` **步骤 4:查询数据** ```sql SELECT * FROM hive_excel_data; ``` --- ### 三、注意事项 1. **MySQL 读取优化** - 大数据量场景下,建议先用 `Sqoop` 将 MySQL 数据导入 HDFS,再通过 Hive 加载。 - JDBC 直连适合小规模数据或低频查询。 2. **Excel 文件处理** - 确保 CSV 文件 **无特殊字符**(如换行符、逗号嵌套),可用 `OpenCSVSerDe` 处理复杂格式: ```sql CREATE TABLE csv_table (...) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'; ``` 3. **权限问题** - MySQL 账号需具备远程访问权限(通过 `GRANT` 授权)。 - HDFS 路径需对 Hive 用户开放读权限。 4. **驱动兼容性** - Hive 版本需与 MySQL 驱动版本匹配(如 Hive 3.x + MySQL 8.x 驱动)。 --- ### 四、总结 | 数据源 | 推荐方法 | 适用场景 | |-------------|--------------------------|-----------------------| | MySQL | JDBC 外部表或 Sqoop 导入 | 高频查询或实时性要求高 | | Excel | 转换为 CSV 后加载 | 一次性或低频数据分析 | **建议**:优先通过 `Sqoop` 或 `DataX` 等工具实现结构化数据迁移,直接读取适合轻量级场景。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值