Connecting IDEA to Hadoop and Writing File Data from Hadoop into a Database via IDEA
I. Connecting IDEA to Hadoop
1. Create a new Maven project
2. Add the Hadoop-related JAR dependencies

Ctrl+C && Ctrl+V: paste the following dependencies into the <dependencies> section of your pom.xml.
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>commons-logging</groupId>
    <artifactId>commons-logging</artifactId>
    <version>1.2</version>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>
3. Configure the log4j.properties file

Create a log4j.properties file (under src/main/resources in a Maven project) and copy the following content into it:
hadoop.root.logger=DEBUG, console
log4j.rootLogger = DEBUG, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
4. Write the HDFS utility class
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;

public class URLcat {
    static {
        // Teach java.net.URL to handle the hdfs:// scheme (can only be set once per JVM)
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws MalformedURLException, IOException {
        InputStream in = null;
        try {
            // Open the file on HDFS and copy its contents to standard output
            in = new URL("hdfs://106.13.58.73:9000/in/text1.txt").openStream();
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
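If the read fails instead of printing the file contents, it helps to first confirm that the client can reach the NameNode at all. Below is a minimal sketch of such a check (not part of the original steps; it reuses the same NameNode address and /in directory as the example above, and ListIn is just an illustrative class name) that lists the files under /in with the FileSystem API:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class ListIn {
    public static void main(String[] args) throws Exception {
        // Connect to the same NameNode used by URLcat
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://106.13.58.73:9000"), conf);
        // Print every entry under /in; text1.txt should appear here if the path is correct
        for (FileStatus status : fs.listStatus(new Path("/in"))) {
            System.out.println(status.getPath() + "\t" + status.getLen() + " bytes");
        }
        fs.close();
    }
}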
If you see a "log4j:WARN No appenders could be found for logger" warning when running the program, refer to the following article to resolve it:
log4j警告之log4j:WARN No appenders could be found for logger(org.apache.ibatis.logging.LogFactory)
II. Connecting IDEA to MySQL
1. Import the mysql-connector-java-5.1.41-bin.jar package (alternatively, the same driver can be pulled in as the Maven dependency mysql:mysql-connector-java:5.1.41)
2. Write the MySQL class
import java.sql.*;

public class example1 {
    static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
    static final String DB_URL = "jdbc:mysql://localhost:3306/ceshi";
    static final String USER = "root";
    static final String PASS = "aaaa1234!";

    public static void main(String[] args) {
        Connection conn = null;
        Statement stmt = null;
        try {
            // Register the JDBC driver
            Class.forName(JDBC_DRIVER);
            // Open the connection
            System.out.println("Connecting to the database...");
            conn = DriverManager.getConnection(DB_URL, USER, PASS);
            // Execute the query
            System.out.println("Creating a Statement object...");
            stmt = conn.createStatement();
            String sql;
            sql = "SELECT * FROM id_pass"; // query the id_pass table; use the actual table name in your database
            ResultSet rs = stmt.executeQuery(sql);
            // Iterate over the result set
            while (rs.next()) {
                // Retrieve columns by name; the Java types must match the column types in the database
                int id = rs.getInt("id");
                String password = rs.getString("password");
                // Add further columns here if the table has them
                // Print the row
                System.out.print("ID: " + id);
                System.out.print(", password: " + password);
                System.out.print("\n");
            }
            // Close everything once done
            rs.close();
            stmt.close();
            conn.close();
        } catch (SQLException se) {
            // Handle JDBC errors
            se.printStackTrace();
        } catch (Exception e) {
            // Handle Class.forName errors
            e.printStackTrace();
        } finally {
            // Release resources
            try {
                if (stmt != null) stmt.close();
            } catch (SQLException se2) {
                // nothing to do
            }
            try {
                if (conn != null) conn.close();
            } catch (SQLException se) {
                se.printStackTrace();
            }
        }
        System.out.println("Goodbye!");
    }
}
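The same query can also be written with try-with-resources, which closes the connection, statement, and result set automatically even if an exception is thrown. This is only a sketch assuming the same ceshi database and id_pass table as above; Example1Try is an illustrative class name:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class Example1Try {
    static final String DB_URL = "jdbc:mysql://localhost:3306/ceshi";
    static final String USER = "root";
    static final String PASS = "aaaa1234!";

    public static void main(String[] args) throws Exception {
        // Register the JDBC driver
        Class.forName("com.mysql.jdbc.Driver");
        // try-with-resources closes rs, ps and conn in reverse order automatically
        try (Connection conn = DriverManager.getConnection(DB_URL, USER, PASS);
             PreparedStatement ps = conn.prepareStatement("SELECT id, password FROM id_pass");
             ResultSet rs = ps.executeQuery()) {
            while (rs.next()) {
                System.out.println("ID: " + rs.getInt("id") + ", password: " + rs.getString("password"));
            }
        }
    }
}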
III. Writing file data from Hadoop into the database via IDEA
This part assumes the previous two sections work correctly, i.e. that all the required packages have been imported without problems.
Write the HdfsMysql class:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.*;

public class HdfsMysql {
    // Database: ceshi, host address: localhost, port: 3306
    static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
    static final String DB_URL = "jdbc:mysql://localhost:3306/ceshi";
    static final String USER = "root";
    static final String PASS = "aaaa1234!";

    public static void main(String[] args) throws URISyntaxException, IOException, ClassNotFoundException, SQLException {
        // Create a FileSystem object and connect to HDFS
        URI uri = new URI("hdfs://106.13.58.73:9000");
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(uri, conf);

        // Connect to the database
        Class.forName(JDBC_DRIVER); // register the JDBC driver
        Connection conn = DriverManager.getConnection(DB_URL, USER, PASS); // open the database connection
        PreparedStatement ps = conn.prepareStatement("insert into photo(photo_id) values (?)");

        // Read the data from HDFS, split each line, and insert it into MySQL
        FSDataInputStream in = fs.open(new Path("/in/text1.txt"));
        BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        String line = null;
        while ((line = br.readLine()) != null) {
            String[] split = line.split(",");
            ps.setString(1, split[0]);
            //ps.setString(2, split[1]);
            //ps.setInt(3, Integer.parseInt(split[2]));
            ps.executeUpdate();
        }

        // Close resources: the reader and stream first, then the JDBC and HDFS handles
        br.close();
        in.close();
        ps.close();
        conn.close();
        fs.close();
    }
}
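For larger input files, calling executeUpdate() once per line makes a round trip to MySQL for every row. Below is a minimal sketch of the same read/insert loop using JDBC batching instead (same photo table and comma-separated input assumed; the batch size of 1000 is arbitrary):

// Inside HdfsMysql.main(), replacing the read/insert loop above (sketch)
conn.setAutoCommit(false);            // commit once at the end instead of after every row
String line;
int pending = 0;
while ((line = br.readLine()) != null) {
    String[] split = line.split(",");
    ps.setString(1, split[0]);
    ps.addBatch();                    // queue the insert locally
    if (++pending % 1000 == 0) {
        ps.executeBatch();            // send 1000 queued inserts at a time
    }
}
ps.executeBatch();                    // send whatever is left
conn.commit();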
Either work hard, or give up.