1、Impala
配置修改
在每一个服务器的 Impala
的配置文件中添加如下配置。
vim /etc/default/impala
# 在 IMPALA_SERVER_ARGS 下添加:
-kudu_master_hosts=node-1:7051,node-2:7051,node-3:7051
2、创建Kudu
表
需要先启动 hdfs
、Hive
、Kudu
、Impala
。使用 Impala
的 Shell
控制台。
2.1、内部表
内部表由 Impala
管理,当您从Impala
中删除时,数据和表确实被删除。**当您使用 ****Impala
**创建新表时,它通常是内部表。
CREATE TABLE my_first_table(
id BIGINT,
name STRING,
PRIMARY KEY(id)
)
PARTITION BY HASH PARTITIONS 16
STORED AS KUDU
TBLPROPERTIES (
'kudu.master_addresses' = 'node1:7051,node2:7051,node3:7051',
'kudu.table_name' = 'my_first_table'
);
# 在 CREATE TABLE 语句中,必须首先列出构成主键的列。
2.2、外部表
外部表(创建者 CREATE EXTERNAL TABLE
)不受 Impala
管理,并且删除此表不会将表从其源位置(此处为 Kudu
)丢弃。相反,它只会去除 Impala
和 Kudu
之间的映射。这是 Kudu
提供的用于将现有表映射到Impala
的语法。
首先使用 Java
创建 Kudu
表:
public class CreateTable {
private static ColumnSchema newColumn(String name, Type type, boolean iskey) {
ColumnSchema.ColumnSchemaBuilder column =
new ColumnSchema.ColumnSchemaBuilder(name, type);
column.key(iskey);
return column.build();
}
public static void main(String[] args) throws KuduException {
// master 地址
final String masteraddr = "node1,node2,node3";
// 创建 kudu 的数据库链接
KuduClient client = new
KuduClient.KuduClientBuilder(masteraddr).defaultSocketReadTimeoutMs(6000)
.build();
// 设置表的 schema
List<ColumnSchema> columns = new LinkedList<ColumnSchema>();
columns.add(newColumn("CompanyId", Type.INT32, true));
columns.add(newColumn("WorkId", Type.INT32, false));
columns.add(newColumn("Name", Type.STRING, false));
columns.add(newColumn("Gender", Type.STRING, false));
columns.add(newColumn("Photo", Type.STRING, false));
Schema schema = new Schema(columns);
//创建表时提供的所有选项
CreateTableOptions options = new CreateTableOptions();
// 设置表的 replica 备份和分区规则
List<String> parcols = new LinkedList<String>();
parcols.add("CompanyId");
//设置表的备份数
options.setNumReplicas(1);
//设置 range 分区
options.setRangePartitionColumns(parcols);
//设置 hash 分区和数量
options.addHashPartitions(parcols, 3);
try {
client.createTable("person", schema, options);
} catch (KuduException e) {
e.printStackTrace();
}
client.close();
}
}
使用Impala
创建外部表 , 将 Kudu
的表映射到Impala
上。
CREATE EXTERNAL TABLE `person` STORED AS KUDU
TBLPROPERTIES(
'kudu.table_name' = 'person',
'kudu.master_addresses' = 'node1:7051,node2:7051,node3:7051')
3、 使用Impala
对Kudu
进行DML
3.1、插入数据
Impala
允许使用标准 SQL
语句将数据插入 Kudu
。
CREATE TABLE my_first_table1(
id BIGINT,
name STRING,
PRIMARY KEY(id)
)
PARTITION BY HASH PARTITIONS 16
STORED AS KUDU
TBLPROPERTIES(
'kudu.table_name' = 'person1',
'kudu.master_addresses' =
'node1:7051,node2:7051,node3:7051');
# 此示例插入单个行
INSERT INTO my_first_table VALUES (50, "zhangsan");
# 此示例插入 3 行:
INSERT INTO my_first_table VALUES (1, "john"), (2, "jane"), (3, "jim");
批量导入数据:从Impala
和Kudu
的角度来看,通常表现最好的方法通常是使用Impala
中SELECT FROM
语句导入数据。
INSERT INTO my_first_table SELECT * FROM temp1;
3.2、更新数据
UPDATE my_first_table SET name="xiaowang" where id =1 ;
3.3、删除数据
delete from my_first_table where id =2;
4、更改表属性
4.1、重命名Impala
表
ALTER TABLE PERSON RENAME TO person_temp;
4.2、重新命名内部表的基础 Kudu
表
CREATE TABLE kudu_student (
CompanyId INT,
WorkId INT,
Name STRING,
Gender STRING,
Photo STRING,
PRIMARY KEY(CompanyId)
)
PARTITION BY HASH PARTITIONS 16
STORED AS KUDU
TBLPROPERTIES (
'kudu.master_addresses' = 'node1:7051,node2:7051,node3:7051',
'kudu.table_name' = 'student'
);
如果表是内部表,则可以通过更改 kudu.table_name
属性重命名底层的Kudu
表。
ALTER TABLE kudu_student SET TBLPROPERTIES('kudu.table_name' = 'new_student');
4.3、将外部表重新映射 Kudu
表
如果用户在使用过程中发现其他应用程序重新命名了 Kudu
表,那么此时的外部表需要重新映射到 Kudu
上。
4.3.1、创建一个外部表
CREATE EXTERNAL TABLE external_table
STORED AS KUDU
TBLPROPERTIES (
'kudu.master_addresses' = 'node1:7051,node2:7051,node3:7051',
'kudu.table_name' = 'person'
);
4.3.2、重新映射外部表,指向不同的 Kudu
表
ALTER TABLE external_table
SET TBLPROPERTIES('kudu.table_name' = 'hashTable')
上面的操作是: 将 external_table
映射的 PERSON
表重新指向hashTable
表。
4.4、更改 Kudu master
地址
ALTER TABLE my_table
SET TBLPROPERTIES('kudu.master_addresses' = 'kudu-new-master.example.com:7051');
4.5、将内部表改为外部表
ALTER TABLE my_table SET TBLPROPERTIES('EXTERNAL' = 'TRUE');
5、Impala
使用 Java
操作 Kudu
对于 Impala
而言,开发人员是可以通过JDBC
连接 Impala
的,有了 JDBC
,开发人员可以通过 Impala
来间接操作 Kudu
。
5.1、引入依赖
<!--impala 的 jdbc 操作-->
<dependency>
<groupId>com.cloudera</groupId>
<artifactId>ImpalaJDBC41</artifactId>
<version>2.5.42</version>
</dependency>
<!--Caused by : ClassNotFound : thrift.protocol.TPro-->
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
<version>0.9.3</version>
<type>pom</type>
</dependency>
<!--Caused by : ClassNotFound : thrift.protocol.TPro-->
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>0.9.3</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service-rpc</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
</exclusion>
</exclusions>
<version>1.1.0</version>
</dependency>
<!--导入 hive-->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>1.1.0</version>
</dependency>
5.2、JDBC
连接 Impala
操作 Kudu
使用 JDBC
连接 Impala
操作 Kudu
,与 JDBC
连接 MySQL
做更重增删改查基本一样**。
5.2.1、创建实体类
@Setter
@Getter
public class Person {
private int companyId;
private int workId;
private String name;
private String gender;
private String photo;
public Person(int companyId, int workId, String name, String gender, String photo) {
this.companyId = companyId;
this.workId = workId;
this.name = name;
this.gender = gender;
this.photo = photo;
}
}
5.2.2、JDBC
连接 Impala
对Kudu
进行增删改查
import java.sql.*;
public class Contants {
private static String JDBC_DRIVER="com.cloudera.impala.jdbc41.Driver";
private static String
CONNECTION_URL="jdbc:impala://node1:21050/default;auth=noSasl";
//定义数据库连接
static Connection conn=null;
//定义 PreparedStatement 对象
static PreparedStatement ps=null;
//定义查询的结果集
static ResultSet rs= null;
//数据库连接
public static Connection getConn(){
try {
Class.forName(JDBC_DRIVER);
conn = DriverManager.getConnection(CONNECTION_URL);
} catch (Exception e) {
e.printStackTrace();
}
return conn;
}
//创建一个表
public static void createTable(){
conn=getConn();
String sql = "CREATE TABLE impala_kudu_test" +
"(companyId BIGINT," +
"workId BIGINT," +
"name STRING," +
"gender STRING," +
"photo STRING," +
"PRIMARY KEY(companyId)" +
")" +
"PARTITION BY HASH PARTITIONS 16 " +
"STORED AS KUDU " +
"TBLPROPERTIES (" +
"'kudu.master_addresses' = 'node1:7051,node2:7051,node3:7051'," +
"'kudu.table_name' = 'impala_kudu_test'" +
");";
try {
ps = conn.prepareStatement(sql);
ps.execute();
} catch (SQLException e) {
e.printStackTrace();
}
}
//查询数据
public static ResultSet queryRows(){
try {
//定义执行的 sql 语句
String sql="select * from impala_kudu_test";
ps = getConn().prepareStatement(sql);
rs= ps.executeQuery();
} catch (SQLException e) {
e.printStackTrace();
}
return rs;
}
//打印结果
public static void printRows(ResultSet rs) {
/**
private int companyId;
private int workId;
private String name;
private String gender;
private String photo;
*/
try {
while (rs.next()){
//获取表的每一行字段信息
int companyId = rs.getInt("companyId");
int workId = rs.getInt("workId");
String name = rs.getString("name");
String gender = rs.getString("gender");
String photo = rs.getString("photo");
System.out.print("companyId:"+companyId+" ");
System.out.print("workId:"+workId+" ");
System.out.print("name:"+name+" ");
System.out.print("gender:"+gender+" ");
System.out.println("photo:"+photo);
}
} catch (SQLException e) {
e.printStackTrace();
} finally {
if(ps != null){
try {
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(conn !=null){
try {
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
//插入数据
public static void insertRows(Person person) {
conn = getConn();
String sql = "insert into table impala_kudu_test(companyId,workId,name,gender,photo) values(?,?,?,?,?)";
try {
ps=conn.prepareStatement(sql);
//给占位符?赋值
ps.setInt(1,person.getCompanyId());
ps.setInt(2,person.getWorkId());
ps.setString(3,person.getName());
ps.setString(4,person.getGender());
ps.setString(5,person.getPhoto());
ps.execute();
} catch (SQLException e) {
e.printStackTrace();
}finally {
if(ps !=null){
try {
//关闭
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(conn !=null){
try {
//关闭
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
//更新数据
public static void updateRows(Person person) {
//定义执行的 sql 语句
String sql = "update impala_kudu_test set workId=" + person.getWorkId()+
",name='" + person.getName() + "' ," + "gender='" + person.getGender() + "' ," +
"photo='" + person.getPhoto() +"' where companyId=" + person.getCompanyId();
try {
ps= getConn().prepareStatement(sql);
ps.execute();
} catch (SQLException e) {
e.printStackTrace();
}finally {
if(ps !=null){
try {
//关闭
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if(conn !=null){
try {
//关闭
conn.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
//删除数据
public static void deleteRows(int companyId){
//定义 sql 语句
String sql="delete from impala_kudu_test where companyId=" + companyId;
try {
ps =getConn().prepareStatement(sql);
ps.execute();
} catch (SQLException e) {
e.printStackTrace();
}
}
//删除表
public static void dropTable() {
String sql="drop table if exists impala_kudu_test";
try {
ps =getConn().prepareStatement(sql);
ps.execute();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
5.2.3、代码测试运行
import java.sql.Connection;
public class ImpalaJdbcClient {
public static void main(String[] args) {
Connection conn = Contants.getConn();
//创建一个表
Contants.createTable();
//插入数据
Contants.insertRows(new Person(1,100,"lisi","male","lisi-photo"));
//查询表的数据
ResultSet rs = Contants.queryRows();
Contants.printRows(rs);
//更新数据
Contants.updateRows(new Person(1,200,"zhangsan","male","zhangsan-photo"));
//删除数据
Contants.deleteRows(1);
//删除表
Contants.dropTable();
}
}