Version: Apache Flink 1.11.2 for Scala 2.11. Download link:
https://mirror.bit.edu.cn/apache/flink/flink-1.11.2/flink-1.11.2-bin-scala_2.11.tgz
(The layout of the official download page has changed since this was written; the mirror link above is the most complete Flink download address I found, and you can fetch the package from it yourself.)
Hive version: 2.1.1
Official example reference: Apache Flink 1.11 Documentation: HiveCatalog
1. hive-site.xml contents
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
<property>
<name>datanucleus.schema.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>192.168.2.3:2181</value>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/data/emr/hive/tmp/${hive.session.id}_resources</value>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>/data/emr/hive/tmp</value>
</property>
<property>
<name>hive.hwi.listen.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.hwi.listen.port</name>
<value>7002</value>
</property>
<property>
<name>hive.llap.daemon.output.service.port</name>
<value>7009</value>
</property>
<property>
<name>hive.llap.daemon.rpc.port</name>
<value>7007</value>
</property>
<property>
<name>hive.llap.daemon.web.port</name>
<value>7008</value>
</property>
<property>
<name>hive.llap.daemon.yarn.shuffle.port</name>
<value>7006</value>
</property>
<property>
<name>hive.llap.management.rpc.port</name>
<value>7005</value>
</property>
<property>
<name>hive.metastore.db.encoding</name>
<value>UTF-8</value>
</property>
<property>
<name>hive.metastore.port</name>
<value>7004</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.schema.verification.record.version</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/usr/hive/warehouse</value>
</property>
<property>
<name>hive.querylog.location</name>
<value>/data/emr/hive/tmp</value>
</property>
<property>
<name>hive.security.authenticator.manager</name>
<value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
</property>
<property>
<name>hive.security.authorization.createtable.owner.grants</name>
<value>ALL</value>
</property>
<property>
<name>hive.security.authorization.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.security.authorization.manager</name>
<value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/data/emr/hive/tmp/operation_logs</value>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>false</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>192.168.2.3</value>
</property>
<property>
<name>hive.server2.thrift.http.port</name>
<value>7000</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>7001</value>
</property>
<property>
<name>hive.server2.webui.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>7003</value>
</property>
<property>
<name>hive.users.in.admin.role</name>
<value>hadoop</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>192.168.2.3:2181</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.14.23:3306/hivemetastore?useSSL=false&amp;createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<property>
<name>hive.exec.post.hooks</name>
<value>org.apache.atlas.hive.hook.HiveHook,org.apache.hadoop.hive.ql.hooks.LineageLogger</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://192.168.2.3:7004</value>
</property>
</configuration>
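One entry Flink's HiveCatalog relies on is hive.metastore.uris. Before going further it can help to confirm that the metastore Thrift port from the configuration above is reachable from the Flink node. A minimal check, assuming nc (netcat) is installed:
# should report a successful connection to the metastore host/port from hive-site.xml
nc -vz 192.168.2.3 7004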
2. Edit the SQL client configuration file
cd /usr/local/service/flink-1.11.2/conf
vim sql-client-defaults.yaml
The final sql-client-defaults.yaml is shown below. When editing it, pay close attention to the YAML formatting: remove the "[]" from the default "catalogs: [] # empty list" line before adding the catalog entry.
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
# This file defines the default environment for Flink's SQL Client.
# Defaults might be overwritten by a session specific environment.
# See the Table API & SQL documentation for details about supported properties.
#==============================================================================
# Tables
#==============================================================================
# Define tables here such as sources, sinks, views, or temporal tables.
tables: [] # empty list
# A typical table source definition looks like:
# - name: ...
# type: source-table
# connector: ...
# format: ...
# schema: ...
# A typical view definition looks like:
# - name: ...
# type: view
# query: "SELECT ..."
# A typical temporal table definition looks like:
# - name: ...
# type: temporal-table
# history-table: ...
# time-attribute: ...
# primary-key: ...
#==============================================================================
# User-defined functions
#==============================================================================
# Define scalar, aggregate, or table functions here.
functions: [] # empty list
# A typical function definition looks like:
# - name: ...
# from: class
# class: ...
# constructor: ...
#==============================================================================
# Catalogs
#==============================================================================
# Define catalogs here.
catalogs:
  # A typical catalog definition looks like:
  - name: myhive
    type: hive
    hive-conf-dir: /usr/local/service/hive/conf
    # default-database: ...
#==============================================================================
# Modules
#==============================================================================
# Define modules here.
#modules: # note the following modules will be of the order they are specified
# - name: core
# type: core
#==============================================================================
# Execution properties
#==============================================================================
# Properties that change the fundamental execution behavior of a table program.
execution:
  # select the implementation responsible for planning table programs
  # possible values are 'blink' (used by default) or 'old'
  planner: blink
  # 'batch' or 'streaming' execution
  type: streaming
  # allow 'event-time' or only 'processing-time' in sources
  time-characteristic: event-time
  # interval in ms for emitting periodic watermarks
  periodic-watermarks-interval: 200
  # 'changelog', 'table' or 'tableau' presentation of results
  result-mode: table
  # maximum number of maintained rows in 'table' presentation of results
  max-table-result-rows: 1000000
  # parallelism of the program
  parallelism: 1
  # maximum parallelism
  max-parallelism: 128
  # minimum idle state retention in ms
  min-idle-state-retention: 0
  # maximum idle state retention in ms
  max-idle-state-retention: 0
  # current catalog ('default_catalog' by default)
  # current-catalog: default_catalog
  current-catalog: myhive
  # current database of the current catalog (default database of the catalog by default)
  # current-database: default_database
  current-database: default
  # controls how table programs are restarted in case of a failure
  restart-strategy:
    # strategy type
    # possible values are "fixed-delay", "failure-rate", "none", or "fallback" (default)
    type: fallback
#==============================================================================
# Configuration options
#==============================================================================
# Configuration options for adjusting and tuning table programs.
# A full list of options and their default values can be found
# on the dedicated "Configuration" web page.
# A configuration can look like:
# configuration:
# table.exec.spill-compression.enabled: true
# table.exec.spill-compression.block-size: 128kb
# table.optimizer.join-reorder-enabled: true
#==============================================================================
# Deployment properties
#==============================================================================
# Properties that describe the cluster to which table programs are submitted to.
deployment:
  # general cluster communication timeout in ms
  response-timeout: 5000
  # (optional) address from cluster to gateway
  gateway-address: ""
  # (optional) port from cluster to gateway
  gateway-port: 0
3. Create a sqlLibs directory and add the required jars
cd /usr/local/service/flink-1.11.2
mkdir sqlLibs
The jars under the Flink lib directory are shown as a screenshot in the original post; a sketch of the typical jar set follows.
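For reference, these are the jars that typically need to be available (in flink/lib or in sqlLibs) for this Hive + Kafka setup. The Hive entries follow the Flink 1.11 Hive documentation, and their exact versions are assumptions to be matched against your environment; the Kafka/CSV entries are the ones called out in the troubleshooting section at the end of this post:
# Hive integration (versions assumed; must match your Flink/Hive install)
flink-connector-hive_2.11-1.11.2.jar
hive-exec-2.1.1.jar
# Kafka source + CSV format (see the error section below)
flink-connector-kafka_2.11-1.11.2.jar
flink-connector-kafka-base_2.11-1.11.2.jar
flink-csv-1.11.2.jar
kafka-clients-2.4.1.jar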
4. Create a Kafka topic and write data into it
# create the topic
./kafka-topics.sh --zookeeper localhost:2181 --topic test_12 --partitions 1 --replication-factor 1 --create
# write data
./kafka-console-producer.sh --broker-list localhost:9092 --topic test_12
>tom,15
>jhon,21
# verify the data was written by consuming the topic
./kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test_12 --from-beginning
5. Start the Flink SQL client:
cd /usr/local/service/flink-1.11.2/bin   # sql-client.sh lives under bin/; the relative paths below assume this working directory
./sql-client.sh embedded -d ../conf/sql-client-defaults.yaml -l ../sqlLibs
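If the Hive catalog was picked up correctly, it should be visible from inside the client. The statements below are standard SQL client commands; the exact table list depends on what already exists in your Hive metastore:
Flink SQL> show catalogs;
default_catalog
myhive
Flink SQL> use catalog myhive;
Flink SQL> show tables;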
6. Create the table
CREATE TABLE mykafka_t (name String, age Int) WITH (
'connector.type' = 'kafka',
'connector.version' = 'universal',
'connector.topic' = 'test_12',
'connector.properties.bootstrap.servers' = '119.29.23.123:9092',
'format.type' = 'csv',
'update-mode' = 'append'
);
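The DDL above uses the legacy connector.* property keys, which still work in Flink 1.11. For comparison, here is a minimal sketch of the same table written with the newer option keys introduced in 1.11; the table name and the consumer group id are made-up examples, and it assumes the Kafka connector and CSV format jars listed earlier are on the classpath:
CREATE TABLE mykafka_new (name STRING, age INT) WITH (
  'connector' = 'kafka',
  'topic' = 'test_12',
  'properties.bootstrap.servers' = '119.29.23.123:9092',
  'properties.group.id' = 'flink-sql-demo',
  'scan.startup.mode' = 'earliest-offset',
  'format' = 'csv'
);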
7. Query the results while continuously writing data into the topic
select * from mykafka_t;
Write data into Kafka:
/usr/local/service/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test_12
>tom,15
>john,21
>sam,14
The Flink SQL result view (shown as a screenshot in the original post) displays the rows as they arrive; a rough sketch of the output follows.
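Roughly, the 'table' result mode should end up showing something like the following for the rows written above (exact contents depend on the startup offsets and on what was written earlier):
name    age
tom     15
john    21
sam     14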
Bingo! Finally done. Did you think it all went this smoothly on the first try? Of course not. The real substance is below: the problems I ran into along the way.
Error 1:
Flink SQL> select * from mykafka_t;
[ERROR] Could not execute SQL statement. Reason:
org.apache.flink.table.api.NoMatchingTableFactoryException: Could not find a suitable table factory for 'org.apache.flink.table.factories.TableSourceFactory' in
the classpath.
Reason: Required context properties mismatch.
The matching candidates:
org.apache.flink.table.sources.CsvAppendTableSourceFactory
Mismatched properties:
'connector.type' expects 'filesystem', but is 'kafka'
The following properties are requested:
connector.properties.bootstrap.servers=119.29.23.123:9092
connector.topic=test_12
connector.type=kafka
connector.version=universal
format.type=csv
schema.0.data-type=VARCHAR(2147483647)
schema.0.name=name
schema.1.data-type=INT
schema.1.name=age
update-mode=append
The following factories have been considered:
org.apache.flink.table.sources.CsvBatchTableSourceFactory
org.apache.flink.table.sources.CsvAppendTableSourceFactory
org.apache.flink.table.filesystem.FileSystemTableFactory
Solution: the following jars were missing: flink-connector-kafka_2.11-1.11.2.jar, flink-connector-kafka-base_2.11-1.11.2.jar, flink-csv-1.11.2.jar. One way to fetch them is sketched below.
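The missing jars can be pulled from Maven Central into the sqlLibs directory created earlier (the coordinates below are the standard ones for these artifacts; verify the versions against your setup), after which the SQL client must be restarted:
cd /usr/local/service/flink-1.11.2/sqlLibs
wget https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka_2.11/1.11.2/flink-connector-kafka_2.11-1.11.2.jar
wget https://repo1.maven.org/maven2/org/apache/flink/flink-connector-kafka-base_2.11/1.11.2/flink-connector-kafka-base_2.11-1.11.2.jar
wget https://repo1.maven.org/maven2/org/apache/flink/flink-csv/1.11.2/flink-csv-1.11.2.jar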
Error 2:
Flink SQL> select * from source_kafka4;
[ERROR] Could not execute SQL statement. Reason:
java.lang.ClassNotFoundException: org.apache.kafka.common.serialization.ByteArrayDeserializer
Solution: kafka-clients-2.4.1.jar was missing; it can be fetched the same way, as sketched below.
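Assuming Maven Central coordinates again, the client jar can be dropped into sqlLibs, and the SQL client restarted so the new jar is picked up:
cd /usr/local/service/flink-1.11.2/sqlLibs
wget https://repo1.maven.org/maven2/org/apache/kafka/kafka-clients/2.4.1/kafka-clients-2.4.1.jar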