Consuming Kafka Data with Flink


Continuously updated; like the post to nudge the next update.
09-12: fixed the issue where the value could not be parsed

Prerequisites

Parsing JSON

Maven dependencies

<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>2.14.2</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.module</groupId>
            <artifactId>jackson-module-jaxb-annotations</artifactId>
            <version>2.14.2</version>
        </dependency>
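
The parser below relies on ObjectMapper and JsonNode, which live in jackson-databind. With the coordinates above it usually arrives transitively through jackson-module-jaxb-annotations, but declaring it explicitly is more robust (version assumed to match the 2.14.2 used above):

        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>2.14.2</version>
        </dependency>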

JSON test file

{
  "min_position": 8,
  "has_more_items": true,
  "items_html": "Bus",
  "new_latent_count": 8,
  "data":
  {
    "length": 28,
    "text": "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
  },
  "numericalArray":
  [
    33,
    26,
    28,
    32,
    32
  ],
  "StringArray":
  [
    "Carbon",
    "Oxygen",
    "Nitrogen",
    "Oxygen"
  ],
  "multipleTypesArray": "Hello",
  "objArray":
  [
    {
      "class": "upper",
      "age": 1
    },
    {
      "class": "lower",
      "age": 8
    },
    {
      "class": "upper",
      "age": 8
    },
    {
      "class": "lower",
      "age": 1
    },
    {
      "class": "lower",
      "age": 9
    }
  ]
}

Main code

package src.main.java.com.aliyun.openservices.kafka.TestDemoCode;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

/**
 * Modified from code generated by Alibaba Cloud's Qianwen (Qwen) model (Jackson-based parsing)
 */
public class JsonParser {
    public static void main(String[] args) {
        ObjectMapper objectMapper = new ObjectMapper(new JsonFactory());
        JsonNode root = null;
        try {
            String path = "test.json";
            root = objectMapper.readTree(new File(path));

        } catch (IOException e) {
            e.printStackTrace();
        }
        if (root != null) {
            // Parse the JSON tree and print the result
            printJson(root);
        }
    }
    private static void printJson(JsonNode node) {
        if (node.isObject()) {
            System.out.println("{");
            for (Iterator<Map.Entry<String, JsonNode>> it = node.fields(); it.hasNext(); ) {
                Map.Entry<String, JsonNode> field = it.next();
                String fieldName = field.getKey();
                JsonNode fieldNode = field.getValue();
                if (fieldNode.isObject()) {
                    // Recurse into nested objects; the recursive call prints its own braces
                    System.out.print("\t" + fieldName + ": ");
                    printJson(fieldNode);
                } else {
                    // Scalars and arrays are printed compactly via toString()
                    System.out.println("\t" + fieldName + ": " + fieldNode.toString() + ",");
                }
            }
            System.out.println("}");
        } else if (node.isArray()) {
            System.out.println("[");
            for (JsonNode arrayNode : node) {
                // Each element, object or scalar, is printed compactly
                System.out.println("\t" + arrayNode.toString() + ",");
            }
            System.out.println("]");
        } else {
            System.out.println(node.toString());
        }
    }
}
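
If you only need a few specific fields rather than a dump of the whole tree, Jackson's tree model can address them directly. A minimal sketch against the test file above (the class name is mine, not part of the original project):

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;

public class JsonFieldAccess {
    public static void main(String[] args) throws Exception {
        JsonNode root = new ObjectMapper().readTree(new File("test.json"));
        // get() walks one level of the tree; asInt()/asText() coerce leaf values
        System.out.println(root.get("min_position").asInt());      // 8
        System.out.println(root.get("data").get("text").asText());
        // Array nodes are iterable
        for (JsonNode obj : root.get("objArray")) {
            System.out.println(obj.get("class").asText() + " -> " + obj.get("age").asInt());
        }
    }
}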

Final output

To be fair, the code actually runs after a few tweaks; to adapt it to your own needs you will still have to make concrete changes. Qwen is pretty decent, I like it.

{
	min_position: 8,
	has_more_items: true,
	items_html: "Bus",
	new_latent_count: 8,
	data: {
	length: 28,
	text: "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.",
}
	numericalArray: [33,26,28,32,32],
	StringArray: ["Carbon","Oxygen","Nitrogen","Oxygen"],
	multipleTypesArray: "Hello",
	objArray: [{"class":"upper","age":1},{"class":"lower","age":8},{"class":"upper","age":8},{"class":"lower","age":1},{"class":"lower","age":9}],
}

Writing to Kafka (Alibaba Cloud Kafka)

Setup guide

Alibaba Cloud Kafka official site
Official sample code (for the actual write path, defer to the official code)

A note before the code: Kafka should already be provisioned and configured on your cloud account.

Code details: VPC SSL version

kafka.properties configuration (console screenshot omitted)
Download the certificate file (the SSL truststore) shown in the console (screenshot omitted)
kafka_client_jaas.conf configuration (console screenshot omitted)
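
Since the screenshots do not reproduce here, a minimal sketch of the two files follows; the property keys are the ones the producer code below actually reads, and every value is a placeholder to be replaced with your own instance's settings from the console:

## kafka.properties (all values are placeholders)
bootstrap.servers=your-vpc-ssl-endpoint:9093
topic=sk_test_01
ssl.truststore.location=/path/to/kafka.client.truststore.jks
sasl.mechanism=PLAIN
sasl.username=your-username
sasl.password=your-password

The kafka_client_jaas.conf read via JavaKafkaConfigurer.configureSasl() follows the standard Kafka JAAS layout:

KafkaClient {
  org.apache.kafka.common.security.plain.PlainLoginModule required
  username="your-username"
  password="your-password";
};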

Custom JSON class

package src.main.java.com.aliyun.openservices.kafka.JsonData;
import org.apache.commons.lang3.RandomStringUtils;
import java.util.Random;
// Gson handles the conversion to JSON
import com.google.gson.Gson;

public class JsonOne {

    // Fields populated with random data
    String uName = " ";
    int uId = 0;

    static Random randomNum = new Random();
    static Gson gson = new Gson();

    public JsonOne() {
    }

    private JsonOne(String uName, int uId) {
        this.uName = uName;
        this.uId = uId;
    }

    // Build a JsonOne with a random 5-character name and a random id, then serialize it
    public String getValue() {
        return gson.toJson(new JsonOne(RandomStringUtils.random(5,
                new char[]{'a', 'b', 'c', 'd', 'e', 'f', '1', '2', '3'}),
                randomNum.nextInt(20)));
    }
}
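
For a quick sanity check of the payload shape, a main method can be dropped into JsonOne (my addition, not part of the original class; the values are random on every call):

    public static void main(String[] args) {
        JsonOne jsonOne = new JsonOne();
        // Prints something like {"uName":"ab1c2","uId":7}
        System.out.println(jsonOne.getValue());
    }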

Writing Kafka messages

// Official sample code; only the key/value details were changed, everything else is unmodified
package src.main.java.com.aliyun.openservices.kafka.ons;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.Future;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.config.SaslConfigs;
import org.apache.kafka.common.config.SslConfigs;
import src.main.java.com.aliyun.openservices.kafka.JsonData.JsonOne;

public class KafkaProducerDemo {

    public static void main(String[] args) {
        //Set the path to the SASL (JAAS) config file
        JavaKafkaConfigurer.configureSasl();

        //Load kafka.properties
        Properties kafkaProperties =  JavaKafkaConfigurer.getKafkaProperties();

        Properties props = new Properties();
        //Set the endpoint; get the endpoint for your topic from the console
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaProperties.getProperty("bootstrap.servers"));
        //Set the path to the SSL root certificate; remember to change XXX to your own path
        //Like the SASL file, this file must not be packaged into the jar
        props.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, kafkaProperties.getProperty("ssl.truststore.location"));
        //Password of the truststore; keep it unchanged
        props.put(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, "KafkaOnsClient");
        //Access protocol; currently only SASL_SSL is supported
        props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_SSL");

        // Configure the SASL account
        String saslMechanism = kafkaProperties.getProperty("sasl.mechanism");
        String username = kafkaProperties.getProperty("sasl.username");
        String password = kafkaProperties.getProperty("sasl.password");
        if (!JavaKafkaConfigurer.isEmpty(username)
                && !JavaKafkaConfigurer.isEmpty(password)) {
            String prefix = "org.apache.kafka.common.security.scram.ScramLoginModule";
            if ("PLAIN".equalsIgnoreCase(saslMechanism)) {
                prefix = "org.apache.kafka.common.security.plain.PlainLoginModule";
            }
            String jaasConfig = String.format("%s required username=\"%s\" password=\"%s\";", prefix, username, password);
            props.put(SaslConfigs.SASL_JAAS_CONFIG, jaasConfig);
        }

        //SASL mechanism; keep it unchanged
        props.put(SaslConfigs.SASL_MECHANISM, saslMechanism);
        //Serializers for Kafka message keys and values
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        //Maximum time a request may block
        props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 30 * 1000);
        //Number of internal client retries
        props.put(ProducerConfig.RETRIES_CONFIG, 5);
        //Backoff between internal client retries
        props.put(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG, 3000);

        //Disable hostname verification (set to empty)
        props.put(SslConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG, "");

        //Construct the Producer. Note that it is thread-safe; one instance per process is generally enough.
        //For higher throughput you can construct a few more, but not too many, preferably no more than 5.
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);

        //Construct a Kafka message
        String topic = kafkaProperties.getProperty("topic"); //Topic the message belongs to; create it in the console and fill it in here
        String key = "user_";

        try {
            //Collecting futures in batches speeds things up, but keep the batches from growing too large
            List<Future<RecordMetadata>> futures = new ArrayList<Future<RecordMetadata>>(128);
            Random randomNum = new Random();
            JsonOne jsonOne = new JsonOne();
            for (int i =0; i < 100; i++) {
                //Send the message and obtain a Future
                String value = jsonOne.getValue();
                ProducerRecord<String, String> kafkaMessage =  new ProducerRecord<String, String>(topic,
                        // Use the key to categorize records so downstream extraction is easier
                        key + randomNum.nextInt(20),
                        value
                        );
                Future<RecordMetadata> metadataFuture = producer.send(kafkaMessage);
                futures.add(metadataFuture);

            }
            producer.flush();
            for (Future<RecordMetadata> future: futures) {
                //Synchronously wait for the Future's result
                try {
                    RecordMetadata recordMetadata = future.get();
                    System.out.println("Produce ok:" + recordMetadata.toString());
                } catch (Throwable t) {
                    t.printStackTrace();
                }
            }
        } catch (Exception e) {
            //If sending still fails after the client's internal retries, the application must handle the error
            //Common errors: https://help.aliyun.com/document_detail/68168.html?spm=a2c4g.11186623.6.567.2OMgCB
            System.out.println("error occurred");
            e.printStackTrace();
        }
    }
}
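
One small thing worth adding in your own version: the official demo never closes the producer. KafkaProducer implements Closeable, so releasing it once all sends have completed is good hygiene (my addition, not part of the official sample):

        // At the very end of main, after the try/catch block:
        producer.close(); // releases buffers, I/O threads, and network connections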

Viewing the messages on the Kafka side (console screenshot omitted)

Flink consuming Kafka

vvr-8.0.1-flink-1.17

--********************************************************************--
-- Author:         sk
-- Created Time:   2023-09-11 17:17:13
-- Description:    Write your description here
-- Hints:          You can use SET statements to modify the configuration
--********************************************************************--

/*
-- The key could not be parsed here, while the value could be
CREATE TEMPORARY TABLE source_kafka01 (
  `user_id` STRING,
  `Json_one` STRING,
  -- `Json_two` BIGINT,
  `ts` TIMESTAMP_LTZ(3) METADATA FROM 'timestamp' VIRTUAL
) WITH (
  'connector' = 'kafka',
  'topic' = 'sk_test_01',
  'properties.bootstrap.servers' = 'vpc:port',
  'scan.startup.mode' = 'earliest-offset',
  'key.fields' = 'user_id',
  --Start consuming from the specified millisecond timestamp 1655395200000
--   'scan.startup.mode' = 'timestamp',
--   'scan.startup.timestamp-millis' = '1655395200000',
  'value.format' = 'raw'
  ,'key.format' = 'raw'
  ,'value.fields-include' = 'EXCEPT_KEY'
  -- ,'json.ignore-parse-errors' = 'true'
)
;
*/
-- At first the key could not be parsed, while the value could be
-- A column named Json_one stayed empty; renaming it to uName parses correctly
-- Fix: value columns must use the exact field names inside the JSON value (here uName and uId, as written by JsonOne); user_id comes from the raw key via 'key.fields'
DROP TEMPORARY TABLE IF EXISTS source_kafka01;
CREATE TEMPORARY TABLE source_kafka01 (
  `user_id` STRING
  ,`uName` STRING
  ,`uId` BIGINT
  ,`ts` TIMESTAMP_LTZ(3) METADATA FROM 'timestamp' VIRTUAL
) WITH (
  'connector' = 'kafka',
  'topic' = 'sk_test_01',
  'properties.bootstrap.servers' = 'vpc:port',
  'scan.startup.mode' = 'earliest-offset',
  'key.fields' = 'user_id',
  'key.format' = 'raw'
  --Start consuming from the specified millisecond timestamp 1655395200000
--   'scan.startup.mode' = 'timestamp',
--   'scan.startup.timestamp-millis' = '1655395200000',
  ,'value.format' = 'json'
  ,'value.fields-include' = 'EXCEPT_KEY'
  -- ,'json.ignore-parse-errors' = 'true'
  -- ,'value.fields-include' = 'ALL'
)
;

SELECT * FROM source_kafka01
;
/*
CREATE TEMPORARY TABLE sink_print
(
  `user_id` STRING
  ,`Json_one` STRING
  -- ,`Json_two` BIGINT
  ,`ts` TIMESTAMP_LTZ(3)
)
WITH (
    'connector' = 'print'
)
;
INSERT INTO sink_print
SELECT * FROM source_kafka01
;
*/

Consumption result (screenshot omitted)
Expected result (screenshot omitted)
