Parsing a CSV file into Java objects and uploading to Kafka

Java 1.8

Kafka 2.6.0

Requirement: parse a CSV-format log file and send each record to a Kafka topic, with a specified key and a JSON-format value; we need to write a small input tool for this.

Approach

  1. Process the CSV file in Java, reading it line by line into a String variable;

  2. Split each line's String and set the pieces onto our entity class (you could skip the mapping, but then you would have to build the JSON string by hand), then build the key required by the spec (see the sketch after this list);

  3. Send each record to Kafka.
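
Before the full tool, here is a minimal sketch of step 2, using a tiny hypothetical bean with just two fields (the field names are placeholders, not the real entity); it only illustrates the split / set / serialize-to-JSON pattern that the real code follows:

    import com.alibaba.fastjson.JSONObject;

    // Minimal sketch of step 2: split one CSV line, map it onto a tiny
    // hypothetical bean, and serialize the bean to a JSON string with fastjson.
    public class CsvLineToJsonSketch {

        // Hypothetical bean; the real ComponentSFCBean has many more fields.
        public static class DemoBean {
            private String fdate;
            private String idstation;

            public String getFdate() { return fdate; }
            public void setFdate(String fdate) { this.fdate = fdate; }
            public String getIdstation() { return idstation; }
            public void setIdstation(String idstation) { this.idstation = idstation; }
        }

        public static void main(String[] args) {
            String line = "2020-09-01,ST01";              // one CSV record
            String[] cols = line.split(",");              // step 2a: split the line

            DemoBean bean = new DemoBean();               // step 2b: set the fields
            bean.setFdate(cols[0]);
            bean.setIdstation(cols[1]);

            String key = cols[0] + cols[1];               // step 2c: build the key
            String value = JSONObject.toJSONString(bean); // step 2d: bean -> JSON
            System.out.println(key + " -> " + value);     // {"fdate":"2020-09-01","idstation":"ST01"}
        }
    }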

Let's get to it

  1. The pom file; the key dependencies are the CSV-parsing jar (javacsv) and Alibaba's fastjson package (a short javacsv reading sketch follows the pom);

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
    
      <groupId>com.huyue.flink</groupId>
      <artifactId>CSVKafkaToFlink</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <packaging>jar</packaging>
    
      <name>CSVKafkaToFlink</name>
      <url>http://maven.apache.org</url>
    
      <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      </properties>
    
      <dependencies>
      		<dependency>
    			<groupId>com.alibaba</groupId>
    			<artifactId>fastjson</artifactId>
    			<version>1.2.56</version>
    		</dependency>
      
    		<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
    		<dependency>
    			<groupId>org.apache.flink</groupId>
    			<artifactId>flink-clients_2.12</artifactId>
    			<version>1.11.1</version>
    		</dependency>
    		
    		<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
    		<dependency>
    			<groupId>org.apache.flink</groupId>
    			<artifactId>flink-streaming-java_2.12</artifactId>
    			<version>1.11.1</version>
    		</dependency>
    
    
    		<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
    		<dependency>
    			<groupId>org.apache.flink</groupId>
    			<artifactId>flink-java</artifactId>
    			<version>1.11.1</version>
    		</dependency>
    
    		<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
    		<dependency>
    			<groupId>org.apache.flink</groupId>
    			<artifactId>flink-connector-kafka_2.12</artifactId>
    			<version>1.11.1</version>
    		</dependency>
    		
    		<!-- jar needed to read and parse the CSV file -->
    		 <dependency>
                <groupId>net.sourceforge.javacsv</groupId>
                <artifactId>javacsv</artifactId>
                <version>2.0</version>
            </dependency>
            
             <dependency>
                <groupId>com.fasterxml.jackson.core</groupId>
                <artifactId>jackson-databind</artifactId>
                <version>2.9.10.1</version>
            </dependency>
    
       <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka -->
    		<dependency>
    			<groupId>org.apache.kafka</groupId>
    			<artifactId>kafka_2.12</artifactId>
    			<version>2.6.0</version>
    		</dependency>
    
    		<dependency>
    			<groupId>org.apache.kafka</groupId>
    			<artifactId>kafka-clients</artifactId>
    			<version>2.6.0</version>
    		</dependency>
      
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
      </dependencies>
    </project>
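
A side note on the javacsv dependency: the main program below simply calls split(","), which breaks if a quoted field contains a comma. A minimal sketch of reading the same kind of file with javacsv's CsvReader instead is shown here; the file name is a placeholder and this assumes the standard javacsv 2.0 API:

    import com.csvreader.CsvReader;
    import java.nio.charset.StandardCharsets;

    // Hedged sketch: reading a CSV file with javacsv's CsvReader instead of
    // String.split(","), so quoted fields containing commas are handled.
    // "data.csv" is a placeholder path, not the file used in this article.
    public class CsvReaderSketch {
        public static void main(String[] args) throws Exception {
            CsvReader reader = new CsvReader("data.csv", ',', StandardCharsets.UTF_8);
            try {
                while (reader.readRecord()) {           // advance to the next record
                    String fdate = reader.get(0);       // access columns by index
                    String idstation = reader.get(1);
                    System.out.println(fdate + " / " + idstation);
                }
            } finally {
                reader.close();                         // release the underlying stream
            }
        }
    }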
    
  2. Write the corresponding bean entity class. My entity has quite a few fields, so I'm only pasting part of it (a small filled-in example follows the stub):

    public class ComponentSFCBean {
    	private String xxx;
    	private String xxx;
    	private Integer xx;
    	private String xx;
    	private long xx;
        // ... remaining fields

        // ... getters and setters

        // ... constructors

        // ... toString() method
    }
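
For reference, this is roughly what the redacted stub looks like once filled in; the three fields are a hypothetical subset (fdate and idstation are taken from the column comments in the main method below, quantity is invented for illustration):

    // Hypothetical filled-in subset of ComponentSFCBean; the real class has many more fields.
    public class ComponentSFCBean {
        private String fdate;
        private String idstation;
        private long quantity;      // invented field name, for illustration only

        public ComponentSFCBean() { }

        public ComponentSFCBean(String fdate, String idstation, long quantity) {
            this.fdate = fdate;
            this.idstation = idstation;
            this.quantity = quantity;
        }

        // fastjson serializes through the getters, so they must exist
        public String getFdate() { return fdate; }
        public void setFdate(String fdate) { this.fdate = fdate; }
        public String getIdstation() { return idstation; }
        public void setIdstation(String idstation) { this.idstation = idstation; }
        public long getQuantity() { return quantity; }
        public void setQuantity(long quantity) { this.quantity = quantity; }

        @Override
        public String toString() {
            return "ComponentSFCBean [fdate=" + fdate + ", idstation=" + idstation
                    + ", quantity=" + quantity + "]";
        }
    }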
    
  3. Write the Kafka producer (a short usage note follows the class):

    /**
     * 
     */
    package com.huyue.flink.kafka;
    
    import java.util.Properties;
    
    import org.apache.kafka.clients.producer.ProducerRecord;
    
    
    /**
     * @author Hu.Yue
     *
     */
    public class KafkaProducer{
    	private final String topic;
    	private final org.apache.kafka.clients.producer.KafkaProducer<String, String> producer;
    	
        
    	public KafkaProducer(String kafkaTopic) {
    		this.topic = kafkaTopic;
    		this.producer = new org.apache.kafka.clients.producer.KafkaProducer<>(createKafkaProperties());
    	}
    	
        /**
    	* @Author: Hu.Yue
    	* @Title: uploadData
    	* @Description: send a record with the given key and value
    	* @param key
    	* @param value
    	* @return void
    	*/
    	public void uploadData(String key, String value) {
    		ProducerRecord<String, String> producerRecord = new ProducerRecord<String, String>(topic, key, value);
    		producer.send(producerRecord);
    	}
    	
    	/**
    	* @Author: Hu.Yue
    	* @Title: createKafkaProperties
    	* @Description: producer configuration
    	* @return Properties
    	*/
    	private static Properties createKafkaProperties() {
    		Properties properties = new Properties();
    		// broker address(es); include the port (9092 is the Kafka default)
    		properties.put("bootstrap.servers", "localhost:9092");
    		// security settings: the login module must match sasl.mechanism
    		// (PlainLoginModule for PLAIN; use ScramLoginModule with SCRAM-SHA-256/512 if the cluster uses SCRAM)
    		properties.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required username='xxx' password='xxx';");
    		properties.put("security.protocol", "SASL_PLAINTEXT");
    		properties.put("sasl.mechanism", "PLAIN");
    		// key/value serializers
    		properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    		properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    		// batching and reliability settings
    		properties.put("acks", "all");
    		properties.put("retries", 3);
    		properties.put("batch.size", 65536);
    		properties.put("linger.ms", 1);
    		properties.put("buffer.memory", 33554432);
    		properties.put("max.request.size", 10485760);
    		
    		return properties;
    	}
    	
        /**
    	* @Author: Hu.Yue
    	* @Title: closeClient
    	* @Description: flush pending records and close the client
    	* @return void
    	*/
    	public void closeClient() {
    		producer.flush();
    		producer.close();
    	}
    }
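
A quick note on using this class: the underlying KafkaProducer is relatively heavy to create, so construct the wrapper once, send as many records as you need, and close it at the end. A minimal usage sketch (the payloads are placeholders):

    import com.huyue.flink.kafka.KafkaProducer;

    // Minimal usage sketch of the wrapper above; the records are placeholder data.
    public class ProducerUsageDemo {
        public static void main(String[] args) {
            KafkaProducer producer = new KafkaProducer("huyue-topic");
            try {
                producer.uploadData("key-1", "{\"fdate\":\"2020-09-01\"}");
                producer.uploadData("key-2", "{\"fdate\":\"2020-09-02\"}");
            } finally {
                producer.closeClient();   // flushes buffered records, then closes
            }
        }
    }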
    
  4. Write the main method that reads the file and submits the records (a note on malformed lines follows the code);

    /**
     * 
     */
    package com.huyue.flink;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    
    import com.alibaba.fastjson.JSONObject;
    import com.huyue.flink.kafka.KafkaProducer;
    import com.huyue.flink.pojo.ComponentSFCBean;
    
    /**
     * @author Hu.Yue
     *
     */
    public class SendMessageApplication {
    
    	/**
    	* @Author: Hu.Yue
    	* @Title: main
    	* @Description: entry point
    	* @param args
    	* @return void
    	* @throws IOException
    	*/
    	public static void main(String[] args) throws IOException {
    		// path to the CSV file
    		String filePath = "E:\\提交\\文件.csv";
    	
    		// kafka topic
    		String topic = "huyue-topic";
    		readCsv(filePath, topic);
    	}
    	
    	/**
    	* @Author: Hu.Yue
    	* @Title: readCsv
    	* @Description: read the file and send each record to kafka
    	* @param file
    	* @param topic
    	* @return void
    	*/
    	public static void readCsv(String file, String topic) {
    		File csv = new File(file);
    		BufferedReader textFile = null;
    		// create the producer once and reuse it for every record
    		KafkaProducer kProducer = new KafkaProducer(topic);
    		try {
    			textFile = new BufferedReader(new FileReader(csv));
    			String lineData = "";
    			while ((lineData = textFile.readLine()) != null) {
    				//1. read the file line by line
    				System.out.println(lineData);
    				//2. split the line into its fields
    				String[] componentStr = lineData.split(",");
    				//3. assign the fields one by one
    				ComponentSFCBean one = new ComponentSFCBean(
    						componentStr[0],  //fdate
    						componentStr[1],  //idstation
    						Integer.parseInt(componentStr[2]),
    						componentStr[3],  //cmodel
    						componentStr[4],  //boardsn
    						componentStr[5],
    						componentStr[6],
    						Long.parseLong(componentStr[7]),
    						Float.parseFloat(componentStr[8]),
    						Float.parseFloat(componentStr[9]),
    						Float.parseFloat(componentStr[10]),
    						Float.parseFloat(componentStr[11]),
    						Float.parseFloat(componentStr[12]),
    						Integer.parseInt(componentStr[13]),
    						Integer.parseInt(componentStr[14]),
    						componentStr[15],
    						componentStr[16],
    						componentStr[17],
    						Long.parseLong(componentStr[18]),
    						componentStr[19],
    						Float.parseFloat(componentStr[20]),
    						componentStr[21]
    						);
    //				System.out.println("======== one record as a java object");
    //				System.out.println(one.toString());
    				//4. build the key
    				String key = componentStr[0]+componentStr[1]+componentStr[3]+componentStr[4];
    //				System.out.println("======== kafka key:");
    //				System.out.println(key);
    				//5. convert the object to a json string
    				String value = JSONObject.toJSONString(one);
    //				System.out.println("======== kafka value: json string");
    //				System.out.println(value);
    				//6. send the record to kafka
    				kProducer.uploadData(key, value);
    			}
    			
    		}catch (Exception e) {
    			e.printStackTrace();
    		}finally {
    			//7. close the kafka producer (this also flushes any buffered records)
    			kProducer.closeClient();
    			if(textFile != null) {
    				try {
    					textFile.close();
    				} catch (IOException e) {
    					e.printStackTrace();
    				}
    			}
    		}
    	}
    }
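
One caveat on the loop above: split(",") plus Integer.parseInt/Float.parseFloat will throw on a header row, an empty line, or a malformed record, and since the whole loop sits in a single try/catch, one bad line stops the rest of the file. A hedged sketch of a guard that can be called right after readLine() (LineGuard and isParsable are hypothetical helpers; 22 is the column count expected by the constructor above):

    // Hedged sketch: skip malformed lines instead of letting one bad record
    // abort the whole file. LineGuard/isParsable are hypothetical helpers.
    public class LineGuard {
        // the ComponentSFCBean constructor call in readCsv expects 22 comma-separated columns
        private static final int EXPECTED_COLUMNS = 22;

        public static boolean isParsable(String line) {
            if (line == null || line.trim().isEmpty()) {
                return false;                               // skip empty lines
            }
            String[] cols = line.split(",");
            if (cols.length < EXPECTED_COLUMNS) {
                System.err.println("Skipping short line: " + line);
                return false;
            }
            return true;
        }
    }

Inside readCsv you would add `if (!LineGuard.isParsable(lineData)) continue;` right after readLine(), and optionally wrap the constructor call in its own try/catch for NumberFormatException so a single unparsable field only skips that record.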
    
    