Reading data from HDFS with Flink

Reading from HDFS with DataStreamSource (streaming)

package com.umetrip.umeflink.connector.hdfs;

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class UmeHdfsSource {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read a text file from HDFS, one record per line
        DataStreamSource<String> stringDataStreamSource =
                env.readTextFile("hdfs://10.5.xxx.xxx:8020/test/seamless/2019-06-25/part-0-0.txt");

        // stringDataStreamSource.print();

        // Write the lines to a local directory; parallelism 1 produces a single output file
        stringDataStreamSource.writeAsText("/Users/xxxx/testdata/").setParallelism(1);

        env.execute();
    }
}
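
readTextFile reads the file once and finishes the stream when the file is exhausted. If the job should instead keep watching an HDFS directory and pick up newly arriving files, Flink's readFile with FileProcessingMode.PROCESS_CONTINUOUSLY can be used. A minimal sketch, assuming a hypothetical class name, an illustrative path, and a 60-second scan interval:

package com.umetrip.umeflink.connector.hdfs;

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

public class UmeHdfsContinuousSource {

    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Illustrative directory; replace with a real HDFS path
        String inputPath = "hdfs://10.5.xxx.xxx:8020/test/seamless/2019-06-25";
        TextInputFormat format = new TextInputFormat(new Path(inputPath));

        // Re-scan the directory every 60 seconds and emit the lines of new files
        DataStream<String> lines =
                env.readFile(format, inputPath, FileProcessingMode.PROCESS_CONTINUOUSLY, 60_000L);

        lines.print();
        env.execute("continuous HDFS read");
    }
}

Note that in this mode a modified file is re-processed in its entirety on the next scan.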

Batch processing with the DataSet API

package com.umetrip.umeflink.connector.hdfs;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.configuration.Configuration;

public class UmeHdfsBatchSource {

    public static void main(String[] args) throws Exception {

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Create a configuration object and enable recursive file enumeration,
        // so files in nested subdirectories of the input path are read as well
        Configuration parameters = new Configuration();
        parameters.setBoolean("recursive.file.enumeration", true);

        // Read text files from an HDFS namenode running at nnHost:nnPort
        // DataSet<String> hdfsLines = env.readTextFile("hdfs://10.5.xxx.xxx:8020/TestData");
        // To read from an HA HDFS cluster locally, put core-site.xml and hdfs-site.xml
        // into the resources directory; the path then takes the form hdfs://namespace/xx/xx
        DataSet<String> hdfsLines = env.readTextFile("hdfs://10.5.xxx.xxx:8020/test/seamless/2019-06-25")
                .withParameters(parameters);

        hdfsLines.print();
    }
}
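
For the HA case mentioned in the comment above, the client needs core-site.xml and hdfs-site.xml on the classpath. A minimal hdfs-site.xml sketch, assuming a hypothetical nameservice ns1 and placeholder namenode hosts (none of these values come from the original):

<!-- Hypothetical HA settings; nameservice "ns1" and the hosts are placeholders -->
<configuration>
    <property>
        <name>dfs.nameservices</name>
        <value>ns1</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.ns1</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn1</name>
        <value>namenode1.example.com:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.ns1.nn2</name>
        <value>namenode2.example.com:8020</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.ns1</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
</configuration>

core-site.xml would then set fs.defaultFS to hdfs://ns1, and the read path becomes hdfs://ns1/test/seamless/2019-06-25.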
The Maven dependencies used by these examples are listed below. All Scala-suffixed artifacts must use the same Scala version; the original snippet mixed _2.11 and _2.12, which leads to classpath conflicts, so everything is aligned to _2.12 here. Artifacts listed without a <version> are assumed to be managed by a parent POM or a dependencyManagement section.

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hadoop-compatibility_2.12</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.8.0</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>1.8.0</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>1.8.0</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_2.12</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hadoop-fs</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-hadoop2</artifactId>
        </dependency>
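
The flink-hadoop-compatibility dependency above also makes it possible to read HDFS files through Hadoop's own input formats. A minimal DataSet sketch, assuming a hypothetical class name and an illustrative path:

package com.umetrip.umeflink.connector.hdfs;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.HadoopInputs;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;

public class UmeHdfsHadoopInputSource {

    public static void main(String[] args) throws Exception {

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read (byte offset, line) pairs using Hadoop's mapred TextInputFormat
        DataSet<Tuple2<LongWritable, Text>> input = env.createInput(
                HadoopInputs.readHadoopFile(new TextInputFormat(),
                        LongWritable.class, Text.class,
                        "hdfs://10.5.xxx.xxx:8020/test/seamless/2019-06-25"));

        input.print();
    }
}

This yields (byte offset, line) tuples instead of plain strings, which is mainly useful when an existing Hadoop InputFormat (for example, for SequenceFiles) should be reused.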