// Bean class
Have the JavaBean class implement the Serializable interface.
Its fields are:
private String o_order_no, s_scrap_name, d0_dict_text, s_unit, s_price, o_amount, o_money, d3_dict_text, ow_id, ow_name, ow_idCard, d1_dict_text, ow_tel, ow_address, sc_id, sc_name, d2_dict_text, sc_idCard, sc_tel, o_create_time;
Add getters and setters for every field, plus a no-argument constructor and a constructor that takes all twenty fields (the all-args constructor is what the mapper below calls).
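For reference, a minimal sketch of what the bean might look like. The class name WasteBean is taken from the import in the job code below; only the first two fields are written out here, and the remaining eighteen plus the full twenty-argument constructor follow the same pattern:

import java.io.Serializable;

public class WasteBean implements Serializable {
    private static final long serialVersionUID = 1L;

    private String o_order_no;
    private String s_scrap_name;
    // ... the remaining 18 String fields follow the same pattern

    public WasteBean() {
    }

    public WasteBean(String o_order_no, String s_scrap_name /* , ... remaining fields */) {
        this.o_order_no = o_order_no;
        this.s_scrap_name = s_scrap_name;
        // ... assign the remaining fields
    }

    public String getO_order_no() { return o_order_no; }
    public void setO_order_no(String o_order_no) { this.o_order_no = o_order_no; }
    public String getS_scrap_name() { return s_scrap_name; }
    public void setS_scrap_name(String s_scrap_name) { this.s_scrap_name = s_scrap_name; }
    // ... getters and setters for the remaining fields
}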
// Code
All the files to be cached are placed under /flume/ on HDFS.
import com.utils.JedisUtil;
import com.utils.SerializeUtil;
import com.bean.WasteBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import redis.clients.jedis.Jedis;

import java.io.IOException;

public class HDFS_Redis {

    private static Jedis jedis = JedisUtil.newJedis();

    public static class MyMapper extends Mapper<Object, Text, IntWritable, Text> {
        @Override
        protected void map(Object key, Text value, Context context) {
            // Skip empty lines and lines that do not have exactly 20 tab-separated fields
            if (value == null || value.toString().trim().isEmpty()) {
                return;
            }
            String[] strings = value.toString().split("\t");
            if (strings.length != 20) {
                return;
            }
            // Serialize the bean and push it onto the Redis list "waste"
            jedis.lpush("waste".getBytes(), SerializeUtil.serialize(new WasteBean(
                    strings[0], strings[1], strings[2], strings[3], strings[4],
                    strings[5], strings[6], strings[7], strings[8], strings[9],
                    strings[10], strings[11], strings[12], strings[13], strings[14],
                    strings[15], strings[16], strings[17], strings[18], strings[19])));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("hdfs://master:9000/flume/"));

        for (FileStatus fileStatus : fileStatuses) {
            // Stop once the Redis list already holds more than 1000 entries
            if (jedis.llen("waste") > 1000) {
                break;
            }
            // Skip file names ending with "emp" (in-progress temp files)
            if (fileStatus.getPath().getName().endsWith("emp")) {
                continue;
            }
            Path filePath = fileStatus.getPath();

            // One job per file; the mapper only writes to Redis, the job output itself is thrown away
            Job job = Job.getInstance(configuration);
            job.setJarByClass(HDFS_Redis.class);
            job.setMapperClass(MyMapper.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, filePath);

            // Reuse a throw-away output directory, deleting it if it already exists
            Path outputPath = new Path("hdfs://master:9000/flumeaaa");
            if (fileSystem.exists(outputPath)) {
                fileSystem.delete(outputPath, true);
            }
            FileOutputFormat.setOutputPath(job, outputPath);

            // Delete the source file only after it has been cached successfully
            if (job.waitForCompletion(true)) {
                fileSystem.delete(filePath, true);
            }
        }
    }
}
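A note on the design: the static jedis field is shared by the driver and the mapper, which works when the job runs in local mode (everything in one JVM). If the job were submitted to a cluster, each mapper task would need its own connection. A hedged sketch of that variant, assuming the same JedisUtil, opens the connection in setup() and closes it in cleanup():

public static class MyMapper extends Mapper<Object, Text, IntWritable, Text> {
    private Jedis jedis;   // one connection per mapper task

    @Override
    protected void setup(Context context) {
        jedis = JedisUtil.newJedis();   // open when the task starts
    }

    @Override
    protected void cleanup(Context context) {
        if (jedis != null) {
            jedis.close();              // return the connection when the task ends
        }
    }

    // map(...) stays the same, but uses the instance field instead of the static one
}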
// The two utility classes that are needed (written by myself; my skill is limited, so suggestions from more experienced readers are welcome)
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;

public class JedisUtil {

    private static JedisPool pool = null;
    private static Jedis jedis = newJedis();

    private static JedisPool initJedisPool() {
        if (pool == null) {
            // Redis server address
            String host = "master";
            // Redis server port
            int port = 6379;
            // Initialize the connection pool
            pool = new JedisPool(host, port);
        }
        return pool;
    }

    public static Jedis newJedis() {
        // Borrow a connection from the pool and authenticate
        jedis = initJedisPool().getResource();
        jedis.auth("123456");
        return jedis;
    }
}
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

public class SerializeUtil {

    // Serialize an object into a byte array
    public static byte[] serialize(Object object) {
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ObjectOutputStream oos = new ObjectOutputStream(baos);
            oos.writeObject(object);
            return baos.toByteArray();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    // Deserialize a byte array back into an object
    public static Object unserialize(byte[] bytes) {
        try {
            ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
            ObjectInputStream ois = new ObjectInputStream(bais);
            return ois.readObject();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
}
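For completeness, a hedged sketch of how a consumer might read the cached beans back out of Redis. The "waste" key, WasteBean, and the two utility classes come from the code above; the class name WasteConsumer and the getter name are illustrative assumptions:

import com.bean.WasteBean;
import com.utils.JedisUtil;
import com.utils.SerializeUtil;
import redis.clients.jedis.Jedis;

public class WasteConsumer {
    public static void main(String[] args) {
        Jedis jedis = JedisUtil.newJedis();
        // Pop entries from the tail of the list until it is empty
        byte[] bytes;
        while ((bytes = jedis.rpop("waste".getBytes())) != null) {
            WasteBean bean = (WasteBean) SerializeUtil.unserialize(bytes);
            // getter name assumes IDE-generated getters for the fields listed above
            System.out.println(bean.getO_order_no());
        }
        jedis.close();
    }
}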