// Bean class
Have the JavaBean class implement the Serializable interface.
Its fields are:
private String o_order_no, s_scrap_name, d0_dict_text, s_unit, s_price, o_amount, o_money, d3_dict_text, ow_id, ow_name, ow_idCard, d1_dict_text, ow_tel, ow_address, sc_id, sc_name, d2_dict_text, sc_idCard, sc_tel, o_create_time;
Add getters and setters for every field, plus a no-argument constructor and a constructor that takes all twenty fields (the all-args constructor is what the mapper below calls).
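For reference, a minimal sketch of what the bean might look like. The class name WasteBean is taken from the import in the job code below; only the first two fields are written out here, and the remaining eighteen plus the full twenty-argument constructor follow the same pattern:

import java.io.Serializable;

public class WasteBean implements Serializable {
    private static final long serialVersionUID = 1L;

    private String o_order_no;
    private String s_scrap_name;
    // ... the remaining 18 String fields follow the same pattern

    public WasteBean() {
    }

    public WasteBean(String o_order_no, String s_scrap_name /* , ... remaining fields */) {
        this.o_order_no = o_order_no;
        this.s_scrap_name = s_scrap_name;
        // ... assign the remaining fields
    }

    public String getO_order_no() { return o_order_no; }
    public void setO_order_no(String o_order_no) { this.o_order_no = o_order_no; }
    public String getS_scrap_name() { return s_scrap_name; }
    public void setS_scrap_name(String s_scrap_name) { this.s_scrap_name = s_scrap_name; }
    // ... getters and setters for the remaining fields
}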
// Code
All the files to be cached are placed under /flume/ on HDFS.
import com.utils.JedisUtil;
import com.utils.SerializeUtil;
import com.bean.WasteBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import redis.clients.jedis.Jedis;

import java.io.IOException;

public class HDFS_Redis {

    private static Jedis jedis = JedisUtil.newJedis();

    public static class MyMapper extends Mapper<Object, Text, IntWritable, Text> {
        @Override
        protected void map(Object key, Text value, Context context) {
            // Skip empty lines and lines that do not have exactly 20 tab-separated fields
            if (value == null || value.toString().trim().isEmpty()) {
                return;
            }
            String[] strings = value.toString().split("\t");
            if (strings.length != 20) {
                return;
            }
            // Serialize the bean and push it onto the Redis list "waste"
            jedis.lpush("waste".getBytes(), SerializeUtil.serialize(new WasteBean(
                    strings[0], strings[1], strings[2], strings[3], strings[4],
                    strings[5], strings[6], strings[7], strings[8], strings[9],
                    strings[10], strings[11], strings[12], strings[13], strings[14],
                    strings[15], strings[16], strings[17], strings[18], strings[19])));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("hdfs://master:9000/flume/"));

        for (FileStatus fileStatus : fileStatuses) {
            // Stop once the Redis list already holds more than 1000 entries
            if (jedis.llen("waste") > 1000) {
                break;
            }
            // Skip file names ending with "emp" (in-progress temp files)
            if (fileStatus.getPath().getName().endsWith("emp")) {
                continue;
            }
            Path filePath = fileStatus.getPath();

            // One job per file; the mapper only writes to Redis, the job output itself is thrown away
            Job job = Job.getInstance(configuration);
            job.setJarByClass(HDFS_Redis.class);
            job.setMapperClass(MyMapper.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, filePath);

            // Reuse a throw-away output directory, deleting it if it already exists
            Path outputPath = new Path("hdfs://master:9000/flumeaaa");
            if (fileSystem.exists(outputPath)) {
                fileSystem.delete(outputPath, true);
            }
            FileOutputFormat.setOutputPath(job, outputPath);

            // Delete the source file only after it has been cached successfully
            if (job.waitForCompletion(true)) {
                fileSystem.delete(filePath, true);
            }
        }
    }
}
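A note on the design: the static jedis field is shared by the driver and the mapper, which works when the job runs in local mode (everything in one JVM). If the job were submitted to a cluster, each mapper task would need its own connection. A hedged sketch of that variant, assuming the same JedisUtil, opens the connection in setup() and closes it in cleanup():

public static class MyMapper extends Mapper<Object, Text, IntWritable, Text> {
    private Jedis jedis;   // one connection per mapper task

    @Override
    protected void setup(Context context) {
        jedis = JedisUtil.newJedis();   // open when the task starts
    }

    @Override
    protected void cleanup(Context context) {
        if (jedis != null) {
            jedis.close();              // return the connection when the task ends
        }
    }

    // map(...) stays the same, but uses the instance field instead of the static one
}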
// The two utility classes that are needed (written by myself; my skill is limited, so suggestions from more experienced readers are welcome)
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;

public class JedisUtil {

    private static JedisPool pool = null;
    private static Jedis jedis = newJedis();

    private static JedisPool initJedisPool() {
        if (pool == null) {
            // Redis server address
            String host = "master";
            // Redis server port
            int port = 6379;
            // Initialize the connection pool
            pool = new JedisPool(host, port);
        }
        return pool;
    }

    public static Jedis newJedis() {
        // Borrow a connection from the pool and authenticate
        jedis = initJedisPool().getResource();
        jedis.auth("123456");
        return jedis;
    }
}
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

public class SerializeUtil {

    // Serialize an object into a byte array
    public static byte[] serialize(Object object) {
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ObjectOutputStream oos = new ObjectOutputStream(baos);
            oos.writeObject(object);
            return baos.toByteArray();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    // Deserialize a byte array back into an object
    public static Object unserialize(byte[] bytes) {
        try {
            ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
            ObjectInputStream ois = new ObjectInputStream(bais);
            return ois.readObject();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }
}
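For completeness, a hedged sketch of how a consumer might read the cached beans back out of Redis. The "waste" key, WasteBean, and the two utility classes come from the code above; the class name WasteConsumer and the getter name are illustrative assumptions:

import com.bean.WasteBean;
import com.utils.JedisUtil;
import com.utils.SerializeUtil;
import redis.clients.jedis.Jedis;

public class WasteConsumer {
    public static void main(String[] args) {
        Jedis jedis = JedisUtil.newJedis();
        // Pop entries from the tail of the list until it is empty
        byte[] bytes;
        while ((bytes = jedis.rpop("waste".getBytes())) != null) {
            WasteBean bean = (WasteBean) SerializeUtil.unserialize(bytes);
            // getter name assumes IDE-generated getters for the fields listed above
            System.out.println(bean.getO_order_no());
        }
        jedis.close();
    }
}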