标签生成

目的:使用 Java 代码(基于 Spark)实现团购网站评论的标签生成

最终结果(每行格式为 ID=============>标签:出现次数,按出现次数降序排列,最多取前10个标签):

83644298=============>体验好:1
82317795=============>味道差:1
77705462=============>服务热情:3,羊肉:2,味道赞:1
85766086=============>价格实惠:2,上菜慢:1
74145782=============>服务热情:18,味道赞:14,上菜快:13,菜品不错:12,回头客:11,性价比高:6,停车方便:5,体验好:4,不推荐:3,服务差:2
71039150=============>团建:1
70611801=============>干净卫生:4,回头客:3,味道赞:2,肉类好:1
73963176=============>味道赞:15,价格实惠:12,分量足:11,菜品不错:10,肉类好:7,环境优雅:6,回头客:4,性价比高:3,味道一般:1
84270191=============>价格实惠:2,干净卫生:1
89223651=============>环境优雅:8,技师专业:7,干净卫生:5,服务一般:4,无办卡:3,环境一般:2
82016443=============>分量足:3,味道赞:2,服务热情:1
77287793=============>干净卫生:29,环境优雅:26,交通便利:25,性价比高:19,服务热情:18,高大上:16,停车方便:13,音响效果差:1
79197522=============>服务热情:2,价格实惠:1
83084036=============>干净卫生:1
73879078=============>饮品赞:3,回头客:2,分量足:1
88284865=============>价格实惠:1
83073343=============>干净卫生:17,味道赞:16,环境优雅:15,菜品不错:11,肉类好:9,性价比高:8,体验好:7,回头客:6,价格实惠:4,上菜慢:1
76114040=============>性价比高:1
86913510=============>午餐:1
88496862=============>回头客:5,味道赞:4,分量足:3,性价比高:2,高大上:1
78477325=============>味道赞:8,回头客:7,干净卫生:5,味道一般:4,菜品不错:3,环境优雅:2,肉类好:1
83981222=============>性价比高:4,干净卫生:3,服务热情:2
82705919=============>回头客:3,饮品赞:2,性价比高:1
87994574=============>无推销:12,价格实惠:8,服务热情:7,效果赞:5,环境优雅:4,技师专业:3,没有异味:2,效果差:1
77373671=============>菜品差:1
75144086=============>服务热情:38,效果赞:30,无办卡:22,性价比高:21,无推销:19,价格实惠:18,干净卫生:13,体验好:12,韩系风格:10,美发师手艺好:3
85648235=============>味道赞:17,服务热情:15,干净卫生:13,上菜快:12,回头客:11,性价比高:10,体验好:9,价格实惠:8,分量足:7,情侣约会:1
73607905=============>菜品不错:16,回头客:15,服务热情:14,分量足:13,肉类好:11,环境优雅:7,体验好:5,体验差:2,价格实惠:1
76893145=============>服务热情:10,环境优雅:7,高大上:5,回头客:4,温馨浪漫:3,味道一般:2,饮品赞:1
78824187=============>价格实惠:13,回头客:11,分量足:10,环境优雅:8,干净卫生:7,上菜快:6,主食赞:5,味道赞:4,服务差:1

代码:


import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;

/**
 * java版实现团购网站标签生成程序
 */
public class ReviewTagsJava {
    public static void main(String [] args){
        SparkConf conf = new SparkConf();
        conf.setMaster("local[4]");
        conf.setAppName("ReviewTagsJava");

        JavaSparkContext jsc = new JavaSparkContext(conf);
        JavaRDD<String> rdd1 = jsc.textFile("file:///d:/scala/taggen/temptags.txt");

        //以\t切割成String数组
        JavaRDD<String[]> rdd2 = rdd1.map(new Function<String, String[]>() {
            public String[] call(String s) throws Exception {
                return s.split("\t");
            }
        });

        //  过滤
        JavaRDD<String[]> rdd3 = rdd2.filter(new Function<String[], Boolean>() {
            public Boolean call(String[] v) throws Exception {
                return v.length == 2;
            }
        });

        //变换成数组,ID-->味道好,价格实惠,量足
        JavaPairRDD<String,String> rdd4 = rdd3.mapToPair(new PairFunction<String[], String,String>() {
            public Tuple2<String, String> call(String[] t) throws Exception {
                return new Tuple2<String, String>(t[0],ReviewTags.extractTags(t[1]));
            }
        });

        //过滤空评论
        JavaPairRDD<String,String> rdd5 = rdd4.filter(new Function<Tuple2<String, String>, Boolean>() {
            public Boolean call(Tuple2<String, String> t) throws Exception {
                return t._2.length() > 0;
            }
        });

        //对V进行切割,V形成数组
        JavaPairRDD<String,String[]> rdd6 = rdd5.mapToPair(new PairFunction<Tuple2<String,String>, String,String[]>() {
            public Tuple2<String, String[]> call(Tuple2<String, String> v) throws Exception {
                return new Tuple2<String, String[]>(v._1(),v._2().split(","));
            }
        });

        //V数组压扁,形成集合。ID->味道好  ID->价格实惠  .......
        JavaPairRDD<String,String > rdd7 = rdd6.flatMapValues(new Function<String[], Iterable<String>>() {
            public Iterable<String> call(String[] v) throws Exception {
                List<String> list = new ArrayList<String>();
                for(String  v1 : v){
                    list.add(v1);
                }
                return list;
            }
        });

        //K:ID 味道好     V:1......
        JavaPairRDD<Tuple2<String, String>, Integer> rdd8 = rdd7.mapToPair(new PairFunction<Tuple2<String,String>, Tuple2<String,String>, Integer>() {
            public Tuple2<Tuple2<String, String>, Integer> call(Tuple2<String, String> v) throws Exception {
                return new Tuple2<Tuple2<String, String>, Integer>(v,1);
            }
        });

        JavaPairRDD<Tuple2<String,String>,Integer> rdd9 = rdd8.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });

        //K:ID      V:味道好,1 ......
        JavaPairRDD<String, Tuple2<String, Integer>> rdd10 = rdd9.mapToPair(new PairFunction<Tuple2<Tuple2<String,String>,Integer>, String, Tuple2<String,Integer>>() {
            public Tuple2<String, Tuple2<String, Integer>> call(Tuple2<Tuple2<String, String>, Integer> v) throws Exception {
                return new Tuple2<String, Tuple2<String, Integer>>(v._1()._1(),new Tuple2<String, Integer>(v._1()._2(),v._2()));
            }
        });

        // V变成集合,以备聚合
        JavaPairRDD<String,List<Tuple2<String,Integer>>> rdd11 = rdd10.mapToPair(new PairFunction<Tuple2<String,Tuple2<String,Integer>>, String, List<Tuple2<String,Integer>>>() {
            public Tuple2<String, List<Tuple2<String, Integer>>> call(Tuple2<String, Tuple2<String, Integer>> v) throws Exception {
                List<Tuple2<String,Integer>> list = new ArrayList<Tuple2<String, Integer>>();
                list.add(v._2());
                 return new Tuple2<String, List<Tuple2<String, Integer>>>(v._1(),list);
            }
        });
        //
        JavaPairRDD<String,List<Tuple2<String,Integer>>> rdd12 = rdd11.reduceByKey(new Function2<List<Tuple2<String, Integer>>, List<Tuple2<String, Integer>>, List<Tuple2<String, Integer>>>() {
            public List<Tuple2<String, Integer>> call(List<Tuple2<String, Integer>> v1, List<Tuple2<String, Integer>> v2) throws Exception {
                v1.addAll(v2);
                return  v1;
            }
        });

        //排序降序,取前10评论
        JavaPairRDD<String,String> rdd13 = rdd12.mapToPair(new PairFunction<Tuple2<String,List<Tuple2<String,Integer>>>, String, String>() {
            public Tuple2<String, String> call(Tuple2<String, List<Tuple2<String, Integer>>> v) throws Exception {
                //降序,
                TreeSet<Tuple2<String,Integer>> ts = new TreeSet<Tuple2<String, Integer>>(new Tuple2Comparator());
                ts.addAll(v._2());
                //前10迭代
                Iterator<Tuple2<String,Integer>> it = ts.iterator();
                int index = 0;
                String str = "";
                while(it.hasNext()){
                    if(index > 9){
                        break;
                    }
                    //迭代出前10的元组给t0
                    Tuple2<String,Integer> t0 = it.next();
                    //V:味道好 :12,量大:13
                    str = str + t0._1() + ":" + t0._2() + ",";
                    index++;
                }
                //去掉最后一个","
                str = str.substring(0,str.length()-1);
                //K:ID,       V:味道好+12评论数量
                return  new Tuple2<String, String>(v._1(),str);
            }
        });
        //collect执行---->List
        List<Tuple2<String,String>> list = rdd13.collect();
        //循环打印结果
        for(Tuple2<String,String> l : list){
            System.out.println(l._1() + "=============>" + l._2());
        }
    }
}

按出现次数降序取出前10个标签时,需要用到如下比较器(Comparator):

import scala.Tuple2;

import java.util.Comparator;

/**
 * 对比器,比较大小,降序
 */
public class Tuple2Comparator implements Comparator<Tuple2<String,Integer>> {
    /**
     * Orders (tag, count) tuples by count, largest first; tuples with the same count
     * are ordered by tag in ascending natural {@code String} order.
     *
     * <p>The tie-break on the tag matters when this comparator backs a sorted
     * collection such as {@code TreeSet}: such collections treat elements that compare
     * as 0 as duplicates, so comparing by count alone would discard every tag that
     * shares its count with another one. The counts are compared with
     * {@code compareTo} instead of subtraction so the result cannot overflow.
     *
     * @param o1 first (tag, count) tuple
     * @param o2 second (tag, count) tuple
     * @return a negative value if {@code o1} sorts before {@code o2}, a positive value
     *         if it sorts after, and 0 only when both tag and count are equal
     */
    @Override
    public int compare(Tuple2<String, Integer> o1, Tuple2<String, Integer> o2) {
        // Arguments are swapped on purpose to obtain descending order of counts.
        int byCount = o2._2().compareTo(o1._2());
        if (byCount != 0) {
            return byCount;
        }
        return o1._1().compareTo(o2._1());
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值