I kept hitting a compile error when calling the aggregate() function, so let me paste the code first.
For a while I couldn't tell what was wrong: the error only said the method didn't match. I suspected the key type returned by keyBy() was the culprit, changed the key to String, and that was indeed the problem.
Because the job is written in Java, you have to watch out for this point: when you key by a field name, the key that keyBy() exposes downstream is a Tuple, so the key type declared by the function you pass to aggregate() has to match it (or you key with a KeySelector that returns a concrete type such as String); see the short sketch below.
The Scala API apparently doesn't make you worry about this point.
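To make the mismatch concrete, here is a minimal sketch under stated assumptions: the UserBehavior POJO and the CountAgg / window-function names are illustrative placeholders, not the exact classes from this post. It shows that keyBy("itemId") (keying by field name) in the Java DataStream API of this Flink generation yields a Tuple-typed key, so the WindowFunction handed to aggregate() must declare Tuple as its key generic; keying with a KeySelector that returns String lets every generic line up with String instead, which is the change described above. If the key generics disagree, no aggregate(...) overload matches, and that surfaces as exactly the vague "method mismatch" error.

import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

public class KeyTypeSketch {

    // Illustrative POJO standing in for one UserBehavior.csv record.
    public static class UserBehavior {
        public long itemId;
        public long timestamp;
    }

    // Incremental aggregation: counts the elements of one window.
    public static class CountAgg implements AggregateFunction<UserBehavior, Long, Long> {
        @Override public Long createAccumulator() { return 0L; }
        @Override public Long add(UserBehavior value, Long acc) { return acc + 1; }
        @Override public Long getResult(Long acc) { return acc; }
        @Override public Long merge(Long a, Long b) { return a + b; }
    }

    // Variant A: keyBy("itemId") keys by field NAME, so the key type is Tuple and the
    // WindowFunction must declare Tuple as its key generic.
    public static class TupleKeyedResult
            implements WindowFunction<Long, String, Tuple, TimeWindow> {
        @Override
        public void apply(Tuple key, TimeWindow window, Iterable<Long> counts, Collector<String> out) {
            out.collect(key.getField(0) + " -> " + counts.iterator().next());
        }
    }

    // Variant B (the fix): key with a KeySelector returning String, so the key generic is String.
    public static class StringKeyedResult
            implements WindowFunction<Long, String, String, TimeWindow> {
        @Override
        public void apply(String key, TimeWindow window, Iterable<Long> counts, Collector<String> out) {
            out.collect(key + " -> " + counts.iterator().next());
        }
    }

    public static void wire(DataStream<UserBehavior> behaviorStream) {
        // Variant A: Tuple key, Tuple-keyed window function.
        behaviorStream
                .keyBy("itemId")
                .timeWindow(Time.minutes(60), Time.minutes(5))
                .aggregate(new CountAgg(), new TupleKeyedResult());

        // Variant B: String key everywhere -- the change that made the code compile.
        behaviorStream
                .keyBy(new KeySelector<UserBehavior, String>() {
                    @Override
                    public String getKey(UserBehavior value) {
                        return String.valueOf(value.itemId);
                    }
                })
                .timeWindow(Time.minutes(60), Time.minutes(5))
                .aggregate(new CountAgg(), new StringKeyedResult());
    }
}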
The class used:
package com.aliyun.market;

import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.util.Collector;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Properties;

/**
 * Top-N hot items statistics.
 * Data download: curl https://raw.githubusercontent.com/wuchong/my-flink-project/master/src/main/resources/UserBehavior.csv > UserBehavior.csv
 * The data lives under resources as UserBehavior.csv
 * Reference: http://wuchong.me/blog/2018/11/07/use-flink-calculate-hot-items/
 * <p>
 * Key point: the aggregate() function is made for doing aggregation/statistics and usually follows keyBy()
 * <p>
 * Usage from the official docs: aggregate(SUM, 0).and(MIN, 2)
 */
public class HotTopDemo {

    public static void main(String[] args) throws Exception {
        // todo 1: read data from Kafka
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // todo obtain the Kafka configuration properties
        args = new String[]{"--input-topic", "user_behavior",
                "--bootstrap.servers", "node2.hadoop:9091,node3.hadoop:9091",
                "--zookeeper.connect", "node1.hadoop:2181,node2.hadoop:2181,node3.hadoop:2181",
                "--group.id", "cc2"};
        ParameterTool parameterTool = ParameterTool.fromArgs(args);

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // Properties pros
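        // --- hedged sketch added for illustration; not necessarily the author's original continuation ---
        // One common way to finish the source setup implied by the imports above: build Properties
        // from the ParameterTool, create the FlinkKafkaConsumer010, parse the JSON records with
        // fastjson, and assign ascending event-time timestamps. The UserBehavior POJO and its
        // itemId/timestamp fields are assumptions here, not code from this post.
        Properties props = parameterTool.getProperties();
        FlinkKafkaConsumer010<String> consumer = new FlinkKafkaConsumer010<>(
                parameterTool.getRequired("input-topic"),   // "user_behavior"
                new SimpleStringSchema(),
                props);

        DataStream<UserBehavior> behaviorStream = env
                .addSource(consumer)
                .map(new MapFunction<String, UserBehavior>() {
                    @Override
                    public UserBehavior map(String value) {
                        return JSON.parseObject(value, UserBehavior.class);
                    }
                })
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<UserBehavior>() {
                    @Override
                    public long extractAscendingTimestamp(UserBehavior element) {
                        return element.timestamp * 1000L;   // assumed: timestamps are in seconds
                    }
                });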