Trident实战之计算网站PV

[size=large]1、Trident实战之计算网站PV[/size]


/**
 * Trident example that computes a web site's page views (PV) per day.
 *
 * <p>{@link #buildTopology(LocalDRPC)} wires two streams together: an ingest stream that splits
 * raw log lines into {@code (date, session_id)}, groups them by date and keeps a running count in
 * an in-memory map state, and a DRPC stream named {@code "GetPV"} that looks those counts up by
 * date.
 */
public class TridentPVTopo {

    /** Number of log lines the spout hands out per batch. */
    private static final int BATCH_SIZE = 3;

    /**
     * Builds the PV topology.
     *
     * @param drpc the in-process DRPC server used in local mode; {@code main} passes {@code null}
     *             when the topology is submitted to a real cluster
     * @return the assembled topology, ready to be submitted
     */
    public static StormTopology buildTopology(LocalDRPC drpc) {
        Random random = new Random();
        String[] hosts = { "www.taobao.com" };
        String[] session_id = { "ABYH6Y4V4SCVXTG6DPB4VH9U123", "XXYH6YCGFJYERTT834R52FDXV9U34",
                "BBYH61456FGHHJ7JL89RG5VV9UYU7", "CYYH6Y2345GHI899OFG4V9U567", "VVVYH6Y4V4SFXZ56JIPDPB4V678" };
        String[] time = { "2014-01-07 08:40:50", "2014-01-07 08:40:51", "2014-01-07 08:40:52", "2014-01-07 08:40:53",
                "2014-01-07 09:40:49", "2014-01-07 10:40:49", "2014-01-07 11:40:49", "2014-01-07 12:40:49" };

        // The spout's single output field "eachLog" is the input field of the ingest stream below.
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("eachLog"), BATCH_SIZE,
                randomLogLine(random, hosts[0], session_id, time),
                randomLogLine(random, hosts[0], session_id, time),
                randomLogLine(random, hosts[0], session_id, time));
        // Cycling is switched off; the sample output in the article stays at a count of 3,
        // i.e. the three generated lines are counted only once.
        spout.setCycle(false);

        TridentTopology topology = new TridentTopology();

        // Ingest stream: parse each line, group by day, and persist a per-day count.
        TridentState wordCounts = topology.newStream("spout1", spout)
                // input field, splitting function, fields produced by the function
                .each(new Fields("eachLog"), new Mysplit("\t"), new Fields("date", "session_id"))
                .groupBy(new Fields("date"))
                // Count the session_id values of each date group into the in-memory state as "pv".
                .persistentAggregate(new MemoryMapState.Factory(), new Fields("session_id"), new Count(),
                        new Fields("pv"));

        // Query stream: read the persisted counts back through the DRPC function "GetPV".
        topology.newDRPCStream("GetPV", drpc)
                // The request argument is a space-separated list of dates; emit one tuple per date.
                .each(new Fields("args"), new Split(" "), new Fields("date"))
                .groupBy(new Fields("date"))
                // Look each date up in the state built above; the result lands in field "PV".
                .stateQuery(wordCounts, new Fields("date"), new MapGet(), new Fields("PV"))
                // Drop dates for which no count has been stored.
                .each(new Fields("PV"), new FilterNull());
        return topology.build();
    }

    /**
     * Creates one tab-separated log line {@code host \t session \t time} with a randomly chosen
     * session id and timestamp. The bounds come from the array lengths, so the sample arrays can
     * be edited without touching any index constant.
     */
    private static Values randomLogLine(Random random, String host, String[] sessionIds, String[] times) {
        return new Values(host + "\t" + sessionIds[random.nextInt(sessionIds.length)] + "\t"
                + times[random.nextInt(times.length)]);
    }

    /**
     * Runs the topology. Without arguments it starts an in-process cluster, queries the PV count
     * once per second for 100 seconds and then shuts everything down; with an argument it submits
     * the topology to a real cluster under the name {@code args[0]}.
     *
     * @param args optional topology name for cluster submission
     * @throws Exception if submission, the DRPC call, or the sleep fails
     */
    public static void main(String[] args) throws Exception {
        Config conf = new Config();
        conf.setMaxSpoutPending(20);
        if (args.length == 0) {
            LocalDRPC drpc = new LocalDRPC();
            LocalCluster cluster = new LocalCluster();
            try {
                cluster.submitTopology("wordCounter", conf, buildTopology(drpc));
                for (int i = 0; i < 100; i++) {
                    System.err.println("DRPC RESULT: " + drpc.execute("GetPV", "2014-01-07 2014-01-08"));
                    Thread.sleep(1000);
                }
            } finally {
                // Stop the in-process cluster and DRPC server even if a query fails, so the
                // local-mode run does not leave them running after the loop.
                cluster.shutdown();
                drpc.shutdown();
            }
        } else {
            conf.setNumWorkers(3);
            StormSubmitter.submitTopologyWithProgressBar(args[0], conf, buildTopology(null));
        }
    }
}



[size=large]2、自定义分割数据[/size]


public class Mysplit extends BaseFunction {

    private static final long serialVersionUID = 1L;

    /** Delimiter (a regular expression, as accepted by {@code String.split}) for one log line. */
    String patton;

    /**
     * @param patton delimiter regex used to cut a raw log line into its columns
     */
    public Mysplit(String patton) {
        this.patton = patton;
    }

    /**
     * Splits the first field of the tuple and, when it yields exactly three columns, emits
     * {@code (date, session_id)}: the third column reduced to {@code DateFmt.date_short} form and
     * the second column unchanged. Lines with any other column count are dropped.
     */
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String[] columns = tuple.getString(0).split(patton);
        if (columns.length != 3) {
            return;
        }
        String day = DateFmt.getCountDate(columns[2], DateFmt.date_short);
        String sessionId = columns[1];
        collector.emit(new Values(day, sessionId));
    }

}





public class Split extends BaseFunction {

    private static final long serialVersionUID = 1L;

    /** Delimiter (a regular expression, as accepted by {@code String.split}). */
    String patton;

    /**
     * @param patton delimiter regex used to cut the incoming string into pieces
     */
    public Split(String patton) {
        this.patton = patton;
    }

    /**
     * Splits the first field of the tuple on the configured delimiter and emits one
     * single-value tuple per resulting piece, in order.
     */
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String[] pieces = tuple.getString(0).split(patton);
        for (int i = 0; i < pieces.length; i++) {
            collector.emit(new Values(pieces[i]));
        }
    }
}



[size=large]3、日期处理类[/size]


/**
 * Date helper methods for the PV example.
 *
 * <p>Every method builds its own {@link SimpleDateFormat}: that class is not thread-safe, and
 * these helpers are called from topology functions that may run on several threads at once.
 */
public class DateFmt {

    /** Pattern for a full timestamp, e.g. {@code 2014-01-07 08:40:50}. */
    public static final String date_long = "yyyy-MM-dd HH:mm:ss" ;
    /** Pattern for a calendar day, e.g. {@code 2014-01-07}. */
    public static final String date_short = "yyyy-MM-dd" ;

    /**
     * Shared {@code yyyy-MM-dd} formatter, kept only so that existing external references still
     * compile. It is not safe to use from more than one thread and is no longer used inside this
     * class, so reassigning it does not affect {@link #parseDate(String)}.
     */
    public static SimpleDateFormat sdf = new SimpleDateFormat(date_short);

    /**
     * Parses {@code date} with {@code patton} and formats the result with the same pattern, which
     * truncates a longer timestamp to the precision of the pattern (a full timestamp parsed with
     * {@link #date_short} comes back as just the day).
     *
     * <p>Best effort: when {@code date} is {@code null} or cannot be parsed, the current time is
     * formatted instead; a parse failure is reported on standard error.
     *
     * @param date   the text to normalise, may be {@code null}
     * @param patton a {@link SimpleDateFormat} pattern
     * @return the date rendered with {@code patton}
     */
    public static String getCountDate(String date, String patton) {
        SimpleDateFormat formatter = new SimpleDateFormat(patton);
        Date when = new Date(); // fallback: "now"
        if (date != null) {
            try {
                when = formatter.parse(date);
            } catch (ParseException e) {
                // Deliberately not rethrown: callers rely on always getting a formatted date back.
                e.printStackTrace();
            }
        }
        return formatter.format(when);
    }

    /**
     * Parses a {@code yyyy-MM-dd} day string.
     *
     * @param dateStr the text to parse
     * @return the parsed date
     * @throws Exception if {@code dateStr} does not start with a valid {@code yyyy-MM-dd} date
     */
    public static Date parseDate(String dateStr) throws Exception {
        // A fresh formatter per call keeps concurrent callers from corrupting each other's results.
        return new SimpleDateFormat(date_short).parse(dateStr);
    }

    /** Small manual check: prints whether 2014-05-02 is after 2014-05-01. */
    public static void main(String[] args) throws Exception {
        System.out.println(parseDate("2014-05-02").after(parseDate("2014-05-01")));
    }

}



[size=large]4、pom文件[/size]
[quote]
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Maven build descriptor for the sample project: artifact com.test:StormMavenProject, packaged as a jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.test</groupId>
<artifactId>StormMavenProject</artifactId>
<packaging>jar</packaging>
<version>0.0.1-SNAPSHOT</version>
<name>StormMavenProject</name>
<url>http://maven.apache.org</url>
<dependencies>

<!-- Libraries declared explicitly alongside storm-core (bytecode, Clojure, queueing, serialization, logging). -->
<!-- NOTE(review): several of these look like storm-core's own runtime dependencies; confirm whether they must be listed here or arrive transitively. -->
<dependency>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
<version>5.0.3</version>
</dependency>
<dependency>
<groupId>org.clojure</groupId>
<artifactId>clojure</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>com.lmax</groupId>
<artifactId>disruptor</artifactId>
<version>3.3.2</version>
</dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo</artifactId>
<version>3.0.3</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
<version>1.6.6</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.8</version>
</dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>minlog</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>org.objenesis</groupId>
<artifactId>objenesis</artifactId>
<version>2.1</version>
</dependency>
<dependency>
<groupId>com.esotericsoftware</groupId>
<artifactId>reflectasm</artifactId>
<version>1.10.1</version>
</dependency>

<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.21</version>
</dependency>
<!-- Apache Storm 1.1.0; presumably the source of the Trident/DRPC classes used by the sample code (the article does not show imports). -->
<!-- NOTE(review): declared with the default scope, which local-mode runs need; for submission to a real cluster a provided scope is commonly used - confirm. -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-rename-hack</artifactId>
<version>1.1.0</version>
</dependency>
<!-- Test-only dependency (scope "test"). -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>ring-cors</groupId>
<artifactId>ring-cors</artifactId>
<version>0.1.5</version>
</dependency>


<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.10</version>
</dependency>



</dependencies>
<build>
<!-- Name of the built artifact file, without version suffix. -->
<finalName>StormMavenProject</finalName>
</build>
</project>

[/quote]

[size=large]5、测试结果[/size]
[quote]
DRPC RESULT: [["2014-01-07 2014-01-08","2014-01-07",3]]
DRPC RESULT: [["2014-01-07 2014-01-08","2014-01-07",3]]
DRPC RESULT: [["2014-01-07 2014-01-08","2014-01-07",3]]
[/quote]
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值