描绘产品所有用户的访问的功能路径树,话就不多说,闷得慌
本文采用了俩种实现:一种是sql,一种是hadoop数据处理,第二种的处理在一些细节上会和第一种方式有所不同。
本来是利用传统图论,构建一个链接表的DAG图,但是效率不高,容易内存溢出,后面采用字符串的方式简单高效,
这又是偷巧的方式,哈哈
sql 代码
ALTER procedure [dbo].[PR_StatFuncPath](@statdate int)
as
begin
declare @today datetime;
set @today=CAST(convert(varchar(8),@statdate) as datetime);
declare @yestoday datetime;
set @yestoday=dateadd(DAY,-1,@today);
declare @yesInt int;
set @yesInt=CAST(CONVERT(varchar(8),@yestoday,112) as int);
declare @tomorrow datetime;
set @tomorrow=dateadd(DAY,1,@today);
declare @tomorrowInt int;
set @tomorrowInt=CAST(CONVERT(varchar(8),@tomorrow,112) as int);
declare @tomorrowpathTable varchar(100);
set @tomorrowpathTable = 'MobileFunctionLog_' + CAST( YEAR(@tomorrow) as nvarchar(10));
declare @yespathTable varchar(100);
set @yespathTable = 'MobileFunctionLog_' + CAST(YEAR(@yestoday) as nvarchar(10));
declare @pathTable varchar(100);
set @pathTable = 'MobileFunctionLog_' + CAST(YEAR(@today) as nvarchar(10));
declare @sql nvarchar(max);
declare @part tinyint;
set @part=0;
create table #resultbyversion(softid int,platform int,versionid int,functionid int,Pid int,ordernum int,functionpath varchar(200),optiontimes int);
create table #result(softid int,platform int,functionid int,Pid int,ordernum int,functionpath varchar(200),optiontimes int)
while(@part<128)
begin
--得出昨天会话
set @sql=' select distinct SoftID,Platform,VersionID,SessionID,IMEI
into ##tempyes
from '+ @yespathTable+
' where OptionDate=@yesInt and Part=@part';
EXEC sp_executesql @sql,N'@part tinyint,@yesInt int',@part,@yesInt;
--得出明天的会话
set @sql=' select distinct SoftID,Platform,VersionID,SessionID,IMEI
into ##temptomorrow
from '+ @tomorrowpathTable+
' where OptionDate=@tomorrowInt and Part=@part';
EXEC sp_executesql @sql,N'@part tinyint,@tomorrowInt int',@part,@tomorrowInt;
set @sql='select distinct SoftID, Platform, VersionID, SessionID, FunctionID, IMEI, OptionTime,NeedRecord
into ##OneDayData
from '+@pathTable+
' where OptionDate=@statdate and Part=@part'
EXEC sp_executesql @sql,N'@part tinyint,@statdate int',@part,@statdate;
Delete A from
##OneDayData A inner join
##tempyes B
on A.SoftID=B.SoftID AND A.Platform=B.Platform And A.VersionID=B.VersionID
AND A.SessionID=B.SessionID AND A.IMEI=B.IMEI;
Delete A from
##OneDayData A inner join
##temptomorrow B
on A.SoftID=B.SoftID AND A.Platform=B.Platform And A.VersionID=B.VersionID
AND A.SessionID=B.SessionID AND A.IMEI=B.IMEI;
---上述俩处完成清理缓存数据和脏数据
drop table ##tempyes;
drop table ##temptomorrow;
select SoftID, Platform, VersionID, SessionID, FunctionID, IMEI, OptionTime,NeedRecord,
ROW_NUMBER() over( partition by SoftID, Platform, VersionID, SessionID, IMEI Order By OptionTime asc) ordernum
into #LastOneData
from ##OneDayData ;
drop table ##OneDayData;
--得出标准路径 (干掉了F1->F1->F2 这种非标准路径)
with etc2 as
(
select SoftID,Platform,VersionID,SessionId,FunctionID,IMEI,OptionTime,NeedRecord,OrderNum,isnull(Pid,0) Pid from
(
select A.SoftID, A.Platform, A.VersionID, A.SessionId, A.FunctionID, A.IMEI, A.OptionTime,A.NeedRecord,
A.OrderNum,B.FunctionID Pid
from #LastOneData A
left join #LastOneData B
ON A.SoftID=B.SoftID AND A.Platform=B.Platform And A.VersionID=B.VersionID
AND A.SessionID=B.SessionID AND A.IMEI=B.IMEI And A.ordernum=B.ordernum+1
) A where (FunctionID!=Pid or Pid is null)
)
select * into #etc2 from etc2;
---路径长度大于20的不要
select SoftID, Platform, VersionID, SessionId, FunctionID, IMEI, OptionTime,NeedRecord,Pid,OrderNum
into #GroupData from
(
select SoftID, Platform, VersionID, SessionId, FunctionID, IMEI, OptionTime,NeedRecord,Pid ,
ROW_NUMBER() over(PARTITION by SoftID, Platform, VersionID, SessionId, IMEI order by OptionTime asc ) OrderNum
from #etc2
) a where OrderNum<20;
drop table #etc2;
--递归得出路径结果
with etc3 as
(
select *,CAST(FunctionID as varchar(200)) FunctionPath from #GroupData where PID=0
UNION ALL
select A.*,CAST((B.FunctionPath +'-'+ CAST(A.FUNCTIONID as varchar(12))) AS VARCHAR(200) ) FunctionPath from #GroupData A
INNER JOIN etc3 B
on A.SessionID=B.SessionID and A.SoftID=B.SoftID and A.Platform=B.platform and A.versionid=B.versionid
and A.Imei=B.Imei and A.OrderNum-B.OrderNum=1
)
select * into #temppathdata from etc3;
insert into #resultbyversion
select SoftID,Platform,VersionID,FunctionID,Pid,OrderNum, FunctionPath,sum(NeedRecord) OptionTimes
from #temppathdata
group by SoftID,Platform,VersionID,FunctionID,Pid,OrderNum,FunctionPath;
insert into #result
select SoftID,Platform,FunctionID,Pid,OrderNum, FunctionPath,sum(NeedRecord) OptionTimes
from #temppathdata
group by SoftID,Platform,FunctionID,Pid,OrderNum,FunctionPath;
drop table #GroupData;
drop table #LastOneData;
drop table #temppathdata;
set @part=@part+1;
print(@part);
end
select SoftID,Platform,VersionID,FunctionID,PID,OrderNum,FunctionPath,SUM(OptionTimes) OptionTimes
into ##tmp_FuncPathByVersion
from #resultbyversion
group by SoftID,Platform,VersionID,FunctionID,OrderNum,Pid,FunctionPath
having SUM(OptionTimes)>1
select SoftID,Platform,FunctionID,PID,OrderNum,FunctionPath,SUM(OptionTimes) OptionTimes
into ##tmp_FuncPath
from #result
group by SoftID,Platform,FunctionID,OrderNum,Pid,FunctionPath
having SUM(OptionTimes)>1
DROP TABLE #result;
DROP TABLE #resultbyversion;
end
java 代码
@MapConfig
public static class MapTask extends Mapper<LongWritable, Text, Text, Text> {
private Text mKey = new Text();
private Text mValue = new Text();
private StringBuilder sb = new StringBuilder();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String str = value.toString();
String[] params = StringUtils.splitByWholeSeparatorPreserveAllTokens(str, "\t");
//过滤数据
if (params.length < 21 || params[12].equals("0") || !params[11].equals("False")) {
return;
}
sb.delete(0, sb.length());
//key:imei 0,softid 2,softversion 3,platform 4,sessionid 12
//value:functionid 7,actiontime 9
sb.append(params[0]).append("\t")
.append(params[2]).append("\t")
.append(params[3]).append("\t")
.append(params[4]).append("\t")
.append(params[12]);
mKey.set(sb.toString());
mValue.set(params[7] + "\t" + params[9]);
context.write(mKey, mValue);
}
}
@ReduceConfig
public static class ReduceTask extends Reducer<Text, Text, Text, IntWritable> {
private Text rKey = new Text();
private IntWritable rValue = new IntWritable();
private String[] valItem = new String[2];
private int i = 0;
//key:actiontime value:lastfunctionid firstfunctionid;
private SortedMap<String, String> map = new TreeMap<String, String>();
String pid = "";
//lastid
private String valTemp;
private String firstid;
private StringBuilder sbpathvalue = new StringBuilder();
//key:road,value:functionid
private Map<String, Integer> mRoads = new HashMap<String, Integer>();
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
map.clear();
mRoads.clear();
boolean isfirstid = true;
sbpathvalue.delete(0, sbpathvalue.length());
i = 0;
for (Text item : values) {
valItem = item.toString().split("\t");
if (map.containsKey(valItem[1]) || valItem[1].equals("")) {
continue;
}
map.put(valItem[1], valItem[0]);
}
//添加退出功能点
map.put(System.currentTimeMillis() + "", "-1");
pid = "";
valTemp = "";
Iterator it = map.entrySet().iterator();
//用户一次操作完整路径才计算路径长度
int pathlengtn = 0;
while (it.hasNext()) {
Map.Entry<String, String> entry = (Map.Entry<String, String>) it.next();
if (isfirstid) {
firstid = entry.getValue();
isfirstid = false;
}
pathlengtn = 0;
// //过滤掉F1->F1
// if (valTemp.equals(entry.getValue())){
// continue;
// }
valTemp = entry.getValue();
//算法关键点
int start = sbpathvalue.indexOf(valTemp);
if (start >= 0) {
sbpathvalue = sbpathvalue.replace(start + valTemp.length(), sbpathvalue.length(), "");
//continue;
}
if (i != 0 && firstid.equals(valTemp)) {
i = 0;
sbpathvalue.delete(0, sbpathvalue.length());
}
if (i == 0) {
sbpathvalue.append(valTemp);
pid = valTemp;
} else {
if (start == -1) {
sbpathvalue.append("_").append(valTemp);
}
int first = sbpathvalue.indexOf("_");
int last = sbpathvalue.lastIndexOf("_");
if (last == -1) {
pid = sbpathvalue.toString();
} else {
pid = sbpathvalue.substring(first == last ? 0 : sbpathvalue.substring(0, last).lastIndexOf("_") + 1, last);
}
}
//表示此路径为完整路
if (valTemp.equals("-1")) {
if (sbpathvalue.toString().contains("_")) {
pathlengtn = sbpathvalue.toString().split("_").length;
}
}
String roadkey = pid + "\t" + valTemp + "\t" + pathlengtn + "\t" + sbpathvalue;
if (!mRoads.containsKey(roadkey)) {
mRoads.put(roadkey, 1);
} else {
mRoads.put(roadkey, mRoads.get(roadkey) + 1);
}
i++;
}
if (mRoads.size() <= 0) {
return;
}
Iterator itRoads = mRoads.entrySet().iterator();
while (itRoads.hasNext()) {
Map.Entry<String, Integer> entry = (Map.Entry<String, Integer>) itRoads.next();
rKey.set(key + "\t" + entry.getKey());
rValue.set(entry.getValue());
context.write(rKey, rValue);
}
}
}