// Code release for a WWW 2017 poster. The experimental data has the form of the single-user sample given below.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace single_user_movie
{
/// <summary>
/// One event of a user's interaction sequence: who interacted with what, and when.
/// The two index fields are not filled by the parser; Program.Main assigns them
/// after the sequence has been sorted by time.
/// </summary>
public class SequenceEle
{
    // --- values read from the tab-separated input (columns 0, 1 and 2) ---

    /// <summary>User identifier (input column 0).</summary>
    public string uid;

    /// <summary>
    /// Item identifier (input column 1). DCN.ComputeSelf splits this value on ';'
    /// and compares the resulting tokens.
    /// </summary>
    public string tid;

    /// <summary>Timestamp of the event (input column 2).</summary>
    public DateTime time;

    // --- values derived once the whole sequence of a user is known ---

    /// <summary>Dense index of <see cref="tid"/> among the distinct tids of the sequence.</summary>
    public int tIndex;

    /// <summary>Position of this event inside the time-ordered sequence.</summary>
    public int timeIndex;
}
/// <summary>
/// Command-line driver. For every user found in the target-domain log it fits the
/// two-domain model in <c>Inference</c> and writes one result line per user.
/// </summary>
class Program
{
    /// <summary>Users with more events than this in either domain are not evaluated.</summary>
    private const int MaxSequenceLength = 500;

    /// <summary>Number of levels K passed to both Inference initialisers.</summary>
    private const int NoveltyLevels = 9;

    /// <summary>Number of Gibbs sweeps run per domain and user.</summary>
    private const int GibbsIterations = 80;

    /// <summary>
    /// Entry point.
    /// </summary>
    /// <param name="args">
    /// Optional, in this order: source-domain log, target-domain log, scratch file for the
    /// source events of the current user, scratch file for the target events of the current
    /// user, result file. Missing entries fall back to the placeholders below.
    /// </param>
    static void Main(string[] args)
    {
        // Hard-coded placeholders (edit here when not passing arguments):
        // two input paths, two scratch files, final result file.
        string[] paths = { "", "", "", "", "" };
        if (args != null)
        {
            for (int a = 0; a < args.Length && a < paths.Length; a++)
                paths[a] = args[a];
        }
        if (paths.Any(p => string.IsNullOrEmpty(p)))
        {
            Console.Error.WriteLine("usage: <sourceLog> <targetLog> <sourceScratch> <targetScratch> <resultFile>");
            Environment.ExitCode = 1;
            return;
        }
        string in1 = paths[0], in2 = paths[1], input = paths[2], input1 = paths[3], outputFile = paths[4];

        var seqq = ParseRecords(File.ReadAllLines(in1));
        var seqq1 = ParseRecords(File.ReadAllLines(in2));

        // Group once instead of rescanning both logs for every user.
        var sourceByUser = seqq.GroupBy(r => r.uid).ToDictionary(g => g.Key, g => g.ToList());

        var res = new List<string>();
        // Users are taken from successfully parsed target records only, so a malformed
        // or header line can no longer register a user that has no events.
        foreach (var targetOfUser in seqq1.GroupBy(r => r.uid))
        {
            // The prepared data is written to the scratch files and read back, as before.
            WriteRecords(input1, targetOfUser);
            var seq1 = LoadSortedByTime(input1);
            if (seq1.Count == 0)
                continue; // nothing survived the round trip; there is no window and nothing to predict

            List<SequenceEle> sourceOfUser;
            if (!sourceByUser.TryGetValue(targetOfUser.Key, out sourceOfUser))
                sourceOfUser = new List<SequenceEle>();

            // Keep only source events that fall inside the time span of the target sequence.
            DateTime time1 = seq1[0].time;
            DateTime time2 = seq1[seq1.Count - 1].time;
            WriteRecords(input, sourceOfUser.Where(sq => time1 <= sq.time && sq.time <= time2));
            var seq = LoadSortedByTime(input);

            if (seq.Count <= MaxSequenceLength && seq1.Count <= MaxSequenceLength)
                res.Add(EvaluateUser(seq, seq1));
        }

        // Written in one go. Columns: user id, rounded mean level, rank of the held-out item.
        File.WriteAllLines(outputFile, res);
    }

    /// <summary>
    /// Parses tab-separated lines of the form uid, tid, time. Lines with fewer than three
    /// columns or an unparsable time are dropped.
    /// </summary>
    private static List<SequenceEle> ParseRecords(IEnumerable<string> lines)
    {
        var records = new List<SequenceEle>();
        foreach (string line in lines)
        {
            var sp = line.Split('\t');
            DateTime time;
            if (sp.Length < 3 || !DateTime.TryParse(sp[2], out time))
                continue;
            records.Add(new SequenceEle { uid = sp[0], tid = sp[1], time = time });
        }
        return records;
    }

    /// <summary>Writes the records to <paramref name="path"/>, one tab-separated line each.</summary>
    private static void WriteRecords(string path, IEnumerable<SequenceEle> records)
    {
        // using guarantees the file handle is released even when a write fails.
        using (var writer = new StreamWriter(path))
        {
            foreach (SequenceEle sq in records)
                writer.WriteLine(sq.uid.ToString() + '\t' + sq.tid + '\t' + sq.time.ToString());
        }
    }

    /// <summary>Reads a scratch file back and orders its records by time (stable sort).</summary>
    private static List<SequenceEle> LoadSortedByTime(string path)
    {
        return ParseRecords(File.ReadAllLines(path)).OrderBy(dp => dp.time).ToList();
    }

    /// <summary>
    /// Sets timeIndex to the position in the list and tIndex to the index of the tid among
    /// <c>records.Select(tid).Distinct()</c>; returns that tid-to-index map.
    /// </summary>
    private static Dictionary<string, int> IndexItems(List<SequenceEle> records)
    {
        var indexOfTid = records.Select(dp => dp.tid)
                                .Distinct()
                                .Select((tid, position) => new { tid, position })
                                .ToDictionary(x => x.tid, x => x.position);
        for (int i = 0; i < records.Count; i++)
        {
            records[i].timeIndex = i;
            records[i].tIndex = indexOfTid[records[i].tid];
        }
        return indexOfTid;
    }

    /// <summary>
    /// Fits the model on all but the last event of each sequence and scores the last target event.
    /// </summary>
    /// <param name="seq">Time-ordered source-domain events of the user (may be empty).</param>
    /// <param name="seq1">Time-ordered target-domain events of the user (not empty).</param>
    /// <returns>The result line: uid, rounded mean level, rank of the held-out item (0 if unranked).</returns>
    private static string EvaluateUser(List<SequenceEle> seq, List<SequenceEle> seq1)
    {
        var tidIndexDic = IndexItems(seq);
        var tidIndexDic1 = IndexItems(seq1);
        var selfMatrix = DCN.ComputeSelf(seq);
        var selfMatrix1 = DCN.ComputeSelf(seq1);
        int n = seq.Count;
        int m = tidIndexDic.Count;
        int n1 = seq1.Count;
        int m1 = tidIndexDic1.Count;

        Inference.InitialParameter(m, n - 1, NoveltyLevels);
        Inference.InitialParameter1(m1, n1 - 1, NoveltyLevels);

        // The last event of each sequence is held out of training.
        var sourceHistory = seq.Take(n - 1).ToList();
        var targetHistory = seq1.Take(n1 - 1).ToList();
        int abortedSweeps = 0;
        for (int i = 0; i < GibbsIterations; i++)
        {
            // Best effort, as before: a sweep that throws is abandoned and the next one
            // continues from whatever state it left. Failures are now counted and reported.
            try
            {
                Inference.GibbsSamplingEach(sourceHistory, selfMatrix);
            }
            catch (Exception)
            {
                abortedSweeps++;
            }
            try
            {
                Inference.GibbsSamplingEach1(targetHistory, selfMatrix1);
            }
            catch (Exception)
            {
                abortedSweeps++;
            }
        }
        if (abortedSweeps > 0)
            Console.Error.WriteLine(seq1[0].uid + ": " + abortedSweeps + " Gibbs sweep(s) aborted by an exception");

        // predict
        // Snapshot the distributions once; explicit guards replace the exception-driven
        // skipping of the original (missing model state simply leaves every score at 0).
        double[] thetaP = Inference.theta != null ? Inference.theta.P : null;
        double[] transition = null;
        int[] ranks = null;
        if (n1 >= 2 && Inference.phi_t != null)
        {
            int previousItem = seq1[n1 - 2].tIndex;
            if (previousItem < Inference.phi_t.Length && Inference.phi_t[previousItem] != null)
                transition = Inference.phi_t[previousItem].P;
            ranks = selfMatrix1[n1 - 2];
        }

        var predicts = new List<Tuple<int, double>>();
        for (int x = 0; x < m1; x++)
        {
            double p = 0.0;
            if (thetaP != null && transition != null && x < transition.Length)
            {
                for (int k = 0; k < NoveltyLevels && k < thetaP.Length; k++)
                {
                    p += Math.Pow(10, 10) * thetaP[k] * transition[x] *
                         Math.Exp((-1) * Math.Pow(k + 1 - ranks[x], 2));
                }
            }
            predicts.Add(Tuple.Create(x, p));
        }
        predicts = predicts.OrderByDescending(dp => dp.Item2).ToList();

        // Expected level under theta. The original looped to 10 although theta has only
        // K = 9 entries and relied on a swallowed IndexOutOfRangeException to stop.
        double mean = 0.0;
        if (thetaP != null)
        {
            for (int k = 0; k < thetaP.Length; k++)
                mean += (k + 1) * thetaP[k];
        }
        int ns = (int)Math.Round(mean);

        // 1-based position of the held-out item in the ranking (0 when it is not ranked).
        int rank = 0;
        int heldOutIndex;
        if (tidIndexDic1.TryGetValue(seq1[n1 - 1].tid, out heldOutIndex))
            rank = predicts.FindIndex(dp => dp.Item1 == heldOutIndex) + 1;

        return seq1[0].uid.ToString() + '\t' + ns.ToString() + '\t' + rank.ToString();
    }
}
/// <summary>
/// Builds the per-position item ranking matrix consumed by <c>Inference</c>.
/// </summary>
public class DCN
{
    /// <summary>
    /// For every position i of <paramref name="seq"/> ranks all distinct items by a score
    /// computed from the positions before i.
    /// </summary>
    /// <remarks>
    /// Column j stands for the j-th element of <c>seq.Select(tid).Distinct()</c>, which is the
    /// same enumeration Program.Main uses to assign <c>tIndex</c>. The original code took the
    /// tokens of <c>seq[j]</c> (the j-th event) instead, which is a different item whenever
    /// a tid repeats before position j.
    /// Score of item j at position i, for i greater than 0:
    /// 1, plus one for every token of the item found in the tid of an earlier position k,
    /// plus one for every earlier position k (k at least 1) that shares a token with the item
    /// and whose predecessor k-1 shares a token with i-1.
    /// Ranks are dense: the highest score gets 1, each lower distinct score the next integer.
    /// Row 0 has no history and is all 1.
    /// </remarks>
    /// <param name="seq">Event sequence; tids are split on ';' into tokens.</param>
    /// <returns>A matrix with one row per position and one column per distinct tid.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="seq"/> is null.</exception>
    public static int[][] ComputeSelf(List<SequenceEle> seq)
    {
        if (seq == null)
            throw new ArgumentNullException("seq");

        int n = seq.Count;

        // Split every tid once instead of inside the triple loop.
        var stepTokens = new HashSet<string>[n];
        for (int t = 0; t < n; t++)
            stepTokens[t] = new HashSet<string>(seq[t].tid.Split(';'));
        string[][] itemTokens = seq.Select(dp => dp.tid)
                                   .Distinct()
                                   .Select(tid => tid.Split(';'))
                                   .ToArray();
        int m = itemTokens.Length;

        // overlap[j][k]: how many tokens of item j (duplicates counted) occur at position k.
        var overlap = new int[m][];
        for (int j = 0; j < m; j++)
        {
            overlap[j] = new int[n];
            for (int k = 0; k < n; k++)
            {
                int hits = 0;
                foreach (string token in itemTokens[j])
                {
                    if (stepTokens[k].Contains(token))
                        hits++;
                }
                overlap[j][k] = hits;
            }
        }

        int[][] matrix = new int[n][];
        for (int i = 0; i < n; i++)
            matrix[i] = new int[m];
        if (n == 0)
            return matrix;

        for (int j = 0; j < m; j++)
            matrix[0][j] = 1;

        // occurrence[j] = 1 + token hits of item j over positions 0..i-1, maintained incrementally.
        var occurrence = new int[m];
        for (int j = 0; j < m; j++)
            occurrence[j] = 1;

        for (int i = 1; i < n; i++)
        {
            for (int j = 0; j < m; j++)
                occurrence[j] += overlap[j][i - 1];

            // Transition part: positions k whose predecessor resembles the predecessor of i.
            // This does not depend on the item, so it is computed once per row.
            var similarContext = new List<int>();
            for (int k = 1; k < i; k++)
            {
                if (stepTokens[k - 1].Overlaps(stepTokens[i - 1]))
                    similarContext.Add(k);
            }

            var score = new int[m];
            for (int j = 0; j < m; j++)
            {
                int s = occurrence[j];
                foreach (int k in similarContext)
                {
                    if (overlap[j][k] > 0)
                        s++;
                }
                score[j] = s;
            }

            // Dense rank by descending score; equal scores share a rank, so the order
            // among ties does not matter.
            int[] order = Enumerable.Range(0, m).OrderByDescending(j => score[j]).ToArray();
            int rank = 1;
            int previous = score[order[0]];
            foreach (int j in order)
            {
                if (score[j] != previous)
                {
                    rank++;
                    previous = score[j];
                }
                matrix[i][j] = rank;
            }
        }
        return matrix;
    }
}
/// <summary>
/// Static sampler state and Gibbs updates for two sequences: the "_s" members belong to
/// the sequence passed to <see cref="GibbsSamplingEach"/>, the "_t" members to the one passed
/// to <see cref="GibbsSamplingEach1"/>. <see cref="theta"/>, <see cref="beta"/> and
/// <see cref="k"/> are shared: both sweeps read and overwrite the same theta.
/// All state is static, so the class is not safe for concurrent use.
/// </summary>
public class Inference
{
    /// <summary>Number of positions sampled by <see cref="GibbsSamplingEach"/>.</summary>
    public static int n;
    /// <summary>Number of rows of <see cref="phi_s"/>: item count plus one start state (last row).</summary>
    public static int m;
    /// <summary>Number of positions sampled by <see cref="GibbsSamplingEach1"/>.</summary>
    public static int n1;
    /// <summary>Number of rows of <see cref="phi_t"/>: item count plus one start state (last row).</summary>
    public static int m1;
    /// <summary>Number of levels a latent assignment can take (0 .. k-1).</summary>
    public static int k;
    /// <summary>Dirichlet parameter used to initialise the rows of <see cref="phi_s"/> (all ones).</summary>
    public static double[] alpha_s;
    /// <summary>Dirichlet parameter used to initialise the rows of <see cref="phi_t"/> (all ones).</summary>
    public static double[] alpha_t;
    /// <summary>Dirichlet prior of <see cref="theta"/> (all ones); set by <see cref="InitialParameter"/> only.</summary>
    public static double[] beta;
    /// <summary>Distribution over levels, shared by both sequences.</summary>
    public static MathNet.Numerics.Distributions.Categorical theta;
    /// <summary>Per previous-item distribution over the next item, first sequence.</summary>
    public static MathNet.Numerics.Distributions.Categorical[] phi_s;
    /// <summary>Per previous-item distribution over the next item, second sequence.</summary>
    public static MathNet.Numerics.Distributions.Categorical[] phi_t;
    /// <summary>Latent level per position, first sequence.</summary>
    public static int[] zArr_s;
    /// <summary>Latent level per position, second sequence.</summary>
    public static int[] zArr_t;

    // Constant factor applied to every level weight before sampling.
    private static readonly double WeightScale = Math.Pow(10, 10);
    // Replacement for weights that are untouched or not positive.
    private static readonly double WeightFloor = 1.0 / Math.Pow(10, 100);

    /// <summary>
    /// Initialises the first sequence and the shared theta/beta/k.
    /// </summary>
    /// <param name="_m">Number of distinct items.</param>
    /// <param name="_n">Number of positions to sample.</param>
    /// <param name="_k">Number of levels.</param>
    /// <remarks>
    /// Best effort: if any distribution cannot be built (for example <paramref name="_m"/> is 0
    /// or <paramref name="_n"/> is negative) the exception is swallowed and the members not yet
    /// assigned keep their previous values.
    /// </remarks>
    public static void InitialParameter(int _m, int _n, int _k)
    {
        m = _m + 1;
        n = _n;
        k = _k;
        alpha_s = Enumerable.Repeat(1.0, m - 1).ToArray();
        beta = Enumerable.Repeat(1.0, k).ToArray();
        try
        {
            var dPhi = new MathNet.Numerics.Distributions.Dirichlet(alpha_s);
            var dTheta = new MathNet.Numerics.Distributions.Dirichlet(beta);
            phi_s = new MathNet.Numerics.Distributions.Categorical[m];
            zArr_s = new int[n];
            theta = new MathNet.Numerics.Distributions.Categorical(dTheta.Sample());
            for (int i = 0; i < n; i++)
                zArr_s[i] = theta.Sample();
            for (int i = 0; i < m; i++)
                phi_s[i] = new MathNet.Numerics.Distributions.Categorical(dPhi.Sample());
        }
        catch (Exception)
        {
            // Deliberately ignored, see remarks.
        }
    }

    /// <summary>
    /// Initialises the second sequence. Reuses the theta and beta left by
    /// <see cref="InitialParameter"/>; overwrites the shared <see cref="k"/>.
    /// </summary>
    /// <param name="_m">Number of distinct items.</param>
    /// <param name="_n">Number of positions to sample.</param>
    /// <param name="_k">Number of levels.</param>
    /// <remarks>Best effort, same as <see cref="InitialParameter"/>.</remarks>
    public static void InitialParameter1(int _m, int _n, int _k)
    {
        m1 = _m + 1;
        n1 = _n;
        k = _k;
        alpha_t = Enumerable.Repeat(1.0, m1 - 1).ToArray();
        try
        {
            var dPhi = new MathNet.Numerics.Distributions.Dirichlet(alpha_t);
            // Result unused; kept because constructing it aborts this initialisation when
            // beta is missing or rejected by the library, exactly as before.
            new MathNet.Numerics.Distributions.Dirichlet(beta);
            phi_t = new MathNet.Numerics.Distributions.Categorical[m1];
            zArr_t = new int[n1];
            for (int i = 0; i < n1; i++)
                zArr_t[i] = theta.Sample();
            for (int i = 0; i < m1; i++)
                phi_t[i] = new MathNet.Numerics.Distributions.Categorical(dPhi.Sample());
        }
        catch (Exception)
        {
            // Deliberately ignored, see remarks.
        }
    }

    /// <summary>
    /// Runs <paramref name="iter"/> sweeps of <see cref="GibbsSamplingEach"/>.
    /// The call had been commented out, which made this method a silent no-op.
    /// An exception thrown by a sweep propagates and ends the remaining iterations.
    /// </summary>
    public static void GibbsSampling(List<SequenceEle> seq, int[][] matrix, int iter)
    {
        for (int i = 0; i < iter; i++)
            GibbsSamplingEach(seq, matrix);
    }

    /// <summary>One sweep over the first sequence (state: n, m, zArr_s, phi_s, theta).</summary>
    /// <param name="seq">Events; at least <see cref="n"/> entries with tIndex set.</param>
    /// <param name="matrix">Rank matrix indexed [position][tIndex].</param>
    public static void GibbsSamplingEach(List<SequenceEle> seq, int[][] matrix)
    {
        Sweep(seq, matrix, n, m, zArr_s, phi_s);
    }

    /// <summary>One sweep over the second sequence (state: n1, m1, zArr_t, phi_t, theta).</summary>
    /// <param name="seq">Events; at least <see cref="n1"/> entries with tIndex set.</param>
    /// <param name="matrix">Rank matrix indexed [position][tIndex].</param>
    public static void GibbsSamplingEach1(List<SequenceEle> seq, int[][] matrix)
    {
        Sweep(seq, matrix, n1, m1, zArr_t, phi_t);
    }

    /// <summary>
    /// Shared body of both sweeps. For every position: redraw its level, then redraw theta
    /// from the level counts, then rebuild every row of <paramref name="phi"/>.
    /// <paramref name="assignments"/> and <paramref name="phi"/> are updated in place.
    /// </summary>
    private static void Sweep(List<SequenceEle> seq, int[][] matrix, int length, int states,
        int[] assignments, MathNet.Numerics.Distributions.Categorical[] phi)
    {
        for (int i = 0; i < length; i++)
        {
            // --- draw z ---
            // Read P once per position; presumably the normalised probability vector of the
            // distribution (TODO confirm against the Math.NET version in use).
            double[] thetaP = theta.P;
            int item = seq[i].tIndex;
            // Position 0 has no predecessor and uses the start-state row (last row).
            var fromState = i == 0 ? phi[states - 1] : phi[seq[i - 1].tIndex];
            double transition = fromState.P[item];
            if (transition <= 0)
                transition = WeightFloor;
            int rank = matrix[i][item];

            var levelWeights = new double[k];
            for (int z = 0; z < k; z++)
            {
                double f = Math.Exp((-1) * Math.Pow(z + 1 - rank, 2));
                levelWeights[z] = WeightScale * thetaP[z] * transition * f;
            }
            try
            {
                assignments[i] = new MathNet.Numerics.Distributions.Categorical(levelWeights).Sample();
            }
            catch (Exception)
            {
                // Best effort: when the weights are rejected (e.g. every f underflowed to 0)
                // the previous level of this position is kept.
            }

            // --- draw theta ---
            var posterior = new double[k];
            // Same outcome as the former try/catch around CopyTo: skip the prior when it is
            // missing or does not fit.
            if (beta != null && beta.Length <= posterior.Length)
                beta.CopyTo(posterior, 0);
            foreach (int level in assignments)
                posterior[level] += 1;
            theta = new MathNet.Numerics.Distributions.Categorical(
                new MathNet.Numerics.Distributions.Dirichlet(posterior).Sample());

            // --- draw phi ---
            for (int j = 0; j < states; j++)
            {
                double[] rowWeights = Enumerable.Repeat(1.0, states).ToArray();
                double[] currentRow = null; // P of the row being replaced, fetched on first use

                if (j == states - 1)
                {
                    // Start-state row: only position 0 contributes.
                    // NOTE(review): this is the only place that scales the rank by the row
                    // maximum and k instead of using (z + 1 - rank); left unchanged, confirm intent.
                    int first = seq[0].tIndex;
                    int z0 = assignments[0];
                    double f0 = Math.Exp(
                        (-1) * Math.Pow((z0 - matrix[0][first] / ((double)matrix[0].Max()) * k), 2));
                    currentRow = phi[j].P;
                    rowWeights[first] *= currentRow[first] * f0;
                }

                for (int ii = 1; ii < length; ii++)
                {
                    if (seq[ii - 1].tIndex != j)
                        continue;
                    int next = seq[ii].tIndex;
                    double f = Math.Exp((-1) * Math.Pow(assignments[ii] + 1 - matrix[ii][next], 2));
                    if (currentRow == null)
                        currentRow = phi[j].P;
                    rowWeights[next] *= currentRow[next] * f;
                }

                // Entries still exactly 1.0 were never touched; they and non-positive
                // products get the floor weight.
                for (int s = 0; s < rowWeights.Length; s++)
                {
                    if (rowWeights[s] == 1.0 || rowWeights[s] <= 0)
                        rowWeights[s] = WeightFloor;
                }
                try
                {
                    phi[j] = new MathNet.Numerics.Distributions.Categorical(rowWeights);
                }
                catch (Exception)
                {
                    // Best effort: keep the previous row when the weights are rejected.
                }
            }
        }
    }
}
}