public class LdaGibbsSampling {
	/** Holder for LDA hyper-parameters and the Gibbs-sampling schedule. */
	public static class modelparameters {
		float alpha = 0.5f; // doc-topic Dirichlet prior; usual value is 50 / K
		float beta = 0.1f; // topic-word Dirichlet prior; usual value is 0.1
		int topicNum = 100; // number of topics K
		int iteration = 100; // total number of Gibbs iterations
		int saveStep = 10; // iterations between two model snapshots
		int beginSaveIters = 50; // first iteration at which snapshots start
	}

	/**
	 * Get parameters from the configuring file. If the configuring file has a
	 * value in it, use the value; else the default value in the program will be
	 * used. Each line must look like "name\tvalue", where name is one of the
	 * {@link parameters} constants.
	 *
	 * @param ldaparameters holder updated in place with the parsed values
	 * @param parameterFile path to the tab-separated configuration file
	 */
	private static void getParametersFromFile(modelparameters ldaparameters,
			String parameterFile) {
		// The previous "new ArrayList" that was immediately overwritten by
		// FileUtil.readList(...) was a dead store and has been removed.
		ArrayList<String> paramLines = FileUtil.readList(parameterFile);
		for (String line : paramLines) {
			String[] lineParts = line.split("\t");
			switch (parameters.valueOf(lineParts[0])) {
			case alpha:
				ldaparameters.alpha = Float.valueOf(lineParts[1]);
				break;
			case beta:
				ldaparameters.beta = Float.valueOf(lineParts[1]);
				break;
			case topicNum:
				ldaparameters.topicNum = Integer.valueOf(lineParts[1]);
				break;
			case iteration:
				ldaparameters.iteration = Integer.valueOf(lineParts[1]);
				break;
			case saveStep:
				ldaparameters.saveStep = Integer.valueOf(lineParts[1]);
				break;
			case beginSaveIters:
				ldaparameters.beginSaveIters = Integer.valueOf(lineParts[1]);
				break;
			}
		}
	}

	/** Parameter names recognised in the configuration file. */
	public enum parameters {
		alpha, beta, topicNum, iteration, saveStep, beginSaveIters;
	}

	/**
	 * Trains the LDA topic model, predicts the max-probability topic of every
	 * message in the given test set, and collects the top-20 words of each
	 * predicted topic as the keyword set representing the test set, weighted
	 * by TF-IDF. (Summary translated from the original Chinese comment.)
	 *
	 * @param trainPathDir directory containing the training documents
	 * @param parameterFile LDA parameter file
	 * @param resultPath directory where intermediate models are written
	 * @param testPath file with the messages to predict
	 * @return keyword set of the covered topics, TF-IDF weighted
	 * @throws IOException if reading the corpus or writing the model fails
	 */
	public Set<Word> trainAndPredictLDA(String trainPathDir, String parameterFile, String resultPath, String testPath) throws IOException {
		modelparameters ldaparameters = new modelparameters();
		getParametersFromFile(ldaparameters, parameterFile);
		Documents docSet = new Documents();
		docSet.readDocs(trainPathDir);
		System.out.println("wordMap size " + docSet.termToIndexMap.size());
		FileUtil.mkdir(resultPath);
		LdaModel model = new LdaModel(ldaparameters);
		System.out.println("1 Initialize the model ...");
		model.initializeModel(docSet);
		System.out.println("2 Learning and Saving the model ...");
		model.inferenceModel(docSet);
		System.out.println("3 Output the final model ...");
		// Predict new texts: one Document per message, collecting the set of
		// max-probability topic indices over the whole test set.
		Documents testDocs = new Documents();
		List<Message> messages = FileUtil.readMessageFromFile(testPath);
		Set<Integer> topicIndexSet = new HashSet<Integer>();
		for (Message message : messages) {
			String content = message.getContent();
			Document doc = new Document(content);
			testDocs.docs.add(doc);
			topicIndexSet.add(model.predictNewSampleTopic(doc));
		}
		// Top-20 words of every covered topic, then weight them by TF-IDF.
		Set<Word> wordSet = model.getWordByTopics(topicIndexSet, 20);
		LDAFeatureProcess.calTFIDFAsWeight(docSet, wordSet);
		return wordSet;
	}

	@Test
	public void test() throws IOException {
		String resultPath = "ldaResult/";
		String parameterFile = "source/lda_parameters.txt";
		String trainPathDir = "LDATrain/";
		String testPath = "train/train_messages.txt";
		Set<Word> wordSet = trainAndPredictLDA(trainPathDir, parameterFile, resultPath, testPath);
		FileUtil.writeKeyWordFile("ldaWords/keyWords.doc", new ArrayList<Word>(wordSet));
	}

	/**
	 * Stand-alone training entry point: trains on LDATrain/, saves the final
	 * model, then predicts the topic of one hard-coded sample message and
	 * writes that topic's words out for comparison.
	 *
	 * @param args unused
	 * @throws IOException if reading the corpus or writing results fails
	 */
	public static void main(String[] args) throws IOException {
		String resultPath = "ldaResult/";
		String parameterFile = "source/lda_parameters.txt";
		modelparameters ldaparameters = new modelparameters();
		getParametersFromFile(ldaparameters, parameterFile);
		String dirPath = "LDATrain/";
		Documents docSet = new Documents();
		docSet.readDocs(dirPath);
		System.out.println("wordMap size " + docSet.termToIndexMap.size());
		FileUtil.mkdir(resultPath);
		LdaModel model = new LdaModel(ldaparameters);
		System.out.println("1 Initialize the model ...");
		model.initializeModel(docSet);
		System.out.println("2 Learning and Saving the model ...");
		model.inferenceModel(docSet);
		System.out.println("3 Output the final model ...");
		model.saveIteratedModel(ldaparameters.iteration, docSet);
		System.out.println("Done!");
		// Predict one new sample text (the literal below is test data).
		String messStr = "好消息!!薇町婚纱造型推出老带新活动啦!已在本店预定的新娘推荐新顾客来本店,定单后即赠送新、老顾客各一支价值58元定妆隔离水(在婚礼当";
		Document doc = new Document(messStr);
		int topicIndex = model.predictNewSampleTopic(doc);
		Set<Word> wordSet = model.getWordByTopic(topicIndex);
		FileUtil.writeKeyWordFile("ldaWords/comparedkeyWords.doc", new ArrayList<Word>(wordSet));
	}
}
public class LdaModel {
	int [][] doc;//word index array
	int V, K, M;//vocabulary size, topic number, document number
	int [][] z;//topic label array
	float alpha; //doc-topic dirichlet prior parameter
	float beta; //topic-word dirichlet prior parameter
	int [][] nmk;//given document m, count times of topic k. M*K
	int [][] nkt;//given topic k, count times of term t. K*V
	int [] nmkSum;//Sum for each row in nmk
	int [] nktSum;//Sum for each row in nkt
	double [][] phi;//Parameters for topic-word distribution K*V
	double [][] theta;//Parameters for doc-topic distribution M*K
	int iterations;//Times of iterations
	int saveStep;//The number of iterations between two saving
	int beginSaveIters;//Begin save model at this iteration
	Map<String, Integer> wordIndexMap;
	Documents docSet;

	/**
	 * @param modelparam hyper-parameters and iteration schedule for this model
	 */
	public LdaModel(LdaGibbsSampling.modelparameters modelparam) {
		alpha = modelparam.alpha;
		beta = modelparam.beta;
		iterations = modelparam.iteration;
		K = modelparam.topicNum;
		saveStep = modelparam.saveStep;
		beginSaveIters = modelparam.beginSaveIters;
	}

	/**
	 * Allocates all count matrices and assigns a uniformly random initial
	 * topic to every token of every document.
	 *
	 * @param docSet training corpus; kept in a field for later word lookups
	 */
	public void initializeModel(Documents docSet) {
		this.docSet = docSet;
		M = docSet.docs.size();
		V = docSet.termToIndexMap.size();
		nmk = new int[M][K];
		nkt = new int[K][V];
		nmkSum = new int[M];
		nktSum = new int[K];
		phi = new double[K][V];
		theta = new double[M][K];
		this.wordIndexMap = new HashMap<String, Integer>();
		// Initialize documents index array (copied so the model owns it).
		doc = new int[M][];
		for (int m = 0; m < M; m++) {
			// Notice the limit of memory
			int N = docSet.docs.get(m).docWords.length;
			doc[m] = new int[N];
			for (int n = 0; n < N; n++) {
				doc[m][n] = docSet.docs.get(m).docWords[n];
			}
		}
		// Initialize topic label z for each word uniformly at random.
		z = new int[M][];
		for (int m = 0; m < M; m++) {
			int N = docSet.docs.get(m).docWords.length;
			z[m] = new int[N];
			for (int n = 0; n < N; n++) {
				int initTopic = (int) (Math.random() * K); // from 0 to K - 1
				z[m][n] = initTopic;
				// number of words in doc m assigned to topic initTopic add 1
				nmk[m][initTopic]++;
				// number of terms doc[m][n] assigned to topic initTopic add 1
				nkt[initTopic][doc[m][n]]++;
				// total number of words assigned to topic initTopic add 1
				nktSum[initTopic]++;
			}
			// total number of words in document m is N
			nmkSum[m] = N;
		}
	}

	/**
	 * Runs collapsed Gibbs sampling for {@code iterations} sweeps, saving the
	 * model every {@code saveStep} iterations once {@code beginSaveIters} is
	 * reached.
	 *
	 * @param docSet training corpus (used only for document lengths here)
	 * @throws IOException if saving an intermediate model fails
	 * @throws IllegalArgumentException if the schedule is inconsistent
	 */
	public void inferenceModel(Documents docSet) throws IOException {
		if (iterations < saveStep + beginSaveIters) {
			// Fail fast instead of the previous System.exit(0): exiting the
			// JVM from library code would silently kill any caller.
			throw new IllegalArgumentException(
					"Error: the number of iterations should be larger than " + (saveStep + beginSaveIters));
		}
		for (int i = 0; i < iterations; i++) {
			System.out.println("Iteration " + i);
			if ((i >= beginSaveIters) && (((i - beginSaveIters) % saveStep) == 0)) {
				System.out.println("Saving model at iteration " + i + " ... ");
				// Firstly update parameters, secondly print model variables.
				updateEstimatedParameters();
				saveIteratedModel(i, docSet);
			}
			// Use Gibbs sampling to update z[][].
			for (int m = 0; m < M; m++) {
				int N = docSet.docs.get(m).docWords.length;
				for (int n = 0; n < N; n++) {
					// Sample from p(z_i|z_-i, w)
					int newTopic = sampleTopicZ(m, n);
					z[m][n] = newTopic;
				}
			}
		}
	}

	/** Re-estimates phi (topic-word) and theta (doc-topic) from the counts. */
	private void updateEstimatedParameters() {
		for (int k = 0; k < K; k++) {
			for (int t = 0; t < V; t++) {
				phi[k][t] = (nkt[k][t] + beta) / (nktSum[k] + V * beta);
			}
		}
		for (int m = 0; m < M; m++) {
			for (int k = 0; k < K; k++) {
				theta[m][k] = (nmk[m][k] + alpha) / (nmkSum[m] + K * alpha);
			}
		}
	}

	/**
	 * Samples a new topic for token n of document m from p(z_i|z_-i, w) using
	 * the collapsed Gibbs update rule, and updates the count matrices.
	 *
	 * @return the newly sampled topic index in [0, K)
	 */
	private int sampleTopicZ(int m, int n) {
		// Remove the current topic label of w_{m,n} from the counts.
		int oldTopic = z[m][n];
		nmk[m][oldTopic]--;
		nkt[oldTopic][doc[m][n]]--;
		nmkSum[m]--;
		nktSum[oldTopic]--;
		// Compute p(z_i = k|z_-i, w) (unnormalised).
		double[] p = new double[K];
		for (int k = 0; k < K; k++) {
			p[k] = (nkt[k][doc[m][n]] + beta) / (nktSum[k] + V * beta) * (nmk[m][k] + alpha) / (nmkSum[m] + K * alpha);
		}
		// Roulette-wheel sample: cumulate p, then draw u in [0, p[K-1]).
		for (int k = 1; k < K; k++) {
			p[k] += p[k - 1];
		}
		double u = Math.random() * p[K - 1]; // p[] is unnormalised
		int newTopic;
		for (newTopic = 0; newTopic < K; newTopic++) {
			if (u < p[newTopic]) {
				break;
			}
		}
		// Add the new topic label of w_{m,n} back into the counts.
		nmk[m][newTopic]++;
		nkt[newTopic][doc[m][n]]++;
		nmkSum[m]++;
		nktSum[newTopic]++;
		return newTopic;
	}

	/**
	 * Maps each word of a sample to its (document index, token index) position
	 * in the training corpus. (Translated from the original Chinese comment.)
	 *
	 * @param predictWordSet words of the sample to predict
	 * @return map from word content to "docIndex-tokenIndex", e.g. "1-2",
	 *         or "none" when the word does not occur in the training corpus
	 */
	public Map<String, String> matchTermIndex(Set<Word> predictWordSet) {
		Map<String, String> wordIndexMap = new HashMap<String, String>();
		for (Word word : predictWordSet) {
			String content = word.getContent();
			String indexStr = getTermIndex(content);
			wordIndexMap.put(content, indexStr);
		}
		return wordIndexMap;
	}

	/**
	 * Finds the first (document, token) position of the given word content in
	 * the training corpus. Linear scan over all documents.
	 *
	 * @param content word content to look up
	 * @return "docIndex-tokenIndex" or "none" when not found
	 */
	public String getTermIndex(String content) {
		for (Integer m : docSet.getDocWordsList().keySet()) {
			LinkedList<String> list = docSet.getDocWordsList().get(m);
			for (int i = 0; i < list.size(); i++) {
				if (list.get(i).equals(content))
					return m + "-" + i;
			}
		}
		return "none";
	}

	/**
	 * After training, returns the filtered top-{@code topNum} words of every
	 * topic in the given index set. (Translated from the Chinese comment.)
	 *
	 * @param topicIndexSet topic indices to collect words for
	 * @param topNum number of top words requested per topic
	 * @return union of the kept top words over all requested topics
	 */
	public Set<Word> getWordByTopics(Set<Integer> topicIndexSet, int topNum) {
		Set<Word> wordSet = new HashSet<Word>();
		// Guard against topNum > V, which would overrun the index list.
		int limit = Math.min(topNum, V);
		for (Integer indexT : topicIndexSet) {
			List<Integer> tWordsIndexArray = new ArrayList<Integer>();
			for (int j = 0; j < V; j++)
				tWordsIndexArray.add(Integer.valueOf(j));
			Collections.sort(tWordsIndexArray, new LdaModel.TwordsComparable(phi[indexT]));
			for (int t = 0; t < limit; t++) {
				String content = docSet.indexToTermMap.get(tWordsIndexArray.get(t));
				Word word = new Word(content);
				if (SegmentWordsResult.getStopWordsSet().contains(content)
						|| ProcessKeyWords.remove(word) || ProcessKeyWords.isMeaninglessWord(content))
					continue;
				wordSet.add(word);
			}
		}
		return wordSet;
	}

	/**
	 * Returns all words of one topic (filtered, zero-probability words
	 * dropped), each weighted with its phi probability.
	 *
	 * @param topicIndex topic to collect words for
	 * @return the kept words of the topic, weights set from phi
	 */
	public Set<Word> getWordByTopic(Integer topicIndex) {
		Set<Word> wordSet = new HashSet<Word>();
		List<Integer> tWordsIndexArray = new ArrayList<Integer>();
		for (int j = 0; j < V; j++) {
			tWordsIndexArray.add(Integer.valueOf(j));
		}
		Collections.sort(tWordsIndexArray, new LdaModel.TwordsComparable(phi[topicIndex]));
		for (int t = 0; t < V; t++) {
			String content = docSet.indexToTermMap.get(tWordsIndexArray.get(t));
			Word word = new Word(content);
			word.setWeight(phi[topicIndex][tWordsIndexArray.get(t)]);
			if (SegmentWordsResult.getStopWordsSet().contains(content)
					|| ProcessKeyWords.remove(word) || ProcessKeyWords.isMeaninglessWord(content))
				continue;
			if (phi[topicIndex][tWordsIndexArray.get(t)] <= 0.0)
				continue;
			wordSet.add(word);
		}
		return wordSet;
	}

	/**
	 * Predicts the max-probability topic of a new, unseen document by mapping
	 * its words back to training-corpus positions and sampling a topic for
	 * each mapped word. (Translated from the original Chinese comments.)
	 *
	 * @param doc document to predict
	 * @return index of the most probable topic for this document
	 */
	public int predictNewSampleTopic(Document doc) {
		double topicProb[] = new double[K];
		Map<String, String> wordIndexMap = matchTermIndex(doc.getWordMap().keySet());
		int predict_v = doc.getWordCount();
		// predict_nk[k] = how many of this document's words were sampled into
		// topic k. (The unused predict_nkt/predict_phi/predict_z buffers of
		// the old version were dead stores and have been removed.)
		int[] predict_nk = new int[K];
		for (int index = 0; index < predict_v; index++) {
			String content = doc.getWordsList().get(index);
			String indexStr = wordIndexMap.get(content);
			// Skip words missing from the map or not found in the training
			// corpus ("none") — the old code could NPE on a null lookup.
			if (indexStr == null || indexStr.indexOf("-") == -1)
				continue;
			int m = Integer.valueOf(indexStr.substring(0, indexStr.indexOf("-")));
			int n = Integer.valueOf(indexStr.substring(indexStr.indexOf("-") + 1));
			// Sample from p(z_i|z_-i, w)
			int newTopic = predictSampleTopicZ(m, n);
			predict_nk[newTopic]++;
		}
		// Smoothed topic distribution of the document.
		for (int k = 0; k < K; k++) {
			topicProb[k] = (predict_nk[k] + alpha) / (predict_v + K * alpha);
		}
		return getTopic(topicProb);
	}

	/**
	 * @param topicProp per-topic probabilities, length K
	 * @return index of the maximum entry (first one on ties)
	 */
	public int getTopic(double[] topicProp) {
		int maxIndex = 0;
		double maxProp = topicProp[0];
		for (int k = 1; k < K; k++) {
			if (maxProp < topicProp[k]) {
				maxProp = topicProp[k];
				maxIndex = k;
			}
		}
		return maxIndex;
	}

	/**
	 * Samples a topic for training position (m, n) from p(z_i|z_-i, w)
	 * WITHOUT modifying the trained counts (prediction-time variant of
	 * {@link #sampleTopicZ}).
	 *
	 * @return the sampled topic index in [0, K)
	 */
	public int predictSampleTopicZ(int m, int n) {
		// Compute p(z_i = k|z_-i, w) (unnormalised).
		double[] p = new double[K];
		for (int k = 0; k < K; k++) {
			p[k] = (nkt[k][doc[m][n]] + beta) / (nktSum[k] + V * beta) * (nmk[m][k] + alpha) / (nmkSum[m] + K * alpha);
		}
		// Roulette-wheel sample: cumulate p, then draw u in [0, p[K-1]).
		for (int k = 1; k < K; k++) {
			p[k] += p[k - 1];
		}
		double u = Math.random() * p[K - 1]; // p[] is unnormalised
		int newTopic;
		for (newTopic = 0; newTopic < K; newTopic++) {
			if (u < p[newTopic]) {
				break;
			}
		}
		return newTopic;
	}

	/**
	 * Writes the model snapshot of iteration {@code iters} to "ldaResult/":
	 * lda.params, lda.phi, lda.theta, lda.tassign, lda.twords, lda.words.
	 * All writers now use try-with-resources so no file handle leaks when a
	 * write fails part-way.
	 *
	 * @param iters iteration number used in the file name prefix
	 * @param docSet corpus, used to map term indices back to words
	 * @throws IOException if any output file cannot be written
	 */
	public void saveIteratedModel(int iters, Documents docSet) throws IOException {
		String resultPath = "ldaResult/";
		String modelName = "lda_" + iters;
		// lda.params
		ArrayList<String> lines = new ArrayList<String>();
		lines.add("alpha = " + alpha);
		lines.add("beta = " + beta);
		lines.add("topicNum = " + K);
		lines.add("docNum = " + M);
		lines.add("termNum = " + V);
		lines.add("iterations = " + iterations);
		lines.add("saveStep = " + saveStep);
		lines.add("beginSaveIters = " + beginSaveIters);
		FileUtil.writeLines(resultPath + modelName + ".params", lines);
		// lda.phi K*V
		try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".phi"))) {
			for (int i = 0; i < K; i++) {
				for (int j = 0; j < V; j++) {
					writer.write(phi[i][j] + "\t");
				}
				writer.write("\n");
			}
		}
		// lda.theta M*K
		try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".theta"))) {
			for (int i = 0; i < M; i++) {
				for (int j = 0; j < K; j++) {
					writer.write(theta[i][j] + "\t");
				}
				writer.write("\n");
			}
		}
		// lda.tassign: "termIndex:topic" per token
		try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".tassign"))) {
			for (int m = 0; m < M; m++) {
				for (int n = 0; n < doc[m].length; n++) {
					writer.write(doc[m][n] + ":" + z[m][n] + "\t");
				}
				writer.write("\n");
			}
		}
		List<Word> appendwords = new ArrayList<Word>();
		// lda.twords phi[][] K*V
		int topNum = 10; // find the top 10 topic words in each topic
		try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".twords"))) {
			for (int i = 0; i < K; i++) {
				List<Integer> tWordsIndexArray = new ArrayList<Integer>();
				for (int j = 0; j < V; j++) {
					tWordsIndexArray.add(Integer.valueOf(j));
				}
				Collections.sort(tWordsIndexArray, new LdaModel.TwordsComparable(phi[i]));
				writer.write("topic " + i + "\t:\t");
				for (int t = 0; t < topNum; t++) {
					writer.write(docSet.indexToTermMap.get(tWordsIndexArray.get(t)) + " " + phi[i][tWordsIndexArray.get(t)] + "\t");
					Word word = new Word(docSet.indexToTermMap.get(tWordsIndexArray.get(t)));
					word.setWeight(phi[i][tWordsIndexArray.get(t)]);
					appendwords.add(word);
				}
				writer.write("\n");
			}
		}
		// lda.words: flat "word\tweight" list of all collected top words
		try (BufferedWriter writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".words"))) {
			for (Word word : appendwords) {
				if (word.getContent().trim().equals(""))
					continue;
				writer.write(word.getContent() + "\t" + word.getWeight() + "\n");
			}
		}
	}

	/**
	 * Sorts term indices by DESCENDING probability within one topic.
	 */
	public class TwordsComparable implements Comparator<Integer> {
		public double[] sortProb; // probability of each word in topic k

		public TwordsComparable(double[] sortProb) {
			this.sortProb = sortProb;
		}

		@Override
		public int compare(Integer o1, Integer o2) {
			// Higher probability sorts first; phi values are always finite
			// here, so Double.compare matches the old hand-written chain.
			return Double.compare(sortProb[o2], sortProb[o1]);
		}
	}

	public static void main(String[] args) {
	}
}
public class Documents {
	ArrayList<Document> docs;
	Map<String, Integer> termToIndexMap; // term -> global vocabulary index
	ArrayList<String> indexToTermMap; // global vocabulary index -> term
	Map<String, Integer> termCountMap; // term -> corpus-wide occurrence count
	private static NLPIRUtil npr = new NLPIRUtil();
	private static Set<String> stopWordsSet = SegmentWordsResult.getStopWordsSet();
	// word -> number of documents that contain it (document frequency)
	private Map<Word, Integer> wordDocMap;
	// key: i-th document, value: its word list, aligned with doc[m][n]
	// indices in the LDA model (translated from the Chinese comment)
	private Map<Integer, LinkedList<String>> docWordsList;

	public Documents() {
		docs = new ArrayList<Document>();
		termToIndexMap = new HashMap<String, Integer>();
		indexToTermMap = new ArrayList<String>();
		termCountMap = new HashMap<String, Integer>();
		this.wordDocMap = new HashMap<Word, Integer>();
		this.docWordsList = new HashMap<Integer, LinkedList<String>>();
	}

	public Map<String, Integer> getTermCountMap() {
		return termCountMap;
	}

	public void setTermCountMap(Map<String, Integer> termCountMap) {
		this.termCountMap = termCountMap;
	}

	public Map<Word, Integer> getWordDocMap() {
		return wordDocMap;
	}

	public void setWordDocMap(Map<Word, Integer> wordDocMap) {
		this.wordDocMap = wordDocMap;
	}

	public Map<Integer, LinkedList<String>> getDocWordsList() {
		return docWordsList;
	}

	public void setDocWordsList(Map<Integer, LinkedList<String>> docWordsList) {
		this.docWordsList = docWordsList;
	}

	/**
	 * Reads every file of the given directory as one Document, filling the
	 * shared vocabulary maps and the per-document word lists.
	 *
	 * @param docsPath directory containing the training documents
	 * @throws IllegalArgumentException if docsPath is not a readable directory
	 */
	public void readDocs(String docsPath) {
		File[] docFiles = new File(docsPath).listFiles();
		if (docFiles == null) {
			// listFiles() returns null for a missing/non-directory path; the
			// old code would have thrown a bare NPE in the for-loop header.
			throw new IllegalArgumentException("not a readable directory: " + docsPath);
		}
		int index = 0;
		for (File docFile : docFiles) {
			Document doc = new Document(docFile.getAbsolutePath(), termToIndexMap, indexToTermMap, termCountMap);
			docs.add(doc);
			for (Word word : doc.getWordMap().keySet()) {
				// BUG FIX: the old code re-put the unchanged value, so the
				// document frequency never grew past 1. (Assumes Word
				// implements equals/hashCode on its content — TODO confirm.)
				if (this.wordDocMap.containsKey(word))
					this.wordDocMap.put(word, this.wordDocMap.get(word) + 1);
				else
					this.wordDocMap.put(word, 1);
			}
			this.docWordsList.put(index++, doc.getWordsList());
		}
	}
}
public class Document {
	private static NLPIRUtil npr = new NLPIRUtil();
	private static Set<String> stopWordsSet = SegmentWordsResult.getStopWordsSet();
	private String docName; // source file path (only set by the file constructor)
	int[] docWords; // term index per kept token, aligned with wordsList
	private int wordCount; // number of kept (non-filtered) tokens
	private Map<Word, Integer> wordMap; // word -> occurrence count in this document
	// word contents in token order, aligned with docWords indices
	// (translated from the Chinese comment)
	private LinkedList<String> wordsList;

	public int getWordCount() {
		return wordCount;
	}

	public void setWordCount(int wordCount) {
		this.wordCount = wordCount;
	}

	public Map<Word, Integer> getWordMap() {
		return wordMap;
	}

	public void setWordMap(Map<Word, Integer> wordMap) {
		this.wordMap = wordMap;
	}

	public LinkedList<String> getWordsList() {
		return wordsList;
	}

	public void setWordsList(LinkedList<String> wordsList) {
		this.wordsList = wordsList;
	}

	/**
	 * Builds a document from raw text (used for prediction-time samples).
	 * Segments the text, filters stop/noise words, and assigns each distinct
	 * word a document-local index.
	 *
	 * @param docContent raw message text
	 */
	public Document(String docContent) {
		this.wordMap = new HashMap<Word, Integer>();
		this.wordsList = new LinkedList<String>();
		String splitResult = npr.NLPIR_ParagraphProcess(ProcessMessage.dealWithSentence(docContent), 0);
		String[] wordsArray = splitResult.split(" ");
		this.docWords = new int[wordsArray.length];
		// BUG FIX: the old code called wordMap.containsKey(content) on a map
		// keyed by Word (never true) and stored the occurrence COUNT instead
		// of an index into docWords. A local content->index map fixes both.
		Map<String, Integer> contentIndexMap = new HashMap<String, Integer>();
		int index = 0;
		// Transfer word to document-local index.
		for (String str : wordsArray) {
			String content = ProcessMessage.dealSpecialString(str);
			Word word = new Word(content);
			if (ProcessKeyWords.remove(word) || stopWordsSet.contains(content))
				continue;
			else if (content.length() <= 1 || RegexMatch.specialMatch(content))
				continue;
			this.wordCount++;
			Integer knownIndex = contentIndexMap.get(content);
			if (knownIndex == null) {
				int newIndex = contentIndexMap.size();
				contentIndexMap.put(content, newIndex);
				wordMap.put(word, 1);
				docWords[index++] = newIndex;
			} else {
				// assumes Word equals/hashCode use content — TODO confirm
				wordMap.put(word, wordMap.get(word) + 1);
				docWords[index++] = knownIndex;
			}
			this.wordsList.add(content);
		}
	}

	/**
	 * Builds a training document from a file, registering its terms in the
	 * shared corpus vocabulary maps.
	 * BUG FIX: no this(...) chaining any more — the old version first ran the
	 * text constructor on the file content and then redid everything here,
	 * which double-counted wordCount and wasted a full segmentation pass.
	 *
	 * @param filePath path of the document file
	 * @param termToIndexMap shared term -> global index map, updated in place
	 * @param indexToTermMap shared global index -> term list, updated in place
	 * @param termCountMap shared term -> corpus count map, updated in place
	 */
	public Document(String filePath, Map<String, Integer> termToIndexMap, ArrayList<String> indexToTermMap, Map<String, Integer> termCountMap) {
		this.docName = filePath;
		this.wordMap = new HashMap<Word, Integer>();
		this.wordsList = new LinkedList<String>();
		// Read file and initialize word index array.
		String docContent = FileUtil.readContent(docName);
		String splitResult = npr.NLPIR_ParagraphProcess(docContent, 0);
		String[] wordsArray = splitResult.split(" ");
		this.docWords = new int[wordsArray.length];
		int index = 0;
		// Transfer word to global vocabulary index.
		for (String str : wordsArray) {
			String content = ProcessMessage.dealSpecialString(str);
			Word word = new Word(content);
			if (ProcessKeyWords.remove(word) || stopWordsSet.contains(content))
				continue;
			else if (ProcessKeyWords.isMeaninglessWord(content))
				continue;
			this.wordCount++;
			if (!termToIndexMap.containsKey(content)) {
				int newIndex = termToIndexMap.size();
				// BUG FIX: the old code checked "content" but stored the raw
				// token "str", so lookups and storage disagreed whenever
				// dealSpecialString changed the token.
				termToIndexMap.put(content, newIndex);
				indexToTermMap.add(content);
				termCountMap.put(content, Integer.valueOf(1));
				docWords[index++] = newIndex;
			} else {
				termCountMap.put(content, termCountMap.get(content) + 1);
				docWords[index++] = termToIndexMap.get(content);
			}
			this.wordsList.add(content);
			if (wordMap.containsKey(word))
				wordMap.put(word, wordMap.get(word) + 1);
			else
				wordMap.put(word, 1);
		}
	}

	/**
	 * Heuristic noise filter for URL-like tokens (www./​.com/http:).
	 *
	 * @param string token to test (case-insensitive)
	 * @return true when the token looks like a URL
	 */
	public boolean isNoiseWord(String string) {
		string = string.toLowerCase().trim();
		// filter @xxx and URL; the unused letter-pattern Matcher was removed
		return string.matches(".*www\\..*") || string.matches(".*\\.com.*")
				|| string.matches(".*http:.*");
	}
}
上述中的LdaModel中包含了预测新样本的方法predictNewSampleTopic,返回的是该样本的最大概率主题索引,LdaGibbsSampling中是训练LDA主题模型的流程
主题-单词分布的部分结果如下:
topic 0 : ⒐ 0.0029859442729502916 住宅 0.002257665153592825制造 0.002257665153592825 行为 0.002257665153592825收益 0.0015293860342353582 西北 0.0015293860342353582红星 0.0015293860342353582 轻松 0.0015293860342353582小商品 0.0015293860342353582 搜房网 0.0015293860342353582
topic 1 : 贵宾 0.0030435749795287848 商城 0.0023012396413832903 太平洋保险 0.0015589043032377958 建设 0.0015589043032377958 储蓄 0.0015589043032377958 周四 0.0015589043032377958 完成 0.0015589043032377958 区内 0.0015589043032377958 王志钢 0.0015589043032377958 872944 0.0015589043032377958topic 2 : 油田 0.0017282527405768633 雀巢 0.0017282527405768633 金千 0.0017282527405768633 山腰 9.052753448486328E-4
代办 9.052753448486328E-4 洋房 9.052753448486328E-4 月饼 9.052753448486328E-4 三星 9.052753448486328E-4 集成 9.052753448486328E-4 大桥 9.052753448486328E-4
topic 3 : 美容 0.0016053818399086595 疯狂 0.0016053818399086595 获取 0.0016053818399086595 名牌 0.0016053818399086595 风神 0.0016053818399086595 小额 0.0016053818399086595 璀璨 0.0016053818399086595 一千 0.0016053818399086595 专注 0.0016053818399086595 发放 0.0016053818399086595
topic 4 : 焦点 0.002957939635962248 搜狐 0.002236490836367011
房屋 0.002236490836367011 玉兰 0.002236490836367011 短期 0.002236490836367011 理疗 0.002236490836367011 4001080000 0.0015150421531870961 命题 0.0015150421531870961 公开 0.0015150421531870961 乐器 0.0015150421531870961
topic 5 : 实验 0.0023698494769632816 每块 0.0023698494769632816 收费 0.0023698494769632816 博览 0.0016053818399086595 重新 0.0016053818399086595 任意 0.0016053818399086595 借款 0.0016053818399086595 保底 0.0016053818399086595 预期 0.0016053818399086595 初二 0.0016053818399086595
topic 6 : 宗旨 0.0016625761054456234 陈勇军 0.0016625761054456234 拨打 0.0016625761054456234 家人 0.0016625761054456234 工业 0.0016625761054456234 百货店 0.0016625761054456234 实业 0.0016625761054456234 6222024000068818521 0.0016625761054456234 18692297994 0.0016625761054456234 13300 0.0016625761054456234
topic 7 : → 0.005167018622159958 餐厅 0.00298377126455307 保修 0.00298377126455307 英语 0.0022560220677405596
红 0.0022560220677405596 普通 0.0022560220677405596 学习 0.001528272987343371 龙湖 0.001528272987343371 电大 0.001528272987343371 任意 0.001528272987343371
topic 8 : 登陆 0.0025078877806663513 食宿 0.001698891632258892 急需 0.001698891632258892 建行 0.001698891632258892 葡萄酒 0.001698891632258892 新版 0.001698891632258892 富豪 0.001698891632258892 对比 0.001698891632258892 泥工 0.001698891632258892 相信 8.898956584744155E-4
topic 9 : 体育 0.7940398454666138 活动 0.005577780772000551 优惠 0.0038460372015833855 欢迎 0.003806901630014181 银行 0.0032981408294290304 电话 0.003268789267167449 联系 0.0031611667945981026 公司 0.002769812010228634 地址 0.0024860799312591553 】 0.002339322119951248
topic 10 : 年级 0.0023899467196315527
车主 0.0023899467196315527 过程 0.0016189961461350322 华联 0.0016189961461350322 家电 0.0016189961461350322 大业 0.0016189961461350322 时代 0.0016189961461350322 迪赛尼斯 0.0016189961461350322 稀缺 0.0016189961461350322 稳定 0.0016189961461350322
topic 11 : 利率 0.002570267766714096 知名 0.002570267766714096 南湖 0.0017411491135135293 实现 0.0017411491135135293 立秋 0.0017411491135135293 就读 0.0017411491135135293 罗马 0.0017411491135135293 广电局 0.0017411491135135293 独具 0.0017411491135135293 静候 0.0017411491135135293
topic 12 : 哥哥 0.0029536776710301638 家里 0.0029536776710301638 化妆 0.0029536776710301638 名品 0.0022332684602588415
一 0.0022332684602588415 四川 0.0015128592494875193 二手车 0.0015128592494875193 订购 0.0015128592494875193 多种 0.0015128592494875193 潜力 0.0015128592494875193
topic 13 : 建行 0.002435001078993082 开发商 0.0016495168674737215 美容 0.0016495168674737215 奔驰 0.0016495168674737215 比例 0.0016495168674737215 英伦 0.0016495168674737215 开通 0.0016495168674737215 开班 0.0016495168674737215 打开 0.0016495168674737215 英国 0.0016495168674737215
topic 14 : 增值 0.002355444012209773 [验] 0.002355444012209773 公开 0.0015956234419718385 打印机 0.0015956234419718385 家中 0.0015956234419718385 宾馆 0.0015956234419718385 12000 0.0015956234419718385 渠道 0.0015956234419718385 租赁 0.0015956234419718385 无效 0.0015956234419718385
topic 15 : 自由 0.0024857670068740845
巴拉巴 0.0024857670068740845
丰 0.0024857670068740845 朝阳 0.001683906652033329 家人 0.001683906652033329 84725588 0.001683906652033329 老弟 0.001683906652033329 商住 0.001683906652033329 县委 0.001683906652033329 德国 8.820463554002345E-4
topic 16 : ¥10亿 0.002975110663101077 楼下 0.002249473938718438 感恩 0.002249473938718438 独栋 0.002249473938718438 前来 0.0015238370979204774 手机 0.0015238370979204774 申请 0.0015238370979204774
乐 0.0015238370979204774 考点 0.0015238370979204774 3008300 0.0015238370979204774
topic 17 : 批发 0.00239548715762794 总监 0.0016227493761107326 车子 0.0016227493761107326 饭店 0.0016227493761107326 伙伴 0.0016227493761107326 直属 0.0016227493761107326 事后 0.0016227493761107326 翰林 0.0016227493761107326 专题片 0.0016227493761107326 装修 8.500116528011858E-4
topic 18 : 期待 0.0024758405052125454
价 0.0016771822702139616 你好 0.0016771822702139616 决定 0.0016771822702139616 助剂 0.0016771822702139616 人员 0.0016771822702139616 雄伟 0.0016771822702139616 只用 0.0016771822702139616 享受 8.785240934230387E-4 四川 8.785240934230387E-4
topic 19 : 房价 0.003103474387899041 底价 0.0023465293925255537 湖南 0.0015895843971520662
凡 0.0015895843971520662 送礼 0.0015895843971520662 恒大 0.0015895843971520662 一生 0.0015895843971520662 代言人 0.0015895843971520662 专车 0.0015895843971520662 大唐 0.0015895843971520662
topic 20 : 企业主 0.0023483068216592073 讲师 0.0023483068216592073
6222021001055293358 0.0023483068216592073 首发 0.0015907884808257222 认购 0.0015907884808257222 请问 0.0015907884808257222 发布 0.0015907884808257222 中午 0.0015907884808257222 开幕 0.0015907884808257222 ⒍ 0.0015907884808257222
topic 21 : 重新 0.002323663793504238 帮忙 0.002323663793504238 85654475 0.002323663793504238
宾 0.002323663793504238
中国 0.0015740948729217052 学历 0.0015740948729217052 " 0.0015740948729217052 温州 0.0015740948729217052 好久 0.0015740948729217052 钢板 0.0015740948729217052
topic 22 : 可口 0.0024103878531605005 形象 0.0024103878531605005 减轻 0.0024103878531605005 高层 0.0016328433994203806 爸爸 0.0016328433994203806 基金 0.0016328433994203806 营业额 0.0016328433994203806 意大利 0.0016328433994203806 正常 0.0016328433994203806 吉智 0.0016328433994203806
topic 23 : 关系 0.0024738647043704987 经营 0.0016758438432589173 美容 0.0016758438432589173 梦想 0.0016758438432589173 喷漆 0.0016758438432589173 肌肤 0.0016758438432589173 刘汉琳 0.0016758438432589173 索菲 0.0016758438432589173 依依 0.0016758438432589173 欢迎 8.778230403549969E-4
topic 24 : 考试 0.0016652129124850035 上班 0.0016652129124850035 金条 0.0016652129124850035
宝 0.0016652129124850035 澳门 0.0016652129124850035 粘贴 0.0016652129124850035 收缩 0.0016652129124850035 18800574923 0.0016652129124850035 豪华 8.722544298507273E-4 老师 8.722544298507273E-4
topic 25 : 长期 0.0030594731215387583 开发区 0.0023132602218538523 低价 0.0023132602218538523 ⑥ 0.0023132602218538523 转告 0.0023132602218538523
新 0.0015670472057536244 得到 0.0015670472057536244 [通] 0.0015670472057536244 融资 0.0015670472057536244 万科 0.0015670472057536244
topic 26 : 开发区 0.002339445985853672 石油 0.0015847859904170036 宁波 0.0015847859904170036 更换 0.0015847859904170036 不用 0.0015847859904170036 会议 0.0015847859904170036 初三 0.0015847859904170036 汽车站 0.0015847859904170036 抽空 0.0015847859904170036 实用 0.0015847859904170036
topic 27 : 代办 0.0016745076281949878 代表 0.0016745076281949878 女性 0.0016745076281949878 13825139678 0.0016745076281949878 承担 0.0016745076281949878 影响力 0.0016745076281949878 13934141989 0.0016745076281949878 槐花 0.0016745076281949878
沐 0.0016745076281949878 过敏 0.0016745076281949878
topic 28 : 婚礼 0.00862991251051426 海尔 0.002210969338193536 电影 0.002210969338193536 小乔 0.002210969338193536 15953174009 0.002210969338193536 茶店 0.002210969338193536 7627292. 0.002210969338193536 15985917304 0.002210969338193536 新余 0.001497753313742578 资料 0.001497753313742578
topic 29 : 【 0.021667908877134323
你 0.015670640394091606 您好 0.01555958017706871 光临 0.014560035429894924
尊敬 0.014337914064526558 现在 0.013005186803638935 】 0.012338823638856411 享受 0.010783976875245571 信用 0.009451250545680523 详情 0.007896402850747108
topic 30 : 西吉 0.0024778195656836033 封顶 0.0016785229090601206 押金 0.0016785229090601206 海外 0.0016785229090601206 澜庭 0.0016785229090601206 账户 0.0016785229090601206 原因 0.0016785229090601206
6222021001036927348 0.0016785229090601206 欧莱雅 0.0016785229090601206 推荐 8.792263106442988E-4