1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
/// <summary>
/// Reads a text file, segments it with PanGu, scores every distinct word as
/// (occurrences of the word in the text / number of segmented words) * word.Frequency,
/// and prints the five highest-scoring words to the console.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string text = ReadFile(@"D:\系统桌面\ar1.txt");

    PanGu.Segment.Init();
    Segment segment = new Segment();

    // Run the segmenter over the whole text.
    ICollection<WordInfo> words = segment.DoSegment(text);

    Dictionary<string, double> d = new Dictionary<string, double>();
    foreach (var word in words)
    {
        // Score each distinct word only once, and check *before* the regex scan so
        // repeated tokens do not trigger another pass over the full text.
        // Null/empty tokens are skipped: an empty pattern matches at every position
        // and would yield a meaningless score.
        if (string.IsNullOrEmpty(word.Word) || d.ContainsKey(word.Word))
        {
            continue;
        }

        // The token must be matched literally; without escaping, a word containing
        // regex metacharacters ("(", "*", "+", "[" ...) throws or is miscounted.
        string pattern = System.Text.RegularExpressions.Regex.Escape(word.Word);

        // Term frequency: literal occurrences in the text relative to the token count.
        float tf = (float)System.Text.RegularExpressions.Regex.Matches(text, pattern).Count
                   / (float)words.Count;

        d[word.Word] = tf * word.Frequency;
    }

    // Rank words by score, highest first.
    var lst = d.OrderByDescending(x => x.Value).Select(x => x.Key).ToList();

    // Print the top 5 keywords.
    foreach (var keyword in lst.Take(5))
    {
        Console.WriteLine(keyword);
    }
}
|
TF-IDF算法简介
最新推荐文章于 2022-11-07 11:38:44 发布