代码如下:
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace 泛型
{
/// <summary>
/// Console word-frequency counter: reads a text file, counts the words accepted by
/// <see cref="CountWords"/>, and prints every word that occurs at least
/// <see cref="MinFrequency"/> times, most frequent first.
/// </summary>
class Program
{
    /// <summary>File analysed when no path is supplied on the command line.</summary>
    private const string DefaultInputPath = @"c:\users\administrator\documents\visual studio 2015\Projects\泛型\泛型\1.txt";

    /// <summary>Words of this length or shorter are skipped when counting.</summary>
    private const int MaxIgnoredWordLength = 3;

    /// <summary>Words that occur fewer times than this are not printed.</summary>
    private const int MinFrequency = 5;

    // Built once instead of on every CountWords call.
    private static readonly Regex WordSeparator = new Regex(@"\W+");

    // Matches any character in the range U+4E00..U+9FA5; used to skip tokens containing Chinese.
    private static readonly Regex ChineseCharacter = new Regex(@"[\u4e00-\u9fa5]");

    /// <summary>
    /// Splits <paramref name="text"/> on runs of non-word characters and counts how
    /// often each remaining token occurs. Tokens of <see cref="MaxIgnoredWordLength"/>
    /// characters or fewer, and tokens containing a character in U+4E00..U+9FA5, are
    /// ignored. Counting is case-sensitive.
    /// </summary>
    /// <param name="text">The text to analyse.</param>
    /// <returns>A map from each counted word to its number of occurrences.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    static Dictionary<string, int> CountWords(string text)
    {
        var frequencies = new Dictionary<string, int>();

        foreach (string word in WordSeparator.Split(text))
        {
            // The length test also rejects the empty tokens Split yields at the edges.
            if (word.Length <= MaxIgnoredWordLength || ChineseCharacter.IsMatch(word))
            {
                continue;
            }

            // Single lookup: count stays 0 when the word has not been seen yet.
            int count;
            frequencies.TryGetValue(word, out count);
            frequencies[word] = count + 1;
        }

        return frequencies;
    }

    /// <summary>
    /// Entry point. Counts the words of the input file and prints "word:count" for
    /// each sufficiently frequent word in descending order of frequency, then waits
    /// for a key press.
    /// </summary>
    /// <param name="args">
    /// Optional: <c>args[0]</c> is the path of the file to analyse. When absent,
    /// <see cref="DefaultInputPath"/> is used.
    /// </param>
    static void Main(string[] args)
    {
        string inputPath = (args != null && args.Length > 0) ? args[0] : DefaultInputPath;

        string text = ReadTxtContent(inputPath);
        Dictionary<string, int> frequencies = CountWords(text);

        // NOTE: excluding already-known words (a second word list, "proj.txt") was
        // planned here but is not enabled.

        // Filter before sorting so only the words that will be printed get sorted;
        // the sort is stable, so the printed order is the same as sorting everything
        // first. To sort ascending use OrderBy; to sort by word use entry.Key.
        var frequentWords = frequencies
            .Where(entry => entry.Value >= MinFrequency)
            .OrderByDescending(entry => entry.Value);

        foreach (var entry in frequentWords)
        {
            Console.WriteLine("{0}:{1}", entry.Key, entry.Value);
        }

        Console.ReadKey();
    }

    /// <summary>
    /// Reads the whole file at <paramref name="Path"/> as text (UTF-8 unless a byte
    /// order mark indicates otherwise) and returns it with its line breaks intact.
    /// </summary>
    /// <remarks>
    /// Each call returns only the requested file's content; nothing is carried over
    /// between calls, and the file is closed before the method returns. Line breaks
    /// are preserved so that the last word of one line is not fused with the first
    /// word of the next.
    /// </remarks>
    /// <param name="Path">Path of the file to read.</param>
    /// <returns>The file's content; an empty string for an empty file.</returns>
    public static string ReadTxtContent(string Path)
    {
        return File.ReadAllText(Path, Encoding.UTF8);
    }
}
}