主要目的是统计一篇文章中出现次数最多的词语。目前只是搭了个大概的框架,统计出来的结果只有两种:
一种是两个字的词,一种是三个字的词,而且准确率不高,需要优化的地方还很多。
先记下来,回头有空了再继续优化,把准确率和处理速度提上来。
菜鸟写得不好,望大家多多指教。
package main
import (
"fmt"
"io/ioutil"
"strings"
)
// Package-level state shared by the functions in this file.
var (
	// Tri holds three-character candidates and their occurrence counts;
	// Get_slice fills it in.
	Tri = make(map[string]int)

	// Dou holds two-character candidates and their occurrence counts;
	// Get_slice fills it in.
	Dou = make(map[string]int)

	// Num is how many leading entries of the count list Getvalue reports.
	Num = 2

	// Path is the file that main reads and analyses.
	Path = "test.txt"
)
// main reads the file at Path, lets Get_slice collect the repeated two- and
// three-character candidates into Dou and Tri, and prints the candidates
// with the highest counts for each of the two maps.
func main() {
	Buf, err := ioutil.ReadFile(Path)
	if err != nil {
		// Without the input there is nothing to analyse; report the cause
		// instead of continuing with an empty buffer.
		fmt.Println("cannot read", Path+":", err)
		return
	}

	Str := string(Buf)
	Get_slice(len(Buf), Buf, &Str)

	// Sort the counts in descending order, drop repeated counts, then print
	// the matching words for each map.
	List := Paixu(Dou)
	List_Tri := Paixu(Tri)
	Getvalue(Rm_duplicate(&List), Dou)
	Getvalue(Rm_duplicate(&List_Tri), Tri)
}
// Get_slice collects repeated two- and three-character candidates from the
// first n bytes of Buf and stores them in the package-level maps Dou and Tri.
//
// Only non-ASCII characters are considered; ASCII bytes between them are
// skipped, so two characters separated by ASCII text still form a candidate.
// A candidate is recorded only when strings.Count finds it more than once in
// *Str, and the stored value is that (non-overlapping) occurrence count.
//
// Parameters:
//   - n:   number of leading bytes of Buf to scan; values outside
//     [0, len(Buf)] are clamped.
//   - Buf: the raw text, expected to be UTF-8.
//   - Str: the text in which candidates are counted; a nil pointer makes the
//     call a no-op.
func Get_slice(n int, Buf []byte, Str *string) {
	// Clamp n so a caller-supplied length can never index past Buf.
	if n > len(Buf) {
		n = len(Buf)
	}
	if n <= 0 || Str == nil {
		return
	}

	// Decode whole runes rather than assuming every non-ASCII character is
	// exactly three bytes long; this keeps 2- and 4-byte characters intact.
	var chars []rune
	for _, r := range string(Buf[:n]) {
		// Invalid UTF-8 decodes to U+FFFD; skip it so no candidate is built
		// from garbage bytes.
		if r > 127 && r != '\uFFFD' {
			chars = append(chars, r)
		}
	}

	text := *Str
	// Each distinct candidate is counted once; the count cannot change
	// between occurrences, so recounting would only repeat a full-text scan.
	counted := make(map[string]bool)
	record := func(dst map[string]int, word string) {
		if counted[word] {
			return
		}
		counted[word] = true
		if c := strings.Count(text, word); c > 1 {
			dst[word] = c
		}
	}

	for k := 0; k+1 < len(chars); k++ {
		record(Dou, string(chars[k:k+2]))
		// A three-character candidate exists only when a third character
		// follows; otherwise nothing is recorded for this position.
		if k+2 < len(chars) {
			record(Tri, string(chars[k:k+3]))
		}
	}
}
// Paixu ("sort") returns the values of Map ordered from largest to smallest.
// Keys are ignored and equal values are all kept. An empty or nil map yields
// a nil slice.
func Paixu(Map map[string]int) []int {
	var counts []int
	for _, c := range Map {
		counts = append(counts, c)
	}

	// Selection sort, descending: move the largest remaining value to the
	// front of the unsorted tail on every pass.
	for head := range counts {
		largest := head
		for probe := head + 1; probe < len(counts); probe++ {
			if counts[probe] > counts[largest] {
				largest = probe
			}
		}
		counts[head], counts[largest] = counts[largest], counts[head]
	}
	return counts
}
// Getvalue prints, for each of the first Num counts in list, every key of
// Map whose value equals that count, as "<key> 出现次数: <count>".
//
// When list has fewer than Num entries, all of them are used; an empty list
// or a non-positive Num prints nothing. Keys that share a count are printed
// in map-iteration order, which Go leaves unspecified.
func Getvalue(list []int, Map map[string]int) {
	// Clamp the limit: slicing list[:Num] directly panics when fewer than
	// Num counts are available.
	limit := Num
	if limit > len(list) {
		limit = len(list)
	}
	if limit <= 0 {
		return
	}

	for _, count := range list[:limit] {
		for word, v := range Map {
			if v == count {
				fmt.Println(word, "出现次数:", count)
			}
		}
	}
}
// Rm_duplicate returns the distinct values of *list in order of first
// appearance. The input slice is not modified. The result is always a
// non-nil slice, empty when *list has no elements.
func Rm_duplicate(list *[]int) []int {
	unique := []int{}
	for _, candidate := range *list {
		// Linear search through the values kept so far.
		known := false
		for _, kept := range unique {
			if kept == candidate {
				known = true
				break
			}
		}
		if !known {
			unique = append(unique, candidate)
		}
	}
	return unique
}