方法1:使用sed

Shell>cat a1.txt

123 a123,555

456.333

566 555!88,this is a good boy.


Shell>cat a1.txt|sed 's/[[:space:]|[:punct:]]/\n/g'|sed '/^$/d'|sort|uniq -c|sort -n -k1 -r

      2 555

      1 this

      1 is

      1 good

      1 boy

      1 a123

      1 a

      1 88

      1 566

      1 456

      1 333

      1 123

Shell>


sed 's/[[:space:]|[:punct:]]/\n/g'

[]表示正则表达式中的字符集合(方括号表达式),[:space:]代表空白字符(空格、制表符、换行符等),[:punct:]代表标点符号。

[[:space:]|[:punct:]]代表匹配任意一个空白字符或标点字符。注意:方括号内的|只是一个普通字符,并不是“或”运算符;由于|本身属于标点,写成[[:space:][:punct:]]效果完全相同。

s/[[:space:]|[:punct:]]/\n/g代表把空格或标点替换成\n换行符


sed '/^$/d' 删除掉空行



方法2:使用awk

#!/bin/bash
#
# word_freq.sh - print a word-frequency table for one or more text files.
#
# Usage: ./word_freq.sh FILE...
#
# A word is a maximal run of characters that are neither whitespace nor
# punctuation. The table lists the most frequent word first; words with the
# same count are listed in reverse byte order.

# Let a failing pipeline stage (e.g. awk on an unreadable file) show up in
# the exit status instead of being masked by the last stage.
set -o pipefail

#######################################
# Count the words in the given input and print the frequency table.
# Arguments: files to read; standard input is read when none are given.
# Outputs:   a "Word / Count" header followed by one line per distinct word.
# Returns:   non-zero if any stage of the pipeline fails.
#######################################
word_freq() {
  # Printed by the shell before the pipeline starts, so it is always first.
  printf '%-13s %s\n' 'Word' 'Count'

  awk '
    {
      # A whole run of separators is one delimiter. No explicit "|" is
      # needed in the brackets: it is already a punctuation character.
      n = split($0, tokens, /[[:space:][:punct:]]+/)
      # Index loop: never touches elements left over from a longer line.
      for (i = 1; i <= n; i++) {
        # Separators at the start or end of a line yield empty pieces.
        if (tokens[i] != "") {
          freq[tokens[i]]++
        }
      }
    }
    END {
      for (w in freq) {
        print w, freq[w]
      }
    }
  ' "$@" |
    LC_ALL=C sort -k2,2nr -k1,1r |
    # "%-13s " keeps the 14-column layout for short words and still leaves
    # a separating blank after words that are longer than the column.
    awk '{ printf "%-13s %d\n", $1, $2 }'
}

#######################################
# Entry point: require at least one file name, then print the table.
# Returns: 2 on a usage error, otherwise the status of word_freq.
#######################################
main() {
  if (( $# == 0 )); then
    printf 'Usage: %s FILE...\n' "${0##*/}" >&2
    return 2
  fi
  word_freq "$@"
}

main "$@"

运行结果

[root@Test01awk]# cat a1.txt

123 a123,555

456.333

566 555!88,this is a good boy.


[root@Test01awk]# ./word_freq.sh a1.txt

Word          Count

555           2

this          1

is            1

good          1

boy           1

a123          1

a             1

88            1

566           1

456           1

333           1

123           1

              1

[root@Test01awk]#



方法3:使用tr

[root@Test01awk]# cat a1.txt

123 a123,555

456.333

566i 555!88,this is a good boy.


[root@Test01awk]# cat a1.txt |tr '[:space:]|[:punct:]' '\n'|tr -s '\n'|sort|uniq -c|sort -n -k1 -r

      2 555

      1 this

      1 is

      1 good

      1 boy

      1 a123

      1 a

      1 88

      1 566i

      1 456

      1 333

      1 123

[root@Test01awk]#