[1].[代码] [Python]代码 跳至 [1] [2]
def Cut(cutlist, lines):
    """Split a sequence of characters into text runs and delimiter tokens.

    Every item of ``lines`` is tested with ``FindTok(cutlist, item)``
    (``FindTok`` is defined outside this block). When that call is truthy
    the item is treated as a delimiter: the text buffered so far is joined
    and appended to the result (an empty string if nothing was buffered),
    and the delimiter itself is appended as its own entry. Every other
    item is added to the buffer.

    Args:
        cutlist: Delimiter collection, passed unchanged to ``FindTok``.
        lines: Iterable of strings to scan; the caller in this file passes
            a list of single characters.

    Returns:
        A list of joined text runs and delimiter items in input order.
        Text that follows the final delimiter is included as a last entry.
    """
    pieces = []
    pending = []

    for item in lines:
        if FindTok(cutlist, item):
            pieces.append("".join(pending))
            pieces.append(item)
            pending = []
        else:
            pending.append(item)

    # Flush whatever follows the last delimiter; without this a trailing
    # fragment (e.g. the last line of a file with no newline) is lost.
    if pending:
        pieces.append("".join(pending))
    return pieces
[2].[代码] [Python]代码 跳至 [1] [2]
import io

# Characters treated as sentence delimiters. The inner double quotes and
# the backslashes are escaped explicitly so the literal parses; the set of
# characters is the one the original literal spelled out (including a
# literal backslash). A unicode literal is used so no runtime decode is
# needed; on Python 2 this relies on the file's source-encoding
# declaration -- NOTE(review): confirm the declaration matches how the
# file is saved.
cutlist = u"[。,,!……!《》<>\"'::?\\?、\\|“”‘’;]{}(){}【】(){}():?!。,;、~——+%%`:“”\"'‘\n\r"

# Cut() is handed lists; build the delimiter list once instead of once per
# input line.
cut_chars = list(cutlist)

# io.open decodes the GBK input while reading, and the with-block closes
# the file even if decoding or printing fails.
with io.open(inputfilename, encoding="gbk") as infile:
    for text in infile:
        for piece in Cut(cut_chars, list(text)):
            # A piece may still contain spaces: split() breaks it on
            # whitespace and yields nothing for blank pieces.
            for sentence in piece.split():
                # Single-argument form prints "se: <sentence>" identically
                # under the print statement and the print function.
                print("se: " + sentence)