# -*- coding: utf-8 -*-
"""Count how often each jieba-segmented token of a sample paragraph occurs
in a local ``test.html`` file, then print every (token, count) pair twice:
once in order of first appearance and once sorted by descending count,
followed by the original paragraph.

Fixes over the previous revision:
  * the whole script had been pasted twice and the first copy's
    triple-quoted string was unterminated (a SyntaxError);
  * results were stored at ``listzk.index(i)``, so repeated tokens kept
    overwriting the first occurrence's slot while later slots stayed
    empty ``["", ""]`` rows that were then printed and sorted;
  * tokens were fed to ``re.findall`` unescaped, so punctuation tokens
    such as "." or "/" acted as regex metacharacters — now escaped with
    ``re.escape``;
  * the hard-coded 400-slot result list (which the old comment admitted
    would crash on longer input) is gone;
  * the leaked file handle, the ``sys.setdefaultencoding`` hack, and the
    unused numpy/sys imports were removed.
"""
import re

# Sample text to segment.
# NOTE(review): jieba targets Chinese; for this pure-English paragraph its
# segmentation is essentially whitespace/punctuation splitting — confirm
# that is the intent.
STRZK3 = """Deep learning models with convolutional and recurrent networks are now ubiquitous and analyze massive amounts of audio, image, video, text and graph data,
with applications in automatic translation, speech-to-text, scene understanding,
ranking user preferences, ad placement, etc. Competing frameworks for building
these networks such as TensorFlow, Chainer, CNTK, Torch/PyTorch, Caffe1/2,
MXNet and Theano, explore different tradeoffs between usability and expressive-
ness, research or production orientation and supported hardware. They operate
on a DAG of computational operators, wrapping high-performance libraries such
as CUDNN for NVIDIA GPUs or NNPACK for various CPUs, and automate
memory allocation, synchronization, distribution."""


def count_token_frequencies(tokens, text):
    """Return ``[token, count]`` pairs for each *unique* token.

    Pairs appear in order of the token's first appearance in *tokens*;
    ``count`` is the number of non-overlapping literal occurrences of the
    token inside *text*.

    :param tokens: iterable of token strings (e.g. from ``jieba.cut``)
    :param text:   the corpus to count occurrences in
    :return:       list of ``[token, count]`` pairs
    """
    pairs = []
    seen = set()
    for tok in tokens:
        if tok in seen:
            # A repeated token would otherwise recount (and previously
            # overwrote) the first occurrence's slot — emit it only once.
            continue
        seen.add(tok)
        # re.escape so punctuation tokens (".", "/", ",", ...) are matched
        # literally instead of being interpreted as regex metacharacters.
        pairs.append([tok, len(re.findall(re.escape(tok), text))])
    return pairs


def main():
    """Segment STRZK3, count each token in test.html, print the report."""
    # Imported here so the module stays importable without jieba installed.
    import jieba

    # ``with`` closes the handle; the old bare open().read() leaked it.
    with open("test.html", "r") as fh:
        corpus = fh.read()

    tokens = list(jieba.cut(STRZK3))
    pairs = count_token_frequencies(tokens, corpus)

    # First pass: tokens in order of first appearance.
    for tok, cnt in pairs:
        print(tok)
        print(cnt)

    # Second pass: same pairs, most frequent first.
    pairs.sort(key=lambda p: p[1], reverse=True)
    for tok, cnt in pairs:
        print(tok)
        print(cnt)

    # Three blank lines, then the original paragraph (chr(10) is "\n").
    print("\n\n\n" + STRZK3)


if __name__ == "__main__":
    main()