# Example 1: open a text file and collect statistics about the lines that
# start with "p"/"P", then about the file as a whole.


def count_p_lines(lines):
    """Count the lines starting with "p" or "P" (method 1: ``if`` branch).

    Each line is stripped of surrounding whitespace before it is tested, and
    every matching line is echoed to stdout.

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        A ``(count, word_sum, str_sum)`` tuple: the number of matching lines,
        the number of whitespace-separated words in them, and the total
        length of the stripped matching lines.
    """
    count = 0
    word_sum = 0
    str_sum = 0
    for i in lines:
        i = i.strip()  # drop the whitespace on both sides of the line
        if i.startswith(('p', 'P')):
            print(i)  # a line that starts with p/P
            count += 1
            word_sum += len(i.split())
            str_sum += len(i)
    return count, word_sum, str_sum


def count_p_lines_skipping(lines):
    """Count the lines starting with "p" or "P" (method 2: ``continue``).

    Same matching rule as ``count_p_lines`` (both letter cases), but written
    with an early ``continue`` for the lines that do not match.

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        A ``(count, word_sum)`` tuple: the number of matching lines and the
        number of whitespace-separated words in them.
    """
    count = 0
    word_sum = 0
    for i in lines:
        i = i.strip()
        if not i.startswith(('p', 'P')):
            continue
        print(i)
        count += 1
        word_sum += len(i.split())
    return count, word_sum


if __name__ == "__main__":
    # The context manager closes the file even if one of the passes fails.
    with open(r'C:\Users\用户\Desktop\文件名.txt', mode='r') as a:
        # Method 1: rows, words and characters of the lines starting with p/P.
        count, word_sum, str_sum = count_p_lines(a)
        print('row number:', count)
        print('word number:', word_sum)
        print('str number:', str_sum)

        # Method 2: rows and words of the lines starting with p/P.
        # The first pass consumed the file iterator, so rewind before
        # reading the file again.
        a.seek(0)
        count, word_sum = count_p_lines_skipping(a)
        print('row number:', count)
        print('word number:', word_sum)

        # Statistics for the whole file (rewind once more before reading).
        a.seek(0)
        b = a.read()
    print(b)  # the full text
    print(len(b))  # number of characters
    print(len(b.split()))  # number of words (whitespace excluded)
    print(b.split()[:4])  # the first four words of the file
实例2:获取发件人
# Get the sender recorded in email.txt, whose header lines look like
# "from 123@utc.cn bai 5 09:14:16 2019".


def extract_senders(lines):
    """Return the sender address of every line that starts with "f".

    The address is the second whitespace-separated field of the line, e.g.
    "123@utc.cn" in "from 123@utc.cn bai 5 09:14:16 2019".

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        List of sender strings in the order they were encountered.
    """
    senders = []
    for i in lines:
        i = i.rstrip()
        if not i.startswith('f'):
            continue  # ignore lines that do not start with "f"
        b = i.split()
        if len(b) < 2:
            continue  # no address field on this line; avoid an IndexError
        senders.append(b[1])  # field 0 is "from", field 1 is the address
    return senders


if __name__ == "__main__":
    # The context manager closes the file once the lines have been scanned.
    with open(r'c:\users\用户名\desktop\email.txt', mode='r') as a:
        for sender in extract_senders(a):
            print(sender)
实例3:word_count
# Read the whole text and build a dict that counts how often each word occurs.


def count_words(text):
    """Return a dict mapping each whitespace-separated word to its frequency.

    Args:
        text: The full text to analyse.

    Returns:
        Dict of word -> occurrence count, in order of first appearance.
    """
    word = dict()
    for i in text.split():
        word[i] = word.get(i, 0) + 1
    return word


if __name__ == "__main__":
    # The context manager closes the file as soon as its text has been read.
    with open(r'C:\Users\xyy\Desktop\test.txt', mode='r') as test:
        word = count_words(test.read())
    print('单词统计:\n', word)
    print('单词统计项:\n', list(word.items()))
    print('单词列表:\n', list(word.keys()))
    print('单词数目列表:\n', list(word.values()))
# Read the whole text and build a dict that counts how often each character occurs.
def count_chars(text):
    """Return a dict mapping each character of *text* to its frequency.

    Every character is counted, including spaces and line breaks.

    Args:
        text: The full text to analyse.

    Returns:
        Dict of character -> occurrence count, in order of first appearance.
    """
    char_count = dict()
    for i in text:
        # Start from 0 so that the first occurrence counts as 1, not 2.
        char_count[i] = char_count.get(i, 0) + 1
    return char_count


if __name__ == "__main__":
    # The context manager closes the file as soon as its text has been read.
    with open(r'C:\Users\xyy\Desktop\test.txt', mode='r') as test:
        # Named char_count rather than str so the builtin str stays usable.
        char_count = count_chars(test.read())
    print('字符统计:\n', char_count)
# Pick the word(s) with the highest frequency.
def most_frequent_words(word):
    """Return the ``(word, count)`` pairs that share the highest frequency.

    Args:
        word: Dict of word -> occurrence count.

    Returns:
        List of ``(word, count)`` tuples in the dict's iteration order; an
        empty list when *word* is empty.
    """
    # Compute the maximum once instead of on every loop iteration; the
    # default keeps an empty dict from raising ValueError.
    top = max(word.values(), default=0)
    return [(i, j) for i, j in word.items() if j == top]


if __name__ == "__main__":
    # The context manager closes the file as soon as its text has been read.
    with open(r'C:\Users\xyy\Desktop\test.txt', mode='r') as test:
        word = dict()
        for i in test.read().split():
            word[i] = word.get(i, 0) + 1
    print('单词统计:\n', word)
    words = None
    word_count = 0
    print('频次最大的单词:')
    for words, word_count in most_frequent_words(word):
        print(words, word_count)
    # After the loop these hold the last word that reached the top frequency
    # (or None / 0 when the file contained no words).
    print('频次最大的最后一个单词:', words, word_count)
# Script (Baidu home page example)
import urllib.request
import urllib.parse
import urllib.error


def getdatabyurllib(url):
    """Fetch *url* and print the response type followed by every line of it.

    Each line is decoded and stripped of surrounding whitespace before it is
    printed.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
    """
    # The context manager closes the connection once the page has been read.
    with urllib.request.urlopen(url) as response:
        print(type(response))  # the response type
        # Alternative: read and print the whole page in one go with
        #   print(response.read().decode().strip())
        # Use the charset announced by the server, falling back to UTF-8;
        # undecodable bytes are replaced instead of aborting the dump.
        charset = response.headers.get_content_charset() or "utf-8"
        for line in response:  # or print the page line by line
            print(line.decode(charset, errors="replace").strip())


if __name__ == "__main__":
    url = "http://www.baidu.com"  # the quotes are required
    getdatabyurllib(url)
# Script (plain-text web page example)
import urllib.request
import urllib.parse
import urllib.error


def getdatabyurllib(url):
    """Fetch *url*, count how often each word occurs, and print the counts.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        Dict of word -> occurrence count (the same dict that is printed).

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
    """
    counts = dict()
    # The context manager closes the connection once the page has been read.
    with urllib.request.urlopen(url) as response:
        # Use the charset announced by the server, falling back to UTF-8;
        # undecodable bytes are replaced instead of aborting the count.
        charset = response.headers.get_content_charset() or "utf-8"
        for line in response:
            # Turn each line into a list of words, split on whitespace.
            words = line.decode(charset, errors="replace").split()
            for word in words:
                counts[word] = counts.get(word, 0) + 1  # tally this word
    print(counts)
    return counts


if __name__ == "__main__":
    url = "文本网页路径"
    getdatabyurllib(url)
1、实例一#打开文件a=open('r'C:\Users\用户\Desktop\文件名.txt',mode='r'')#获取以P开头的行数与字符数(法一)count=0word_sum=0for i in a: i=i.strip()#去除每一行两边的空格 if i.startswith('p'): print(i)#以p开头的行 count=count+1 word...