第七周 文件和数据格式化 7.1~7.6
-7.1文件的使用
# Text mode without an explicit encoding uses the platform default codec, so
# reading a UTF-8 file on a GBK-locale system fails with the error shown below.
# The context manager closes the file even when the read raises, and nothing
# is read after the file has been closed.
with open("f.txt", "rt") as tf:
    print(tf.readline())
UnicodeDecodeError: 'gbk' codec can't decode byte 0xad in position 3: illegal multibyte sequence
# An explicit encoding makes decoding independent of the platform default.
# The context manager closes the file even if the read raises.
with open("f.txt", "rt", encoding='utf-8') as tf:
    print(tf.readline())
"中国是个伟大的国家!"
# Binary mode returns the raw bytes of the line; no decoding takes place.
# The context manager closes the file even if the read raises.
with open("f.txt", "rb") as tf:
    print(tf.readline())
b'"\xe4\xb8\xad\xe5\x9b\xbd\xe6\x98\xaf\xe4\xb8\xaa\xe4\xbc\x9f\xe5\xa4\xa7\xe7\x9a\x84\xe5\x9b\xbd\xe5\xae\xb6"'
>>> tf = open("C://Users//admin//Desktop//f.txt", "rt", encoding='utf-8')
>>> print(tf.read(2))
"中
>>> tf.readline()
'国是个伟大的国家!"'
>>> tf.read()
''
>>> tf.readline()
''
>>> tf.readlines()
[]
文本的全文操作
遍历全文本 一
fname = input("请输入要打开的文件名称:")
# Read the entire file into memory in one call; the context manager closes
# the file even if the read raises.
with open(fname, "r", encoding='utf-8') as fo:
    txt = fo.read()
# Process the full text held in `txt` here.
遍历全文本 二
fname = input("请输入要打开的文件名称:")
# Read the file two characters at a time; read() returns "" at end of file,
# which ends the loop.
with open(fname, "r", encoding='utf-8') as fo:
    txt = fo.read(2)
    while txt != "":
        # Process the current two-character chunk held in `txt` here.
        txt = fo.read(2)
文件的逐行操作
逐行遍历文件 一
fname = input("请输入要打开的文件名称:")
with open(fname, "r", encoding='utf-8') as fo:
    # readlines() loads every line into a list before the loop starts.
    for line in fo.readlines():
        # Each line keeps its trailing newline; print() appends another one.
        print(line)
逐行遍历文件 二
fname = input("请输入要打开的文件名称:")
with open(fname, "r", encoding='utf-8') as fo:
    # Iterating the file object yields one line at a time, so the whole
    # file is never held in memory at once.
    for line in fo:
        # Each line keeps its trailing newline; print() appends another one.
        print(line)
请输入要打开的文件名称:f.txt
"中国是个伟大的国家!"
第二行!!
第三行@@
数据文件的写入
# `fo` is assumed to be a file object already opened for writing.
# write() writes one string to the file.
fo.write("你给我的giao giao!!")
ls = ["A", "B", "C"]
# writelines() writes every string in the list as-is; no separators or
# newlines are added between the items.
fo.writelines(ls)
# "w+" truncates (or creates) the file and also allows reading it back.
with open("f.txt", "w+", encoding='utf-8') as fo:
    fo.write("你给我的giao giao!!")
    # After write() the position is at the end of the file; rewind so that
    # readlines() sees what was just written.
    fo.seek(0)
    for line in fo.readlines():
        print(line)
你给我的giao giao!!
with open("f.txt", "w+", encoding='utf-8') as fo:
    # "A\n" ends with a real line break; "B\\n" is a backslash followed by
    # the letter n, so "B\nC" ends up on a single line in the file.
    ls = ["A\n", "B\\n", "C"]
    fo.writelines(ls)
    # Rewind before reading back what was written.
    fo.seek(0)
    for line in fo.readlines():
        print(line)
A
B\nC
-7.2实例11:自动轨迹绘制
# AutoTraceDraw.py
# Draw a turtle trace described by data.txt. Each data line holds six
# comma-separated numbers:
#   distance, turn-right flag, turn angle, red, green, blue
import ast
import turtle as t

t.title('自动轨迹绘制')
t.setup(800, 600, 0, 0)
t.pencolor("red")
t.pensize(5)

# Read the data.
datals = []
with open("data.txt") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            continue
        # ast.literal_eval only accepts Python literals, so a data file cannot
        # run arbitrary code the way eval() would allow.
        datals.append(
            [ast.literal_eval(field.strip()) for field in line.split(",")]
        )

# Draw the trace, one segment per data row.
for row in datals:
    distance, turn_right, angle, red, green, blue = row[:6]
    t.pencolor(red, green, blue)
    t.fd(distance)
    # A non-zero flag turns right, zero turns left.
    if turn_right:
        t.rt(angle)
    else:
        t.lt(angle)
t.done()
#data.txt
300,0,144,1,0,0
300,0,144,0,1,0
300,0,144,0,0,1
300,0,144,1,1,0
300,0,108,0,1,1
184,0,72,1,0,1
184,0,72,0,0,0
184,0,72,0,0,0
184,0,72,0,0,0
184,1,72,1,0,1
184,1,72,0,0,0
184,1,72,0,0,0
184,1,72,0,0,0
184,1,72,0,0,0
184,1,720,0,0,0
-7.3一维数据的格式化和处理
# Read the file inside a context manager so the handle is closed promptly.
with open("1.txt") as f:
    txt = f.read()
# split() with no argument splits on any run of whitespace, newlines included.
ls = txt.split()
print(ls)
['1', '2', '3', '4$5$6', '7#8#9']
# Read the file inside a context manager so the handle is closed promptly.
with open("1.txt") as f:
    txt = f.read()
# Splitting on "$" leaves spaces and newlines inside the resulting items.
ls = txt.split("$")
print(ls)
['1 2 3\n4', '5', '6\n7#8#9']
ls = ['中国', '美国', '日本']
with open("1.txt", 'w+') as f:
    # Store the one-dimensional data as a single space-separated line.
    f.write(' '.join(ls))
    # Rewind so the loop below reads back what was just written.
    f.seek(0)
    for line in f:
        print(line)
中国 美国 日本
ls = ['中国', '美国', '日本']
with open("1.txt", 'w+') as f:
    # Store the one-dimensional data as a single line separated by "$$".
    f.write('$$'.join(ls))
    # Rewind so the loop below reads back what was just written.
    f.seek(0)
    for line in f:
        print(line)
中国$$美国$$日本
-7.4二维数据的格式化和处理
二维数据的读入
# Load comma-separated rows from 1.txt into a list of lists.
# The file must be opened with "r": mode "w+" would truncate it on open and
# the loop would never see any data.
ls = []
with open("1.txt", "r", encoding='utf-8') as f:
    for line in f:
        # Drop the line break before splitting the row into fields.
        line = line.replace("\n", "")
        ls.append(line.split(","))
二维数据的写入
ls = [[], [], []]
with open("1.txt", "w", encoding='utf-8') as f:
    # NOTE: this demo writes the same fixed string once per row; joining a
    # str puts a comma between its characters. To write the actual rows of
    # `ls`, use ','.join(item) with a named loop variable instead.
    for _ in ls:
        f.write(','.join("#123# ") + '\n')
#,1,2,3,#,
#,1,2,3,#,
#,1,2,3,#,
二维数据的逐一处理
ls = [[1, 2], [3, 4], [5, 6]]
# Visit every element of the two-dimensional list: the outer loop walks the
# rows, the inner loop walks the values within one row.
for row in ls:
    for column in row:
        print(column)
1
2
3
4
5
6
-7.5模块6:wordcloud库的使用
import wordcloud

# Build a cloud with default settings, generate it from the text, and save
# the rendered image as a PNG.
w = wordcloud.WordCloud()
w.generate("wo ai ni").to_file("1.png")
#coding=utf-8
import wordcloud

# Same flow as a default cloud, but with a sky-blue background.
# No font_path is passed here.
txt = "ao ai ni LOVE 爱"
cloud_options = {"background_color": 'skyblue'}
w = wordcloud.WordCloud(**cloud_options)
w.generate(txt)
w.to_file("1.png")
中文显示不出来
import jieba
import wordcloud

txt = "我喜欢你很久了,你好!我很想你。"
# Segment the sentence with jieba and join the tokens with spaces before
# handing the text to WordCloud.
words = jieba.lcut(txt)
w = wordcloud.WordCloud(
    background_color='skyblue',
    width=2300,
    height=2200,
    font_path="C://Windows//Fonts//msyh.ttc",
)
w.generate(" ".join(words))
w.to_file("1.png")
font_path="C://Windows//Fonts//msyh.ttc"找到了系统里的中文字体
Building prefix dict from the default dictionary …
Loading model from cache C:\Users\Admin\AppData\Local\Temp\jieba.cache
Loading model cost 1.107 seconds.
Prefix dict has been built successfully.
-7.6实例12:政府工作报告词云
https://python123.io/resources/pye/新时代中国特色社会主义.txt
常规矩形词云
# GovRptWordCloudv1.py
import jieba
import wordcloud
f = open("新时代中国特色社会主义.txt", "r", encoding="utf-8")
t = f.read()
f.close()
ls = jieba.lcut(t)
txt = " ".join(ls)
w = wordcloud.WordCloud( \
width=1000, height=700, \
background_color="white",
font_path="msyh.ttc"
)
w.generate(txt)
w.to_file("grwordcloud.png")
https://python123.io/resources/pye/关于实施乡村振兴战略的意见.txt
# GovRptWordCloudv1.py
# Render a rectangular word cloud from a Chinese report.
import jieba
import wordcloud

# Read the report; the context manager closes the file even if read() raises.
with open("关于实施乡村振兴战略的意见.txt", "r", encoding="utf-8") as f:
    t = f.read()

# Segment the text with jieba and join the tokens with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)

w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
)
w.generate(txt)
w.to_file("grwordcloud.png")
# GovRptWordCloudv1.py
# Render a rectangular word cloud from a Chinese report, limited to 15 words.
import jieba
import wordcloud

# Read the report; the context manager closes the file even if read() raises.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()

# Segment the text with jieba and join the tokens with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)

w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
    max_words=15,
)
w.generate(txt)
w.to_file("grwordcloud.png")
# GovRptWordCloudv1.py
# Render a rectangular word cloud from a Chinese report, limited to 15 words.
import jieba
import wordcloud

# Read the report; the context manager closes the file even if read() raises.
with open("关于实施乡村振兴战略的意见.txt", "r", encoding="utf-8") as f:
    t = f.read()

# Segment the text with jieba and join the tokens with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)

w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
    max_words=15,
)
w.generate(txt)
w.to_file("grwordcloud.png")
不规则图形词云
# GovRptWordCloudv2.py
# Render a word cloud shaped by a mask image.
import jieba
import wordcloud
# NOTE: scipy.misc.imread was deprecated in SciPy 1.0 and removed in 1.2, so
# this import fails on a current SciPy; another image reader is needed there.
from scipy.misc import imread

mask = imread("chinamap.jpg")
excludes = {}  # not used anywhere in this script

# Read the report; the context manager closes the file even if read() raises.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()

# Segment the text with jieba and join the tokens with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)

w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
    mask=mask,
)
w.generate(txt)
w.to_file("grwordcloudm.png")
(示例代码中的from scipy.misc import imread已被淘汰,😔自己作了以下小尝试)
方法一:×
# This import binds only the name `imread`; the name `imageio` itself is never
# bound, so the qualified call on the next line raises NameError.
from imageio import imread
mask =imageio.imread("fivestar.png")
NameError: name 'imageio' is not defined
方法二:√
# Importing the package binds the name `imageio`, so the qualified call
# imageio.imread(...) resolves.
import imageio
mask =imageio.imread("biglogo.png")
"C:\Program Files\Python38\python3.exe" C:/Users/Admin/Desktop/1.py
Traceback (most recent call last):
  File "C:/Users/Admin/Desktop/1.py", line 6, in <module>
    mask =imageio.imread("biglogo.png")
  File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\imageio\core\functions.py", line 265, in imread
    reader = read(uri, format, "i", **kwargs)
  File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\imageio\core\functions.py", line 172, in get_reader
    request = Request(uri, "r" + mode, **kwargs)
  File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\imageio\core\request.py", line 124, in __init__
    self._parse_uri(uri)
  File "C:\Users\Admin\AppData\Roaming\Python\Python38\site-packages\imageio\core\request.py", line 260, in _parse_uri
    raise FileNotFoundError("No such file: '%s'" % fn)
FileNotFoundError: No such file: 'C:\Users\Admin\Desktop\biglogo.png'
Process finished with exit code 1
完整代码就是:
# GovRptWordCloudv2.py
# Render a word cloud shaped by a mask image.
import jieba
import wordcloud
# imageio.imread is used here in place of the removed scipy.misc.imread.
import imageio

# The mask image must exist next to this script, otherwise imread raises
# FileNotFoundError.
mask = imageio.imread("biglogo.png")
excludes = {}  # not used anywhere in this script

# Read the report; the context manager closes the file even if read() raises.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()

# Segment the text with jieba and join the tokens with spaces for WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)

w = wordcloud.WordCloud(
    width=1000,
    height=700,
    background_color="white",
    font_path="msyh.ttc",
    mask=mask,
)
w.generate(txt)
w.to_file("grwordcloudm.png")
代码改好,你都没图还玩什么?嗯!要搞到白色背景的图片 <( ̄︶ ̄)↗
把图片和py文件放一起,每次更改xxx.png或xxx.jpg 就😄
截的图不干净…
你看到这里了,点个赞8!