import xlrd import xlwt #(2)读取文件内容 def read_excel(file_name): #定义读取文件函数 wb = xlrd.open_workbook(file_name) #创建读取文件的对象wb sheet = wb.sheet_by_index(0) schools = [] for row in range(sheet.nrows): school = [] for col in range(sheet.ncols): content = sheet.cell_value(row,col) school.append(content) schools.append(school) return schools #提供返回值 #(3)写入文件内容 def write_excel(schools): #定义写入文件函数 #(2)构造工作簿:Workbook wb = xlwt.Workbook(encoding = 'utf-8') #创建写入文件的对象wb #(3)为工作簿添加表单:Worksheet s = wb.add_sheet('上海市高校信息表') #创建一个表单 #(4)根据行列序号写入内容 #添加文本的样式(字体和对齐) font = xlwt.Font() #字体 font.bold = True font.height = 400 font.underline = True font.colour_index = 6 #0:黑,1:白,2:红,3:绿,4:蓝,5:黄,6:紫 align = xlwt.Alignment() #对齐 align.horz = 2 #水平居中 1:左,2:中,3:右 align.vert = 1 #垂直居中 0:上,1:中,2:下 style = xlwt.XFStyle() #样式 style.font = font style.alignment = align s.write_merge(0,0,0,6,'上海市高校信息表',style) #写表标题并合并单元格(A1:A7) for col in range(7): #写表列表名称 s.write(1,col,schools[0][col]) #第2行第1列开始写内容school[0][col] row_num = 2 #从第3行开始写数据 for school in schools: #一行一行写数据内容 if school[2] == '上海市': for col in range(7): s.write(row_num,col,school[col]) row_num = row_num + 1 #(5)保存文件内容 wb.save('../R&Q_pic/上海市高校信息表.xls') school_list = read_excel('../Stu_pack/wordcloud/school.xls') #调用读取函数,将素材里的文件school.xls作为实参传递给形参 write_excel(school_list) #调用写入函数,将读取的数据作为实参传递给形参写入到文件"上海市高校信息表.xls"里保存 schools_list = read_excel('../R&Q_pic/上海市高校信息表.xls') i=1 for school in schools_list: if i<13: print(school) i+=1
['上海市高校信息表', '', '', '', '', '', ''] ['招生单位代码', '招生单位名称', '所在省份', '是否985', '是否211', '是否自主划线', '学校类型'] ['10246', '复旦大学', '上海市', '是', '是', '是', '综合类'] ['10247', '同济大学', '上海市', '是', '是', '是', '理工类'] ['10248', '上海交通大学', '上海市', '是', '是', '是', '综合类'] ['10251', '华东理工大学', '上海市', '否', '是', '否', '理工类'] ['10252', '上海理工大学', '上海市', '否', '否', '否', '理工类'] ['10254', '上海海事大学', '上海市', '否', '否', '否', '理工类'] ['10255', '东华大学', '上海市', '否', '是', '否', '理工类'] ['10256', '上海电力学院', '上海市', '否', '否', '否', '理工类'] ['10259', '上海应用技术大学', '上海市', '否', '否', '否', '理工类'] ['10264', '上海海洋大学', '上海市', '否', '否', '否', '农林类']
In [9]:
import pandas as pd
In [20]:
data = pd.read_excel('../Stu_pack/wordcloud/school.xls',index_col = 0) data
Out[20]:
招生单位名称 | 所在省份 | 是否985 | 是否211 | 是否自主划线 | 学校类型 | |
---|---|---|---|---|---|---|
招生单位代码 | ||||||
10001 | 北京大学 | 北京市 | 是 | 是 | 是 | 综合类 |
10002 | 中国人民大学 | 北京市 | 是 | 是 | 是 | 综合类 |
10003 | 清华大学 | 北京市 | 是 | 是 | 是 | 理工类 |
10004 | 北京交通大学 | 北京市 | 否 | 是 | 否 | 理工类 |
10005 | 北京工业大学 | 北京市 | 否 | 是 | 否 | 理工类 |
... | ... | ... | ... | ... | ... | ... |
90114 | 北京系统工程研究所 | 北京市 | 否 | 否 | 否 | NaN |
90115 | 解放军医学院 | 北京市 | 否 | 否 | 否 | NaN |
90201 | 武警指挥学院 | 天津市 | 否 | 否 | 否 | NaN |
90202 | 武警工程大学 | 陕西省 | 否 | 否 | 否 | 军事类 |
90203 | 武警后勤学院 | 天津市 | 否 | 否 | 否 | NaN |
870 rows × 6 columns
In [21]:
data.head()
Out[21]:
招生单位名称 | 所在省份 | 是否985 | 是否211 | 是否自主划线 | 学校类型 | |
---|---|---|---|---|---|---|
招生单位代码 | ||||||
10001 | 北京大学 | 北京市 | 是 | 是 | 是 | 综合类 |
10002 | 中国人民大学 | 北京市 | 是 | 是 | 是 | 综合类 |
10003 | 清华大学 | 北京市 | 是 | 是 | 是 | 理工类 |
10004 | 北京交通大学 | 北京市 | 否 | 是 | 否 | 理工类 |
10005 | 北京工业大学 | 北京市 | 否 | 是 | 否 | 理工类 |
In [22]:
print(data.head())
招生单位名称 所在省份 是否985 是否211 是否自主划线 学校类型 招生单位代码 10001 北京大学 北京市 是 是 是 综合类 10002 中国人民大学 北京市 是 是 是 综合类 10003 清华大学 北京市 是 是 是 理工类 10004 北京交通大学 北京市 否 是 否 理工类 10005 北京工业大学 北京市 否 是 否 理工类
In [23]:
data.tail(10)
Out[23]:
招生单位名称 | 所在省份 | 是否985 | 是否211 | 是否自主划线 | 学校类型 | |
---|---|---|---|---|---|---|
招生单位代码 | ||||||
90109 | 中国航天员科研训练中心 | 北京市 | 否 | 否 | 否 | NaN |
90110 | 北京跟踪与通信技术研究所 | 北京市 | 否 | 否 | 否 | NaN |
90111 | 中国国防科技信息中心 | 北京市 | 否 | 否 | 否 | NaN |
90112 | 西北核技术研究所 | 陕西省 | 否 | 否 | 否 | NaN |
90113 | 中国空气动力研究与发展中心 | 四川省 | 否 | 否 | 否 | NaN |
90114 | 北京系统工程研究所 | 北京市 | 否 | 否 | 否 | NaN |
90115 | 解放军医学院 | 北京市 | 否 | 否 | 否 | NaN |
90201 | 武警指挥学院 | 天津市 | 否 | 否 | 否 | NaN |
90202 | 武警工程大学 | 陕西省 | 否 | 否 | 否 | 军事类 |
90203 | 武警后勤学院 | 天津市 | 否 | 否 | 否 | NaN |
In [24]:
data.count()
Out[24]:
招生单位名称 870 所在省份 870 是否985 870 是否211 870 是否自主划线 870 学校类型 454 dtype: int64
In [27]:
data = data[data.所在省份=='上海市'] data data.to_excel('../R&Q_pic/上海市高校信息.xls')
C:\Users\Administrator\AppData\Local\Temp\ipykernel_10580\1368619985.py:3: FutureWarning: As the xlwt package is no longer maintained, the xlwt engine will be removed in a future version of pandas. This is the only engine in pandas that supports writing in the xls format. Install openpyxl and write to an xlsx file instead. You can set the option io.excel.xls.writer to 'xlwt' to silence this warning. While this option is deprecated and will also raise a warning, it can be globally set and the warning suppressed. data.to_excel('../R&Q_pic/上海市高校信息.xls')
三,词云库wordcloud的安装与应用
1,安装
pip install wordcloud #网络安装
python -m pip install 本地路径 #本地安装
2,应用
生成步骤:
创建词云对象-->加载词云文本-->输出词云图片(文件)
(1)默认的矩形词云图片
(2)提供的图形词云图片
In [48]:
import wordcloud #导入库模块 from PIL import Image wc = wordcloud.WordCloud(font_path = '..\Stu_pack\wordcloud\simhei.ttf') #创建词云对象 with open(r'..\Stu_pack\wordcloud\Dream It Possible.txt',encoding = 'utf-8') as file: fr = file.read() wc.generate(fr) #加载词云文本 wc.to_file('../R&Q_pic/test.png') Image.open('../R&Q_pic/test.png')
Out[48]:
In [52]:
import wordcloud #导入库模块 from PIL import Image import imageio from imageio import imread with open(r'..\Stu_pack\wordcloud\万疆.txt',encoding = 'utf-8') as file: #读取词云文本文件 fr = file.read() wc = wordcloud.WordCloud(mask = im,font_path = '..\Stu_pack\wordcloud\simhei.ttf',background_color = '#ff0000') #创建词云对象 im = imread('..\Stu_pack\wordcloud\Love_Star.PNG') # im = Image.open('../Stu_pack/wordcloud/Love_Star.PNG') 不能有这种方式读取遮罩图片 wc.generate(fr) #加载词云文本 wc.to_file('../R&Q_pic/test1.png') Image.open('../R&Q_pic/test1.png')
C:\Users\Administrator\AppData\Local\Temp\ipykernel_10580\452304905.py:11: DeprecationWarning: Starting with ImageIO v3 the behavior of this function will switch to that of iio.v3.imread. To keep the current behavior (and make this warning dissapear) use `import imageio.v2 as imageio` or call `imageio.v2.imread` directly. im = imread('..\Stu_pack\wordcloud\Love_Star.PNG')
Out[52]:
In [57]:
import wordcloud #导入库模块 import imageio from imageio import imread import matplotlib.pyplot as plt with open(r'..\Stu_pack\wordcloud\万疆.txt',encoding = 'utf-8') as file: #读取词云文本文件 fr = file.read() im = imread('..\Stu_pack\wordcloud\Love_Star.PNG') # im = Image.open('../Stu_pack/wordcloud/Love_Star.PNG') 不能有这种方式读取遮罩图片 wc = wordcloud.WordCloud(mask = im,font_path = '..\Stu_pack\wordcloud\simhei.ttf',background_color = '#ff0000') wc.generate(fr) #加载词云文本 plt.imshow(wc) plt.axis('off') #隐藏坐标系 plt.savefig('../R&Q_pic/test2.png') #保存图片路径 plt.show() #显示绘制图片 #wc.to_file('../R&Q_pic/test1.png') #Image.open('../R&Q_pic/test1.png')
C:\Users\Administrator\AppData\Local\Temp\ipykernel_10580\2513047111.py:9: DeprecationWarning: Starting with ImageIO v3 the behavior of this function will switch to that of iio.v3.imread. To keep the current behavior (and make this warning dissapear) use `import imageio.v2 as imageio` or call `imageio.v2.imread` directly. im = imread('..\Stu_pack\wordcloud\Love_Star.PNG')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [38]:
import jieba
In [41]:
txt = jieba.lcut('上海立达学院数字科学学院')
In [43]:
txt = ' '.join(txt)
In [ ]:
In [35]:
pip show jieba
Name: jieba Version: 0.42.1 Summary: Chinese Words Segmentation Utilities Home-page: https://github.com/fxsjy/jieba Author: Sun, Junyi Author-email: ccnusjy@gmail.com License: MIT Location: c:\users\administrator\anaconda3\lib\site-packages Requires: Required-by: Note: you may need to restart the kernel to use updated packages.
In [34]:
pip install jieba #安装中文分词库
Collecting jieba Downloading jieba-0.42.1.tar.gz (19.2 MB) --------------------------------------- 19.2/19.2 MB 34.4 MB/s eta 0:00:00 Preparing metadata (setup.py): started Preparing metadata (setup.py): finished with status 'done' Building wheels for collected packages: jieba Building wheel for jieba (setup.py): started Building wheel for jieba (setup.py): finished with status 'done' Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314459 sha256=211b001a4ffcd919ba4b53454d277b0406f4c1c60875939f1fe1f27b6f547dea Stored in directory: c:\users\administrator\appdata\local\pip\cache\wheels\7d\74\cf\08c94db4b784e2c1ef675a600b7b5b281fd25240dcb954ee7e Successfully built jieba Installing collected packages: jieba Successfully installed jieba-0.42.1 Note: you may need to restart the kernel to use updated packages.
In [36]:
pip show wordcloud
Name: wordcloud Version: 1.9.2 Summary: A little word cloud generator Home-page: https://github.com/amueller/word_cloud Author: Andreas Mueller Author-email: t3kcit+wordcloud@gmail.com License: MIT Location: c:\users\administrator\anaconda3\lib\site-packages Requires: matplotlib, numpy, pillow Required-by: Note: you may need to restart the kernel to use updated packages.
In [30]:
pip install wordcloud
Collecting wordcloud Downloading wordcloud-1.9.2-cp39-cp39-win_amd64.whl (153 kB) ------------------------------------ 153.3/153.3 kB 398.5 kB/s eta 0:00:00 Requirement already satisfied: pillow in c:\users\administrator\anaconda3\lib\site-packages (from wordcloud) (9.2.0) Requirement already satisfied: matplotlib in c:\users\administrator\anaconda3\lib\site-packages (from wordcloud) (3.5.2) Requirement already satisfied: numpy>=1.6.1 in c:\users\administrator\anaconda3\lib\site-packages (from wordcloud) (1.21.5) Requirement already satisfied: pyparsing>=2.2.1 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (3.0.9) Requirement already satisfied: cycler>=0.10 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (0.11.0) Requirement already satisfied: python-dateutil>=2.7 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (2.8.2) Requirement already satisfied: fonttools>=4.22.0 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (4.25.0) Requirement already satisfied: packaging>=20.0 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (21.3) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (1.4.2) Requirement already satisfied: six>=1.5 in c:\users\administrator\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib->wordcloud) (1.16.0) Installing collected packages: wordcloud Successfully installed wordcloud-1.9.2 Note: you may need to restart the kernel to use updated packages.
In [4]:
pip show xlwt #查询第三方库
Name: xlwt Version: 1.3.0 Summary: Library to create spreadsheet files compatible with MS Excel 97/2000/XP/2003 XLS files, on any platform, with Python 2.6, 2.7, 3.3+ Home-page: http://www.python-excel.org/ Author: John Machin Author-email: sjmachin@lexicon.net License: BSD Location: c:\users\administrator\anaconda3\lib\site-packages Requires: Required-by: Note: you may need to restart the kernel to use updated packages.
In [3]:
pip install xlwt #安装xlwt
Collecting xlwt Downloading xlwt-1.3.0-py2.py3-none-any.whl (99 kB) ------------------------------------- 100.0/100.0 kB 11.3 kB/s eta 0:00:00 Installing collected packages: xlwt Successfully installed xlwt-1.3.0 Note: you may need to restart the kernel to use updated packages.