二,用pandas读写Excel文件

import xlrd
import xlwt

#(2)读取文件内容
def read_excel(file_name):                  #定义读取文件函数
    wb = xlrd.open_workbook(file_name)      #创建读取文件的对象wb  
    sheet = wb.sheet_by_index(0)
    schools = []
    for row in range(sheet.nrows):
        school = []
        for col in range(sheet.ncols):
            content = sheet.cell_value(row,col)
            school.append(content)
        schools.append(school)
    return schools     #提供返回值


#(3)写入文件内容

def write_excel(schools):                   #定义写入文件函数
                                                                                #(2)构造工作簿:Workbook
    wb = xlwt.Workbook(encoding = 'utf-8')  #创建写入文件的对象wb
                                                                                #(3)为工作簿添加表单:Worksheet
    s = wb.add_sheet('上海市高校信息表')    #创建一个表单
                                                                                #(4)根据行列序号写入内容
    #添加文本的样式(字体和对齐)
    font = xlwt.Font()          #字体
    font.bold = True
    font.height = 400
    font.underline = True
    font.colour_index = 6                            #0:黑,1:白,2:红,3:绿,4:蓝,5:黄,6:紫
    align = xlwt.Alignment()    #对齐
    align.horz = 2    #水平居中 1:左,2:中,3:右
    align.vert = 1    #垂直居中 0:上,1:中,2:下
    style = xlwt.XFStyle()      #样式
    style.font = font
    style.alignment = align
    
    s.write_merge(0,0,0,6,'上海市高校信息表',style)    #写表标题并合并单元格(A1:A7)
    for col in range(7):                        #写表列表名称
        s.write(1,col,schools[0][col])      #第2行第1列开始写内容school[0][col]
    row_num = 2                             #从第3行开始写数据
    for school in schools:                  #一行一行写数据内容
        if school[2] == '上海市':
            for col in range(7):
                s.write(row_num,col,school[col])
            row_num = row_num + 1
                                                                                #(5)保存文件内容
    wb.save('../R&Q_pic/上海市高校信息表.xls')

school_list = read_excel('../Stu_pack/wordcloud/school.xls') #调用读取函数,将素材里的文件school.xls作为实参传递给形参
write_excel(school_list)  #调用写入函数,将读取的数据作为实参传递给形参写入到文件"上海市高校信息表.xls"里保存

schools_list = read_excel('../R&Q_pic/上海市高校信息表.xls')
i=1
for school in schools_list:
    if i<13:
        print(school)
        i+=1
['上海市高校信息表', '', '', '', '', '', '']
['招生单位代码', '招生单位名称', '所在省份', '是否985', '是否211', '是否自主划线', '学校类型']
['10246', '复旦大学', '上海市', '是', '是', '是', '综合类']
['10247', '同济大学', '上海市', '是', '是', '是', '理工类']
['10248', '上海交通大学', '上海市', '是', '是', '是', '综合类']
['10251', '华东理工大学', '上海市', '否', '是', '否', '理工类']
['10252', '上海理工大学', '上海市', '否', '否', '否', '理工类']
['10254', '上海海事大学', '上海市', '否', '否', '否', '理工类']
['10255', '东华大学', '上海市', '否', '是', '否', '理工类']
['10256', '上海电力学院', '上海市', '否', '否', '否', '理工类']
['10259', '上海应用技术大学', '上海市', '否', '否', '否', '理工类']
['10264', '上海海洋大学', '上海市', '否', '否', '否', '农林类']

In [9]:

import pandas as pd

In [20]:

data = pd.read_excel('../Stu_pack/wordcloud/school.xls',index_col = 0)
data

Out[20]:

招生单位名称所在省份是否985是否211是否自主划线学校类型
招生单位代码
10001北京大学北京市综合类
10002中国人民大学北京市综合类
10003清华大学北京市理工类
10004北京交通大学北京市理工类
10005北京工业大学北京市理工类
.....................
90114北京系统工程研究所北京市NaN
90115解放军医学院北京市NaN
90201武警指挥学院天津市NaN
90202武警工程大学陕西省军事类
90203武警后勤学院天津市NaN

870 rows × 6 columns

In [21]:

data.head()

Out[21]:

招生单位名称所在省份是否985是否211是否自主划线学校类型
招生单位代码
10001北京大学北京市综合类
10002中国人民大学北京市综合类
10003清华大学北京市理工类
10004北京交通大学北京市理工类
10005北京工业大学北京市理工类

In [22]:

print(data.head())
        招生单位名称 所在省份 是否985 是否211 是否自主划线 学校类型
招生单位代码                                     
10001     北京大学  北京市     是     是      是  综合类
10002   中国人民大学  北京市     是     是      是  综合类
10003     清华大学  北京市     是     是      是  理工类
10004   北京交通大学  北京市     否     是      否  理工类
10005   北京工业大学  北京市     否     是      否  理工类

In [23]:

data.tail(10)

Out[23]:

招生单位名称所在省份是否985是否211是否自主划线学校类型
招生单位代码
90109中国航天员科研训练中心北京市NaN
90110北京跟踪与通信技术研究所北京市NaN
90111中国国防科技信息中心北京市NaN
90112西北核技术研究所陕西省NaN
90113中国空气动力研究与发展中心四川省NaN
90114北京系统工程研究所北京市NaN
90115解放军医学院北京市NaN
90201武警指挥学院天津市NaN
90202武警工程大学陕西省军事类
90203武警后勤学院天津市NaN

In [24]:

data.count()

Out[24]:

招生单位名称    870
所在省份      870
是否985     870
是否211     870
是否自主划线    870
学校类型      454
dtype: int64

In [27]:

data = data[data.所在省份=='上海市']
data
data.to_excel('../R&Q_pic/上海市高校信息.xls')
C:\Users\Administrator\AppData\Local\Temp\ipykernel_10580\1368619985.py:3: FutureWarning: As the xlwt package is no longer maintained, the xlwt engine will be removed in a future version of pandas. This is the only engine in pandas that supports writing in the xls format. Install openpyxl and write to an xlsx file instead. You can set the option io.excel.xls.writer to 'xlwt' to silence this warning. While this option is deprecated and will also raise a warning, it can be globally set and the warning suppressed.
  data.to_excel('../R&Q_pic/上海市高校信息.xls')

三,词云库wordcloud的安装与应用

1,安装
pip install wordcloud  #网络安装
python -m pip install 本地路径    #本地安装
2,应用
生成步骤:
    创建词云对象-->加载词云文本-->输出词云图片(文件)
(1)默认的矩形词云图片
(2)提供的图形词云图片

In [48]:

import wordcloud          #导入库模块
from PIL import Image
wc = wordcloud.WordCloud(font_path = '..\Stu_pack\wordcloud\simhei.ttf')      #创建词云对象

with open(r'..\Stu_pack\wordcloud\Dream It Possible.txt',encoding = 'utf-8') as file:
    fr = file.read()


wc.generate(fr)     #加载词云文本

wc.to_file('../R&Q_pic/test.png')

Image.open('../R&Q_pic/test.png')

Out[48]:

In [52]:

import wordcloud          #导入库模块
from PIL import Image
import imageio
from imageio import imread

with open(r'..\Stu_pack\wordcloud\万疆.txt',encoding = 'utf-8') as file:     #读取词云文本文件
    fr = file.read()
    
wc = wordcloud.WordCloud(mask = im,font_path = '..\Stu_pack\wordcloud\simhei.ttf',background_color = '#ff0000')      #创建词云对象

im = imread('..\Stu_pack\wordcloud\Love_Star.PNG')
# im = Image.open('../Stu_pack/wordcloud/Love_Star.PNG')         不能有这种方式读取遮罩图片
wc.generate(fr)     #加载词云文本

wc.to_file('../R&Q_pic/test1.png')

Image.open('../R&Q_pic/test1.png')
C:\Users\Administrator\AppData\Local\Temp\ipykernel_10580\452304905.py:11: DeprecationWarning: Starting with ImageIO v3 the behavior of this function will switch to that of iio.v3.imread. To keep the current behavior (and make this warning dissapear) use `import imageio.v2 as imageio` or call `imageio.v2.imread` directly.
  im = imread('..\Stu_pack\wordcloud\Love_Star.PNG')

Out[52]:

In [57]:

import wordcloud          #导入库模块
import imageio
from imageio import imread
import matplotlib.pyplot as plt

with open(r'..\Stu_pack\wordcloud\万疆.txt',encoding = 'utf-8') as file:     #读取词云文本文件
    fr = file.read()

im = imread('..\Stu_pack\wordcloud\Love_Star.PNG')
# im = Image.open('../Stu_pack/wordcloud/Love_Star.PNG')         不能有这种方式读取遮罩图片
wc = wordcloud.WordCloud(mask = im,font_path = '..\Stu_pack\wordcloud\simhei.ttf',background_color = '#ff0000') 

wc.generate(fr)     #加载词云文本

plt.imshow(wc)
plt.axis('off')    #隐藏坐标系
plt.savefig('../R&Q_pic/test2.png')    #保存图片路径
plt.show()         #显示绘制图片



#wc.to_file('../R&Q_pic/test1.png')
#Image.open('../R&Q_pic/test1.png')
C:\Users\Administrator\AppData\Local\Temp\ipykernel_10580\2513047111.py:9: DeprecationWarning: Starting with ImageIO v3 the behavior of this function will switch to that of iio.v3.imread. To keep the current behavior (and make this warning dissapear) use `import imageio.v2 as imageio` or call `imageio.v2.imread` directly.
  im = imread('..\Stu_pack\wordcloud\Love_Star.PNG')

In [ ]:

 

In [ ]:

 

In [ ]:

 

In [ ]:

 

In [ ]:

 

In [38]:

import jieba

In [41]:

txt = jieba.lcut('上海立达学院数字科学学院')

In [43]:

txt = ' '.join(txt)

In [ ]:

 

In [35]:

pip show jieba
Name: jieba
Version: 0.42.1
Summary: Chinese Words Segmentation Utilities
Home-page: https://github.com/fxsjy/jieba
Author: Sun, Junyi
Author-email: ccnusjy@gmail.com
License: MIT
Location: c:\users\administrator\anaconda3\lib\site-packages
Requires: 
Required-by: 
Note: you may need to restart the kernel to use updated packages.

In [34]:

pip install jieba           #安装中文分词库
Collecting jieba
  Downloading jieba-0.42.1.tar.gz (19.2 MB)
     --------------------------------------- 19.2/19.2 MB 34.4 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: jieba
  Building wheel for jieba (setup.py): started
  Building wheel for jieba (setup.py): finished with status 'done'
  Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314459 sha256=211b001a4ffcd919ba4b53454d277b0406f4c1c60875939f1fe1f27b6f547dea
  Stored in directory: c:\users\administrator\appdata\local\pip\cache\wheels\7d\74\cf\08c94db4b784e2c1ef675a600b7b5b281fd25240dcb954ee7e
Successfully built jieba
Installing collected packages: jieba
Successfully installed jieba-0.42.1
Note: you may need to restart the kernel to use updated packages.

In [36]:

pip show wordcloud
Name: wordcloud
Version: 1.9.2
Summary: A little word cloud generator
Home-page: https://github.com/amueller/word_cloud
Author: Andreas Mueller
Author-email: t3kcit+wordcloud@gmail.com
License: MIT
Location: c:\users\administrator\anaconda3\lib\site-packages
Requires: matplotlib, numpy, pillow
Required-by: 
Note: you may need to restart the kernel to use updated packages.

In [30]:

pip install wordcloud
Collecting wordcloud
  Downloading wordcloud-1.9.2-cp39-cp39-win_amd64.whl (153 kB)
     ------------------------------------ 153.3/153.3 kB 398.5 kB/s eta 0:00:00
Requirement already satisfied: pillow in c:\users\administrator\anaconda3\lib\site-packages (from wordcloud) (9.2.0)
Requirement already satisfied: matplotlib in c:\users\administrator\anaconda3\lib\site-packages (from wordcloud) (3.5.2)
Requirement already satisfied: numpy>=1.6.1 in c:\users\administrator\anaconda3\lib\site-packages (from wordcloud) (1.21.5)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (3.0.9)
Requirement already satisfied: cycler>=0.10 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (0.11.0)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (2.8.2)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (4.25.0)
Requirement already satisfied: packaging>=20.0 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (21.3)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\administrator\anaconda3\lib\site-packages (from matplotlib->wordcloud) (1.4.2)
Requirement already satisfied: six>=1.5 in c:\users\administrator\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib->wordcloud) (1.16.0)
Installing collected packages: wordcloud
Successfully installed wordcloud-1.9.2
Note: you may need to restart the kernel to use updated packages.

In [4]:

pip show xlwt           #查询第三方库
Name: xlwt
Version: 1.3.0
Summary: Library to create spreadsheet files compatible with MS Excel 97/2000/XP/2003 XLS files, on any platform, with Python 2.6, 2.7, 3.3+
Home-page: http://www.python-excel.org/
Author: John Machin
Author-email: sjmachin@lexicon.net
License: BSD
Location: c:\users\administrator\anaconda3\lib\site-packages
Requires: 
Required-by: 
Note: you may need to restart the kernel to use updated packages.

In [3]:

pip install xlwt         #安装xlwt
Collecting xlwt
  Downloading xlwt-1.3.0-py2.py3-none-any.whl (99 kB)
     ------------------------------------- 100.0/100.0 kB 11.3 kB/s eta 0:00:00
Installing collected packages: xlwt
Successfully installed xlwt-1.3.0
Note: you may need to restart the kernel to use updated packages.
  • 21
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值