python的txt、csv、ini、xml、excel文件相关操作
-
函数,一个用于专门实现某个功能的代码块(可重用)
-
内置函数
len、bin、oct、hex 等
-
自定义函数
# 定义了一个函数,功能代码块 def send_email(): # 写了10行代码,实现了发送邮件。 pass goods = [ {"name": "电脑", "price": 1999}, {"name": "鼠标", "price": 10}, {"name": "游艇", "price": 20}, {"name": "美女", "price": 998} ] for index in range(len(goods)): item = goods[index] print(index + 1, item['name'], item['price']) # 调用并执行函数 send_email() while True: num = input("请输入要选择的商品序号(Q/q):") # "1" if num.upper() == "Q": break if not num.isdecimal(): print("用输入的格式错误") break num = int(num) send_email() if num > 4 or num < 0: print("范围选择错误") break target_index = num - 1 choice_item = goods[target_index] print(choice_item["name"], choice_item['price']) send_email()
-
-
模块,集成了很多功能的函数集合。
-
内置模块,Python内部帮助我们提供好的。
import random num = random.randint(0,19)
-
第三方模块,网上下载别人写好的模块(功能集合)。
-
自定义模块
-
1. 文件操作
-
字符串类型(str),在程序中用于表示文字信息,本质上是unicode编码中的二进制
-
字节类型(bytes)
-
可表示文字信息,本质上是utf-8/gbk等编码的二进制(对unicode进行压缩,方便文件存储和网络传输。)
name = "达莱" data = name.encode('utf-8') print(data) # b'\xe8\xbe\xbe\xe8\x8e\xb1' result = data.decode('utf-8') print(result) # "达莱"
-
可表示原始二进制(图片、文件等信息)
-
1.1 读文件
-
读文本文件
# 1.打开文件 # - 路径: # 相对路径:'info.txt' # 绝对路径:'E:/python/Project/two/day09/file/info.txt' # - 模式 # rb,表示读取文件原始的二进制(r, 读 read;b, 二进制 binary;) # 1.打开文件 file_object=open("file/info.txt",mode="rb") # 2.读取文件内容,并赋值给data data = file_object.read() # 3.关闭文件 file_object.close() print(data) # b'\xe8\xbe\xbe\xe8\x8e\xb1' text = data.decode("utf-8") print(text)
# 1.打开文件 file_object = open('file/info.txt', mode='rt', encoding="utf-8") # 2.读取文件内容,并赋值给data data = file_object.read() # 3.关闭文件 file_object.close() print(data)
-
读图片等非文本内容文件
file_object = open("file/jt.jpg", mode="rb") date = file_object.read() file_object.close() print(date) #b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\...
注意事项
-
路径
-
相对路径,确定程序的运行路径
-
绝对路径
# 1.打开文件 file_object = open('E:/python/Project/two/day09/file/info.txt', mode='rt', encoding='utf-8') # 2.读取文件内容,并赋值给data data = file_object.read() # 3.关闭文件 file_object.close() print(data)
windows 写绝对路径的两种方式
# file_object = open('C:\\new\\info.txt', mode='rt', encoding='utf-8') #双斜杠 file_object = open(r'C:\new\info.txt', mode='rt', encoding='utf-8') data = file_object.read() file_object.close() print(data) #加r(原始字符串 raw string,反斜杠不再被转义)
-
读文件时,如果文件不存在程序会报错。可以使用os.path.exists判断
import os file_path = "file/info.txt" exists=os.path.exists(file_path) if exists: # 1.打开文件 file_object = open('file/info.txt', mode='rt', encoding='utf-8') # 2.读取文件内容,并赋值给data data = file_object.read() # 3.关闭文件 file_object.close() print(data) else: print("文件不存在")
-
1.2 写文件
-
写文本文件
# 1.打开文件 # 路径:file/n2.txt # 模式:wb(要求写入的内容需要是字节类型) file_object = open("file/n2.txt", mode="wb") # 2.写入内容 file_object.write("达莱".encode("utf-8")) # 3.文件关闭 file_object.close()
file_object.close() # wt模式,写utf-8格式文本 file_object=open("file/n2.txt",mode="wt",encoding="utf-8") file_object.write("查苏娜") file_object.close()
-
写图片等文件
# 读二进制 f1 = open('file/jt.jpg', mode='rb') content = f1.read() f1.close() #写 f2 = open('file/jt1.jpg', mode='wb') f2.write(content) f2.close()
基础案例
# (单用户注册)注册用户名和密码写入txt文件中user = input("请输入用户名:")pwd = input("请输入密码:")data = "{}-{}".format(user, pwd)file_object = open("file/info1.txt", mode="wt", encoding="utf-8")file_object.write(data)file_object.close()
#多用户注册,但是会覆盖上一次的记录while True: user = input("请输入用户名:") if user.upper() == "Q": break pwd = input("请输入密码:") data = "{}-{}".format(user, pwd) file_object = open("file/info2.txt", mode="wt", encoding="utf-8") file_object.write(data)file_object.close()
-
高级案例
-
利用Python向某个网址发送请求并获取结果(利用第三方的模块)
-
下载第三方模块
pip install requests
-
使用第三方模块
```python import requests res = requests.get(url="网址") print(res) ```
-
-
网上下载文本,写入文件
- requests模块
- requests.get(url地址)
- headers
- write(res.content)写文本
import requests res = requests.get( url="https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=20", headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" } ) # 网络传输的原始二进制信息(bytes) # res.content file_object = open('files/log1.txt', mode='wb') file_object.write(res.content) file_object.close() # 网络传输的原始二进制信息(bytes) # res.content file_object = open('files/log1.txt', mode='wb') file_object.write(res.content) file_object.close() # 案例2:去网上下载一张图片,图片写入本地文件。 import requests res = requests.get( url="https://hbimg.huabanimg.com/c7e1461e4b15735fbe625c4dc85bd19904d96daf6de9fb-tosv1r_fw1200", headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" } ) # 网络传输的原始二进制信息(bytes) # res.content file_object = open('files/美女.png', mode='wb') file_object.write(res.content) file_object.close() # 案例2:去网上下载一张图片,图片写入本地文件。 import requests res = requests.get( url="https://hbimg.huabanimg.com/c7e1461e4b15735fbe625c4dc85bd19904d96daf6de9fb-tosv1r_fw1200", headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" } ) # 网络传输的原始二进制信息(bytes) # res.content file_object = open('files/美女.png', mode='wb') file_object.write(res.content) file_object.close()
注意事项:
-
关于文件的打开模式常见应用有:
-
只读:
r
、rt
、rb
(用)- 存在,读
- 不存在,报错
-
只写:
w
、wt
、wb
(用)- 存在,清空再写
- 不存在,创建再写
-
只写:
x
、xt
、xb
- 存在,报错
- 不存在,创建再写。
-
只写:
a
、at
、ab
【尾部追加】(用)- 存在,尾部追加。
- 不存在,创建再写。
-
读写
-
r+、rt+、rb+,默认光标位置:起始位置
file_object = open('file/info.txt', mode='rt+',encoding="utf-8")
# 读取内容
data = file_object.read()
print(data)
# 写入内容
file_object.write("你好")
file_object.close()
file_object = open('file/info.txt', mode='rt+',encoding="utf-8")# 写入内容file_object.write("力王")# 读取内容data = file_object.read()print(data) # -123file_object.close()
-
w+、wt+、wb+,默认光标位置:起始位置(清空文件)
-
.seek(移动光标位置)
file_object = open('file/info.txt', mode='wt+', encoding="utf-8")# 读取内容data = file_object.read()print(data)# 写入内容file_object.write("你好呀")# 将光标位置重置起始file_object.seek(0)# 读取内容data = file_object.read()print(data)file_object.close()
-
x+、xt+、xb+,默认光标位置:起始位置(新文件)
-
a+、at+、ab+,默认光标位置:末尾
file_object = open('file/info.txt', mode='at+',encoding="utf-8") # 写入内容 file_object.write("达莱") # 将光标位置重置起始 file_object.seek(0) # 读取内容 data = file_object.read() print(data) file_object.close()
多用户注册案例
file_object = open('file/account.txt', mode='a') while True: user = input("用户名:") if user.upper() == "Q": break pwd = input("密码:") data = "{}-{}\n".format(user, pwd) file_object.write(data) file_object.close()
1.4 常见功能
-
文件操作中还有很多其他的功能来辅助实现更好的读写文件的内容
-
读所有【常用】
f = open('info.txt', mode='r',encoding='utf-8')data = f.read()f.close()
f = open('info.txt', mode='rb')data = f.read()f.close()
-
读n个字符(字节)【会用到】
f = open('info.txt', mode='r', encoding='utf-8')# 读1个字符data = f.read(1)f.close()print(data) # 字符串第一个值
f = open('info.txt', mode='r',encoding='utf-8')# 读1个字符chunk1 = f.read(1)chunk2 = f.read(2)print(chunk1,chunk2)f.close() #光标后的值
f = open('info.txt', mode='rb')# 读1个字节data = f.read(3)f.close()print(data, type(data)) # 读3个字节
f = open('info.txt', mode='rb')# 读1个字节chunk1 = f.read(3)chunk2 = f.read(3)chunk3 = f.read(1)print(chunk1,chunk2,chunk3)f.close() #读字节
-
readline,读一行
f = open('info.txt', mode='r', encoding='utf-8')v1 = f.readline()print(v1)v2 = f.readline()print(v2)f.close()
-
readlines,读所有行,每行作为列表的一个元素(读出值存成一个列表)
f = open('info.txt', mode='rb') data_list = f.readlines() f.close() print(data_list)
-
循环,读大文件(readline加强版)【常见】
f = open('info.txt', mode='r', encoding='utf-8') for line in f: print(line.strip()) f.close()
-
write,写
f = open('info.txt', mode='a',encoding='utf-8') f.write("达莱") f.close()
f = open('info.txt', mode='ab') f.write( "达莱".encode("utf-8") ) f.close()
-
flush,刷到硬盘
f = open('info.txt', mode='a',encoding='utf-8')while True: # 不是写到了硬盘,而是写在缓冲区,系统会将缓冲区的内容刷到硬盘。 f.write("达莱") f.flush()f.close()
file_object = open('files/account.txt', mode='a')while True: user = input("用户名:") if user.upper() == "Q": break pwd = input("密码:") data = "{}-{}\n".format(user, pwd) file_object.write(data) file_object.flush()file_object.close()
-
移动光标位置(字节)
f = open('file/info.txt', mode='r+', encoding='utf-8')# 移动到指定字节的位置f.seek(3)f.write("达莱")f.close()
注意:在a模式下,调用write在文件中写入内容时,永远只能将内容写入到尾部,不会写到光标的位置。
-
获取当前光标位置(.tell)
f = open('info.txt', mode='r', encoding='utf-8')p1 = f.tell()print(p1) # 0f.read(3) # 读3个字符 3*3=9字节p2 = f.tell()print(p2) # 9f.close()
-
1.5 上下文管理
常用的open对文件进行操作时,每次都要打开和关闭文件,比较繁琐且容易忘记关闭文件。
以后再进行文件操作时,推荐使用with上下文管理,它可以自动实现关闭文件。
with open("xxxx.txt", mode='rb') as file_object: data = file_object.read() print(data)
在Python 2.7 后,with又支持同时对多个文件的上下文进行管理,即:
with open("xxxx.txt", mode='rb') as f1, open("xxxx.txt", mode='rb') as f2: pass
-
重命名
-
import shutilshutil.move("file/newhaha.conf","file/new.conf")
-
-
2. csv格式文件
-
逗号分隔值(Comma-Separated Values,CSV,有时也称为字符分隔值,因为分隔字符也可以不是逗号),其文件以纯文本形式存储表格数据(数字和文本)。
对于这种格式的数据,我们需要利用open函数来读取文件并根据逗号分隔的特点来进行处理。
股票代码,股票名称,当前价,涨跌额,涨跌幅,年初至今SH601778,N晶科,6.29,+1.92,-43.94%,+43.94%SH688566,吉贝尔,52.66,+6.96,+15.23%,+122.29%...
ID,用户名,头像26044585,Hush,https://hbimg.huabanimg.com/51d46dc32abe7ac7f83b94c67bb88cacc46869954f478-aP4Q3V19318369,柒十一,https://hbimg.huabanimg.com/703fdb063bdc37b11033ef794f9b3a7adfa01fd21a6d1-wTFbnO15529690,Law344,https://hbimg.huabanimg.com/b438d8c61ed2abf50ca94e00f257ca7a223e3b364b471-xrzoQd18311394,Jennah·,https://hbimg.huabanimg.com/4edba1ed6a71797f52355aa1de5af961b85bf824cb71-px1nZz
#示例代码 import os import requests with open('files/mv.csv', mode='r', encoding='utf-8') as file_object: file_object.readline() for line in file_object: user_id, username, url = line.strip().split(',') print(username, url) # 1.根据URL下载图片 res = requests.get( url=url, headers={ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36" } ) # 检查images目录是否存在?不存在,则创建images目录 if not os.path.exists("images"): # 创建images目录 os.makedirs("images") # 2.将图片的内容写入到文件 with open("images/{}.png".format(username), mode='wb') as img_object: img_object.write(res.content)
3.ini格式文件
-
ini文件是Initialization File的缩写,平时用于存储软件的配置文件。例如:MySQL数据库的配置文件。
[mysqld]datadir=/var/lib/mysqlsocket=/var/lib/mysql/mysql.socklog-bin=py-mysql-bincharacter-set-server=utf8collation-server=utf8_general_cilog-error=/var/log/mysqld.log# Disabling symbolic-links is recommended to prevent assorted security riskssymbolic-links=0[mysqld_safe]log-error=/var/log/mariadb/mariadb.logpid-file=/var/run/mariadb/mariadb.pid[client]default-character-set=utf8
-
这种格式是可以直接使用open来处理的,考虑到自己处理比较麻烦,所以Python为我们提供了更为方便的方式。
-
import configparser(模块)
import configparser config = configparser.ConfigParser() config.read('files/my.ini', encoding='utf-8') # 1.获取所有的节点 """ result = config.sections() print(result) # ['mysqld', 'mysqld_safe', 'client'] """ # 2.获取节点下的键值 """ result = config.items("mysqld_safe") print(result) # [('log-error', '/var/log/mariadb/mariadb.log'), ('pid-file', '/var/run/mariadb/mariadb.pid')] for key, value in config.items("mysqld_safe"): print(key, value) """ # 3.获取某个节点下的键对应的值 """ result = config.get("mysqld","collation-server") print(result) """ # 4.其他 # 4.1 是否存在节点 # v1 = config.has_section("client") # print(v1) # 4.2 添加一个节点 # config.add_section("group") # config.set('group','name','wupeiqi') # config.set('client','name','wupeiqi') # config.write(open('files/new.ini', mode='w', encoding='utf-8')) # 4.3 删除 # config.remove_section('client') # config.remove_option("mysqld", "datadir") # config.write(open('files/new.ini', mode='w', encoding='utf-8'))
-
读取所有节点
import configparser config = configparser.ConfigParser() config.read('/Users/wupeiqi/PycharmProjects/luffyCourse/day09/files/my.conf', encoding='utf-8') # config.read('my.conf', encoding='utf-8') ret = config.sections() print(ret) >>输出 ['mysqld', 'mysqld_safe', 'client']
-
读取节点下的键值
import configparser config = configparser.ConfigParser() config.read('/Users/wupeiqi/PycharmProjects/luffyCourse/day09/files/my.conf', encoding='utf-8') # config.read('my.conf', encoding='utf-8') item_list = config.items("mysqld_safe") print(item_list) >>输出 [('log-error', '/var/log/mariadb/mariadb.log'), ('pid-file', '/var/run/mariadb/mariadb.pid')]
-
读取节点下值(根据 节点+键 )
import configparserconfig = configparser.ConfigParser()config.read('/Users/wupeiqi/PycharmProjects/luffyCourse/day09/files/my.conf', encoding='utf-8')value = config.get('mysqld', 'log-bin')print(value)>>输出py-mysql-bin
-
检查、删除、添加节点
import configparserconfig = configparser.ConfigParser()config.read('/Users/wupeiqi/PycharmProjects/luffyCourse/day09/files/my.conf', encoding='utf-8')# config.read('my.conf', encoding='utf-8')# 检查has_sec = config.has_section('mysqld')print(has_sec)# 添加节点config.add_section("SEC_1")# 节点中设置键值config.set('SEC_1', 'k10', "123")config.set('SEC_1', 'name', "哈哈哈哈哈")config.add_section("SEC_2")config.set('SEC_2', 'k10', "123")# 内容写入新文件config.write(open('/Users/wupeiqi/PycharmProjects/luffyCourse/day09/files/xxoo.conf', 'w'))# 删除节点config.remove_section("SEC_2")# 删除节点中的键值config.remove_option('SEC_1', 'k10')config.write(open('/Users/wupeiqi/PycharmProjects/luffyCourse/day09/files/new.conf', 'w'))
4. XML格式文件
可扩展标记语言,是一种简单的数据存储语言,XML 被设计用来传输和存储数据。
-
存储,可用来存放配置文件,例如:java的配置文件。
-
传输,网络传输时以这种格式存在,例如:早期ajax传输的数据、soap协议等。
<data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2023</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Singapore"> <rank updated="yes">5</rank> <year>2026</year> <gdppc>59900</gdppc> <neighbor direction="N" name="Malaysia" /> </country> <country name="Panama"> <rank updated="yes">69</rank> <year>2026</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country></data>
注意:在Python开发中用的相对来说比较少
4.1读取文件和内容
- from xml.etree import ElementTree as ET
- ET.parse(ET去打开xml文件)
- getroot()(# 获取根标签)
from xml.etree import ElementTree as ET# ET去打开xml文件tree = ET.parse("files/xo.xml")# 获取根标签root = tree.getroot()print(root) # <Element 'data' at 0x7f94e02763b0>
from xml.etree import ElementTree as ET content = """ <data> <country name="Liechtenstein"> <rank updated="yes">2</rank> <year>2023</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Panama"> <rank updated="yes">69</rank> <year>2026</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country> </data> """ root = ET.XML(content) print(root) # <Element 'data' at 0x7fdaa019cea0>
4.2 读取节点数据
- .XML(content)(获取根标签)
from xml.etree import ElementTree as ETcontent = """<data> <country name="Liechtenstein" id="999" > <rank>2</rank> <year>2023</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Panama"> <rank>69</rank> <year>2026</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country></data>"""# 获取根标签 dataroot = ET.XML(content)country_object = root.find("country")print(country_object.tag, country_object.attrib)gdppc_object = country_object.find("gdppc")print(gdppc_object.tag,gdppc_object.attrib,gdppc_object.text)
from xml.etree import ElementTree as ETcontent = """<data> <country name="Liechtenstein"> <rank>2</rank> <year>2023</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Panama"> <rank>69</rank> <year>2026</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country></data>"""# 获取根标签 dataroot = ET.XML(content)# 获取data标签的孩子标签for child in root: # child.tag = conntry # child.attrib = {"name":"Liechtenstein"} print(child.tag, child.attrib) for node in child: print(node.tag, node.attrib, node.text)
from xml.etree import ElementTree as ETcontent = """<data> <country name="Liechtenstein"> <rank>2</rank> <year>2023</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Panama"> <rank>69</rank> <year>2026</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country></data>"""root = ET.XML(content)for child in root.iter('year'): print(child.tag, child.text)
from xml.etree import ElementTree as ET content = """ <data> <country name="Liechtenstein"> <rank>2</rank> <year>2023</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Panama"> <rank>69</rank> <year>2026</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country> </data> """ root = ET.XML(content) v1 = root.findall('country') print(v1) v2 = root.find('country').find('rank') print(v2.text)
4.3 修改和删除节点
from xml.etree import ElementTree as ET content = """ <data> <country name="Liechtenstein"> <rank>2</rank> <year>2023</year> <gdppc>141100</gdppc> <neighbor direction="E" name="Austria" /> <neighbor direction="W" name="Switzerland" /> </country> <country name="Panama"> <rank>69</rank> <year>2026</year> <gdppc>13600</gdppc> <neighbor direction="W" name="Costa Rica" /> <neighbor direction="E" name="Colombia" /> </country> </data> """ root = ET.XML(content) # 修改节点内容和属性 rank = root.find('country').find('rank') print(rank.text) rank.text = "999" rank.set('update', '2020-11-11') print(rank.text, rank.attrib) ############ 保存文件 ############ tree = ET.ElementTree(root) tree.write("new.xml", encoding='utf-8') # 删除节点 root.remove( root.find('country') ) print(root.findall('country')) ############ 保存文件 ############ tree = ET.ElementTree(root) tree.write("newnew.xml", encoding='utf-8')
4.4 构建文档
<home> <son name="儿1"> <grandson name="儿11"></grandson> <grandson name="儿12"></grandson> </son> <son name="儿2"></son> </home>
from xml.etree import ElementTree as ET # 创建根标签 root = ET.Element("home") # 创建节点大儿子 son1 = ET.Element('son', {'name': '儿1'}) # 创建小儿子 son2 = ET.Element('son', {"name": '儿2'}) # 在大儿子中创建两个孙子 grandson1 = ET.Element('grandson', {'name': '儿11'}) grandson2 = ET.Element('grandson', {'name': '儿12'}) son1.append(grandson1) son1.append(grandson2) # 把儿子添加到根节点中 root.append(son1) root.append(son2) tree = ET.ElementTree(root) tree.write('oooo.xml', encoding='utf-8', short_empty_elements=False)
<famliy> <son name="儿1"> <grandson name="儿11"></grandson> <grandson name="儿12"></grandson> </son> <son name="儿2"></son></famliy>
from xml.etree import ElementTree as ET# 创建根节点root = ET.Element("famliy")# 创建大儿子son1 = root.makeelement('son', {'name': '儿1'})# 创建小儿子son2 = root.makeelement('son', {"name": '儿2'})# 在大儿子中创建两个孙子grandson1 = son1.makeelement('grandson', {'name': '儿11'})grandson2 = son1.makeelement('grandson', {'name': '儿12'})son1.append(grandson1)son1.append(grandson2)# 把儿子添加到根节点中root.append(son1)root.append(son2)tree = ET.ElementTree(root)tree.write('oooo.xml',encoding='utf-8')
<famliy> <son name="儿1"> <age name="儿11">孙子</age> </son> <son name="儿2"></son></famliy>
from xml.etree import ElementTree as ET# 创建根节点root = ET.Element("famliy")# 创建节点大儿子son1 = ET.SubElement(root, "son", attrib={'name': '儿1'})# 创建小儿子son2 = ET.SubElement(root, "son", attrib={"name": "儿2"})# 在大儿子中创建一个孙子grandson1 = ET.SubElement(son1, "age", attrib={'name': '儿11'})grandson1.text = '孙子'et = ET.ElementTree(root) #生成文档对象
<user><![CDATA[你好呀]]</user>
from xml.etree import ElementTree as ET # 创建根节点 root = ET.Element("user") root.text = "<![CDATA[你好呀]]" et = ET.ElementTree(root) # 生成文档对象 et.write("test.xml", encoding="utf-8")
案例:
content = """<xml> <ToUserName><![CDATA[gh_7f083739789a]]></ToUserName> <FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName> <CreateTime>1395658920</CreateTime> <MsgType><![CDATA[event]]></MsgType> <Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event> <MsgID>200163836</MsgID> <Status><![CDATA[success]]></Status> </xml>""" from xml.etree import ElementTree as ET info = {} root = ET.XML(content) for node in root: # print(node.tag,node.text) info[node.tag] = node.text print(info)
5.Excel格式文件
-
Python内部未提供处理Excel文件的功能,想要在Python中操作Excel需要安装第三方的模块。
pip install openpyxl
-
此模块中集成了Python操作Excel的相关功能
5.1读Excel
-
读sheet
from openpyxl import load_workbookwb = load_workbook("files/p1.xlsx")# sheet相关操作# 1.获取excel文件中的所有sheet名称"""print(wb.sheetnames) # ['数据导出', '用户列表', 'Sheet1', 'Sheet2']"""# 2.选择sheet,基于sheet名称"""sheet = wb["数据导出"]cell = sheet.cell(1, 2)print(cell.value)"""# 3.选择sheet,基于索引位置"""sheet = wb.worksheets[0]cell = sheet.cell(1,2)print(cell.value)"""# 4.循环所有的sheet"""for name in wb.sheetnames: sheet = wb[name] cell = sheet.cell(1, 1) print(cell.value)""""""for sheet in wb.worksheets: cell = sheet.cell(1, 1) print(cell.value)""""""for sheet in wb: cell = sheet.cell(1, 1) print(cell.value)"""
-
读sheet中单元格的数据
from openpyxl import load_workbook wb = load_workbook("file/p1.xlsx") sheet = wb.worksheets[0] # 1.获取第N行第N列的单元格(位置是从1开始) """ cell = sheet.cell(1, 1) print(cell.value) print(cell.style) print(cell.font) print(cell.alignment) """ # 2.获取某个单元格 """ c1 = sheet["A2"] print(c1.value) c2 = sheet['D4'] print(c2.value) """ # 3.第N行所有的单元格 """ for cell in sheet[1]: print(cell.value) """ # 4.所有行的数据(获取某一列数据) """ for row in sheet.rows: print(row[0].value, row[1].value) """ # 5.获取所有列的数据 """ for col in sheet.columns: print(col[1].value) """
-
读合并的单元格
from openpyxl import load_workbook wb = load_workbook("file/p1.xlsx") sheet = wb.worksheets[2] # 获取第N行第N列的单元格(位置是从1开始) c1 = sheet.cell(1, 1) print(c1) # <Cell 'Sheet1'.A1> print(c1.value) # 用户信息 c2 = sheet.cell(1, 2) print(c2) # <MergedCell 'Sheet1'.B1> print(c2.value) # None
from openpyxl import load_workbookwb = load_workbook('file/p1.xlsx')sheet = wb.worksheets[2]for row in sheet.rows: print(row)
>>> 输出结果 (<Cell 'Sheet1'.A1>, <MergedCell 'Sheet1'.B1>, <Cell 'Sheet1'.C1>) (<Cell 'Sheet1'.A2>, <Cell 'Sheet1'.B2>, <Cell 'Sheet1'.C2>) (<Cell 'Sheet1'.A3>, <Cell 'Sheet1'.B3>, <Cell 'Sheet1'.C3>) (<MergedCell 'Sheet1'.A4>, <Cell 'Sheet1'.B4>, <Cell 'Sheet1'.C4>) (<Cell 'Sheet1'.A5>, <Cell 'Sheet1'.B5>, <Cell 'Sheet1'.C5>)
5.2 写Excel
在Excel中想要写文件,大致分为以下两种情况:
-
原Excel文件基础上写内容。
from openpyxl import load_workbook wb = load_workbook('files/p1.xlsx') sheet = wb.worksheets[0] # 找到单元格,并修改单元格的内容 cell = sheet.cell(1, 1) cell.value = "新的开始" # 将excel文件保存到p2.xlsx文件中 wb.save("files/p2.xlsx")
-
新创建Excel文件写内容。
from openpyxl import workbook # 创建excel且默认会创建一个sheet(名称为Sheet) wb = workbook.Workbook() sheet = wb.worksheets[0] # 或 sheet = wb["Sheet"] # 找到单元格,并修改单元格的内容 cell = sheet.cell(1, 1) cell.value = "新的开始" # 将excel文件保存到p3.xlsx文件中 wb.save("file/p3.xlsx")
-
Excel中的sheet和cell操作基本上都相同。
from openpyxl import workbook wb = workbook.Workbook() # Sheet # 1. 修改sheet名称 """ sheet = wb.worksheets[0] sheet.title = "数据集" wb.save("p2.xlsx") """ # 2. 创建sheet并设置sheet颜色 """ sheet = wb.create_sheet("工作计划", 0) sheet.sheet_properties.tabColor = "1072BA" wb.save("p2.xlsx") """ # 3. 默认打开的sheet """ wb.active = 0 wb.save("p2.xlsx") """ # 4. 拷贝sheet """ sheet = wb.create_sheet("工作计划") sheet.sheet_properties.tabColor = "1072BA" new_sheet = wb.copy_worksheet(wb["Sheet"]) new_sheet.title = "新的计划" wb.save("p2.xlsx") """ # 5.删除sheet """ del wb["用户列表"] wb.save('files/p2.xlsx') """
from openpyxl import load_workbook from openpyxl.styles import Alignment, Border, Side, Font, PatternFill, GradientFill wb = load_workbook('files/p1.xlsx') sheet = wb.worksheets[1] # 1. 获取某个单元格,修改值 """ cell = sheet.cell(1, 1) cell.value = "开始" wb.save("p2.xlsx") """ # 2. 获取某个单元格,修改值 """ sheet["B3"] = "Alex" wb.save("p2.xlsx") """ # 3. 获取某些单元格,修改值 """ cell_list = sheet["B2":"C3"] for row in cell_list: for cell in row: cell.value = "新的值" wb.save("p2.xlsx") """ # 4. 对齐方式 """ cell = sheet.cell(1, 1) # horizontal,水平方向对齐方式:"general", "left", "center", "right", "fill", "justify", "centerContinuous", "distributed" # vertical,垂直方向对齐方式:"top", "center", "bottom", "justify", "distributed" # text_rotation,旋转角度。 # wrap_text,是否自动换行。 cell.alignment = Alignment(horizontal='center', vertical='distributed', text_rotation=45, wrap_text=True) wb.save("p2.xlsx") """ # 5. 边框 # side的style有如下:dashDot','dashDotDot', 'dashed','dotted','double','hair', 'medium', 'mediumDashDot', 'mediumDashDotDot','mediumDashed', 'slantDashDot', 'thick', 'thin' """ cell = sheet.cell(9, 2) cell.border = Border( top=Side(style="thin", color="FFB6C1"), bottom=Side(style="dashed", color="FFB6C1"), left=Side(style="dashed", color="FFB6C1"), right=Side(style="dashed", color="9932CC"), diagonal=Side(style="thin", color="483D8B"), # 对角线 diagonalUp=True, # 左下 ~ 右上 diagonalDown=True # 左上 ~ 右下 ) wb.save("p2.xlsx") """ # 6.字体 """ cell = sheet.cell(5, 1) cell.font = Font(name="微软雅黑", size=45, color="ff0000", underline="single") wb.save("p2.xlsx") """ # 7.背景色 """ cell = sheet.cell(5, 3) cell.fill = PatternFill("solid", fgColor="99ccff") wb.save("p2.xlsx") """ # 8.渐变背景色 """ cell = sheet.cell(5, 5) cell.fill = GradientFill("linear", stop=("FFFFFF", "99ccff", "000000")) wb.save("p2.xlsx") """ # 9.宽高(索引从1开始) """ sheet.row_dimensions[1].height = 50 sheet.column_dimensions["E"].width = 100 wb.save("p2.xlsx") """ # 10.合并单元格 """ sheet.merge_cells("B2:D8") sheet.merge_cells(start_row=15, start_column=3, end_row=18, end_column=8) 
wb.save("p2.xlsx") """ """ sheet.unmerge_cells("B2:D8") wb.save("p2.xlsx") """ # 11.写入公式 """ sheet = wb.worksheets[3] sheet["D1"] = "合计" sheet["D2"] = "=B2*C2" wb.save("p2.xlsx") """ """ sheet = wb.worksheets[3] sheet["D3"] = "=SUM(B3,C3)" wb.save("p2.xlsx") """ # 12.删除 """ # idx,要删除的索引位置 # amount,从索引位置开始要删除的个数(默认为1) sheet.delete_rows(idx=1, amount=20) sheet.delete_cols(idx=1, amount=3) wb.save("p2.xlsx") """ # 13.插入 """ sheet.insert_rows(idx=5, amount=10) sheet.insert_cols(idx=3, amount=2) wb.save("p2.xlsx") """ # 14.循环写内容 """ sheet = wb["Sheet"] cell_range = sheet['A1:C2'] for row in cell_range: for cell in row: cell.value = "xx" for row in sheet.iter_rows(min_row=5, min_col=1, max_col=7, max_row=10): for cell in row: cell.value = "oo" wb.save("p2.xlsx") """ # 15.移动 """ # 将H2:J10范围的数据,向右移动15个位置、向下移动1个位置(rows为正数向下,负数向上) sheet.move_range("H2:J10",rows=1, cols=15) wb.save("p2.xlsx") """ """ sheet = wb.worksheets[3] sheet["D1"] = "合计" sheet["D2"] = "=B2*C2" sheet["D3"] = "=SUM(B3,C3)" sheet.move_range("B1:D3",cols=10, translate=True) # 自动翻译公式 wb.save("p2.xlsx") """ # 16.打印区域 """ sheet.print_area = "A1:D200" wb.save("p2.xlsx") """ # 17.打印时,每个页面的固定表头 """ sheet.print_title_cols = "A:D" sheet.print_title_rows = "1:3" wb.save("p2.xlsx")
-
6. 压缩文件
- 基于Python内置的shutil模块可以实现对压缩文件的操作。
import shutil# 1. 压缩文件"""# base_name,压缩后的压缩包文件# format,压缩的格式,例如:"zip", "tar", "gztar", "bztar", or "xztar".# root_dir,要压缩的文件夹路径"""# shutil.make_archive(base_name=r'datafile',format='zip',root_dir=r'files')# 2. 解压文件"""# filename,要解压的压缩包文件# extract_dir,解压的路径# format,压缩文件格式"""# shutil.unpack_archive(filename=r'datafile.zip', extract_dir=r'xxxxxx/xo', format='zip')
7.路径相关
7.1 转义
windows路径使用的是\,linux路径使用的是/。
特别的,在windows系统中如果有这样的一个路径
D:\nxxx\txxx\x1
,程序会报错。因为在路径中存在特殊符\n
(换行符)和\t
(制表符),Python解释器无法自动区分。所以,在windows中编写路径时,一般有两种方式:
- 加转义符,例如:
"D:\\nxxx\\txxx\\x1"
- 路径前加r,例如:
r"D:\nxxx\txxx\x1"
7.2 程序当前路径
项目中如果使用了相对路径,那么一定要注意当前所在的位置。
例如:在
/Users/dalai/PycharmProjects/CodeRepository/
路径下编写demo.py
文件with open("a1.txt", mode='w', encoding='utf-8') as f: f.write("你好呀")
用以下两种方式去运行:
- 方式1,文件会创建在
/Users/dalai/PycharmProjects/CodeRepository/
目录下。
cd /Users/dalai/PycharmProjects/CodeRepository/ python demo.py
- 方式2,文件会创建在
/Users/dalai
目录下。
cd /Users/dalai python /Users/dalai/PycharmProjects/CodeRepository/demo.py
import os """ # 1.获取当前运行的py脚本所在路径 abs = os.path.abspath(__file__) print(abs) # /Users/wupeiqi/PycharmProjects/luffyCourse/day09/20.路径相关.py path = os.path.dirname(abs) print(path) # /Users/wupeiqi/PycharmProjects/luffyCourse/day09 """ base_dir = os.path.dirname(os.path.abspath(__file__)) file_path = os.path.join(base_dir, 'files', 'info.txt') print(file_path) if os.path.exists(file_path): file_object = open(file_path, mode='r', encoding='utf-8') data = file_object.read() file_object.close() print(data) else: print('文件路径不存在')
7.3文件和路径相关
import shutil import os # 1. 获取当前脚本绝对路径 """ abs_path = os.path.abspath(__file__) print(abs_path) """ # 2. 获取当前文件的上级目录 """ base_path = os.path.dirname( os.path.dirname(路径) ) print(base_path) """ # 3. 路径拼接 """ p1 = os.path.join(base_path, 'xx') print(p1) p2 = os.path.join(base_path, 'xx', 'oo', 'a1.png') print(p2) """ # 4. 判断路径是否存在 """ exists = os.path.exists(p1) print(exists) """ # 5. 创建文件夹 """ os.makedirs(路径) """ """ path = os.path.join(base_path, 'xx', 'oo', 'uuuu') if not os.path.exists(path): os.makedirs(path) """ # 6. 是否是文件夹 """ file_path = os.path.join(base_path, 'xx', 'oo', 'uuuu.png') is_dir = os.path.isdir(file_path) print(is_dir) # False folder_path = os.path.join(base_path, 'xx', 'oo', 'uuuu') is_dir = os.path.isdir(folder_path) print(is_dir) # True """ # 7. 删除文件或文件夹 """ os.remove("文件路径") """ """ path = os.path.join(base_path, 'xx') shutil.rmtree(path) """ # 8. 拷贝文件夹 """ shutil.copytree("/Users/wupeiqi/Desktop/图/csdn/","/Users/wupeiqi/PycharmProjects/CodeRepository/files") """ # 9.拷贝文件 """ shutil.copy("/Users/wupeiqi/Desktop/图/csdn/WX20201123-112406@2x.png","/Users/wupeiqi/PycharmProjects/CodeRepository/") shutil.copy("/Users/wupeiqi/Desktop/图/csdn/WX20201123-112406@2x.png","/Users/wupeiqi/PycharmProjects/CodeRepository/x.png") """ # 10.文件或文件夹重命名 """ shutil.move("/Users/wupeiqi/PycharmProjects/CodeRepository/x.png","/Users/wupeiqi/PycharmProjects/CodeRepository/xxxx.png") shutil.move("/Users/wupeiqi/PycharmProjects/CodeRepository/files","/Users/wupeiqi/PycharmProjects/CodeRepository/images") """
-