python + Excel

置顶圆弧YH

已于 2024-05-04 10:32:15 修改

阅读量202

点赞数 1

文章标签： python 开发语言

于 2023-04-28 14:35:10 首次发布

本文链接：https://blog.csdn.net/weixin_46205351/article/details/130425747

版权

csv→csv

读取csv表格

python中的csv包可以读取csv文件。

import  openpyxl
import csv

# Print every record of the CSV file as a list of strings.
# The with-block closes the file even if reading fails; newline="" is the mode
# the csv module expects so that line breaks inside quoted fields parse correctly.
with open(r"e:\数据\国民消费水平年度数据.csv", newline="") as file1:
    data = csv.reader(file1)
    for i in data:
        print(i)

打开电脑中的数据，用 csv.reader( ) 。

又如下面案例，用csv.reader 读取csv文件。

import csv
import openpyxl
# Load the GBK-encoded source table once and close the file straight away.
# newline='' is the mode the csv module expects for its input files.
with open("huiguan7-29.csv", 'r', encoding='GBK', newline='') as file1:
    data = [line for line in csv.reader(file1) if line]  # skip blank records

# Pair every header title with its column index; the indices used by the
# filter below (1, 5, 7, 18, 19) can be looked up in this listing.
myCol = ()
if data and "editor" in data[0][0]:
    print("标题栏目", "\n")
    myCol = tuple([title, index] for index, title in enumerate(data[0]))
for ab in myCol:
    print(ab)


outname = "商业行业馆"
outfilenName = outname + ".txt"

# Keep only the rows whose column 5 equals outname. The first record is always
# treated as the header; later "editor" rows and rows too short to hold
# column 19 are skipped.
selected = [["ObjectID", "会馆", "x", "y"]]
for line in data[1:]:
    if "editor" in line[0] or len(line) < 20:
        continue
    if line[5] == outname:
        selected.append([line[1], line[7], line[18], line[19]])

# Plain-text copy of the selection, one comma-separated record per line.
with open(outfilenName, "w", encoding="utf-8") as wfile:
    for record in selected:
        print(*record, sep=",", file=wfile)

# Write the same records to a workbook. The lists are appended directly rather
# than re-split from the text file, so a field that itself contains a comma
# stays in a single cell.
book = openpyxl.Workbook()
sheet = book.active
sheet.title = "test"

for newline in selected:
    print(newline)
    sheet.append(newline)  # append() takes one list per worksheet row
# openpyxl writes the xlsx format, so the file name must carry that extension.
outxls = outname + ".xlsx"
book.save(outxls)

创建csv文本

创建文本

# Append one comma-separated record to the file (the file is created if missing).
with open("积累程序-text.csv", "a+") as file:
    file.write("美国队长,钢铁侠,蜘蛛侠")

即可完成。

csv包读取写入csv表格数据

对于csv文件，可以读取并写入

# Append one comma-separated record to the file (the file is created if missing).
with open("积累文件.csv", "a+") as file:
    file.write("美国队长,钢铁侠,蜘蛛侠")

# newline='' keeps the csv module from producing a blank line between rows.
# encoding='utf-8' avoids encoding errors and garbled characters.

import csv

movie_rows = [
    ['电影', '豆瓣评分'],
    ['银河护卫队', '9.1'],
    ['复仇者联盟', '8.1'],
]
with open("积累csv文件.csv", 'w', newline='', encoding='utf-8') as file1:
    writerFile = csv.writer(file1)
    # writerows() emits each inner list as one record, header first.
    writerFile.writerows(movie_rows)

即可完成。

pandas→csv

pandas包读取和写入csv表格数据

使用pandas包，用read_csv()方法来提取工作表

'''
【目标】合并csv表格
【方法】
①找到工作表所在的文件位置
②提取表格

注意：中间要加上一步，整理表格。有的表格其下有说明性文字，得删去。

③合并表格
④导出表格
'''

# Step 1: locate the files.
# os.walk yields a (directory, sub-directories, file names) tuple per folder.
import os

# Raw string: the backslashes of a Windows path must not be read as escapes.
sourceDir = r"e:\数据\主要国家年度数据"
outputName = "合并各洲国家数据dataResult.csv"

for fileList in os.walk(sourceDir):
    print(fileList)


# Step 2: read every table with pandas.read_csv().
import pandas as pd

myList = []
n = 0

for file in os.walk(sourceDir):
    for table in file[2]:  # file[2] holds the file names of this folder
        # Only CSV inputs are merged, and the result of an earlier run (which
        # is saved inside the same folder) must not be merged into itself.
        if not table.lower().endswith(".csv") or table == outputName:
            continue

        path = os.path.join(file[0], table)

        # The first four lines of each file are skipped before parsing.
        data = pd.read_csv(path, encoding="gbk", skiprows=[0, 1, 2, 3])

        print(data)
        n = n + 1
        myList.append(data)
        print("第" + str(n) + "个列表已经提取。")


# Steps 3 and 4: merge the tables and export the result without the index column.
# pd.concat raises ValueError on an empty list, so merge only when something was read.
if myList:
    dataResult = pd.concat(myList)
    dataResult.to_csv(os.path.join(sourceDir, outputName), index=False)

即可完成。

pandas 合并csv表格

【目标】合并csv表格
【方法】
①找到工作表所在的文件位置
②提取表格

注意：中间要加上一步，整理表格。有的表格其下有说明性文字，得删去。否则程序运行会出错。

③合并表格
④导出表格

'''
【目标】合并csv表格
【方法】
①找到工作表所在的文件位置
②提取表格
③合并表格
④导出表格
'''
#举例

# Step 1: locate the files.
# os.walk yields a (directory, sub-directories, file names) tuple per folder.
import os

# Raw string: the backslashes of a Windows path must not be read as escapes.
sourceDir = r"e:\数据\主要国家年度数据"
outputName = "合并各洲国家数据dataResult.csv"

for fileList in os.walk(sourceDir):
    print(fileList)


# Step 2: read every table with pandas.read_csv().
import pandas as pd

myList = []
n = 0

for file in os.walk(sourceDir):
    for table in file[2]:  # file[2] holds the file names of this folder
        # Only CSV inputs are merged, and the result of an earlier run (which
        # is saved inside the same folder) must not be merged into itself.
        if not table.lower().endswith(".csv") or table == outputName:
            continue

        path = os.path.join(file[0], table)

        # The first four lines of each file are skipped before parsing.
        data = pd.read_csv(path, encoding="gbk", skiprows=[0, 1, 2, 3])

        print(data)
        n = n + 1
        myList.append(data)
        print("第" + str(n) + "个列表已经提取。")


# Steps 3 and 4: merge the tables and export the result without the index column.
# pd.concat raises ValueError on an empty list, so merge only when something was read.
if myList:
    dataResult = pd.concat(myList)
    dataResult.to_csv(os.path.join(sourceDir, outputName), index=False)

pandas→xlsx

pandas读取xlsx文件

使用pandas包，读取了xlsx文件数据，然后输入到了txt文本中。

'''
【基本操作】
df.info()
数据表的信息

df.shape
数据表的格式

但是，需要首先把数据excel表整理成标准格式。
df2=df1.dropna()#将所有含有nan项的row删除

print(df.shape)

print(df.columns)查看列的主要信息，竖直线上的端点信息

print(df.dtypes)

print(df.values)查看行的的主要信息，水平线上的端点信息。

print(df.head(3))
print(df.tail(3))

df.to_excel('清理后的数据.xlsx', sheet_name='同亚洲各国（地区）20年进出口总额')#输入到excel表中


'''
    

import pandas as pd
Name = "中国同亚洲各国（地区）20年进出口总额年度数据【数据源】国家统计局.xlsx"

# read_excel already returns a DataFrame; dropna() removes every row that
# contains a NaN.
df = pd.read_excel(Name).dropna()

countryList = ["缅甸", "泰国", "柬埔寨", "老挝", "越南", "菲律宾", "马来西亚", "新加坡", "文莱", "印度尼西亚", "东帝汶"]

# Columns are written in reverse order, except that the sheet's first column
# (last after reversing) is moved to the front as the label column.
# Presumably the source lists years newest-first, so this yields oldest-first
# -- TODO confirm against the downloaded sheet.
iList = list(df.columns)
iList.reverse()

# The with-block guarantees the output is flushed and closed.
with open("清理后的数据result-东南亚国家.txt", "w", encoding="utf-8") as writefile:
    print(iList[-1], end=",", file=writefile)
    for i in iList[:-1]:
        print(i, end=",", file=writefile)
    print("\n", file=writefile)

    for row in list(df.values[1:]):  # the first remaining data row is not exported
        rowList = list(row)
        rowList.reverse()
        label = rowList[-1]

        # Export the row once if its label mentions any listed country;
        # str() keeps a non-text label from raising TypeError.
        if not any(country in str(label) for country in countryList):
            continue
        print(label, end=",", file=writefile)
        for value in rowList[0:-1]:
            print(value, end=",", file=writefile)
        print("\n", file=writefile)

即可完成。

openpyxl→xlsx

openpyxl读取xlsx文件

使用openpyxl库可以实现读取Excel文件中的每列数据和每行数据。

但在使用openpyxl前，要先在文件夹中选择xlsx文件，接着加载并打开xlsx文件，然后选择xlsx文件中的表单sheet。一个xlsx文件中可能有多个sheet，所以需要选择。

import os
import openpyxl

# Raw string: "\s" in an ordinary literal is an invalid escape sequence.
os.chdir(r"E:\scoretest")
subList = os.listdir(os.getcwd())

# Pick the first real workbook in the folder; other files and Excel's "~$"
# lock files cannot be opened by load_workbook.
xlsxNames = [f for f in subList if f.lower().endswith(".xlsx") and not f.startswith("~$")]
if not xlsxNames:
    raise FileNotFoundError("no .xlsx file found in " + os.getcwd())
myxlsxName = xlsxNames[0]

wb = openpyxl.load_workbook(filename=myxlsxName)  # load and open the workbook
mysheet = wb[wb.sheetnames[0]]  # first worksheet of the workbook

横行为 row，竖列为 column。多个横行 row 依次堆叠构成 rows；各行中同一位置的单元格自上而下排列，就构成一个竖列 column。

可以获取横行和竖列的个数大小，以便在循环的时候用得到。获取横行row的数量。因为循环的时候用的range() 函数是“左闭右开”，所以会加上1，避免遗漏。反之，竖列如果用range()函数循环，也同理要加上1。


column_count = mysheet.max_column # number of columns in the sheet
print("竖列 column:",column_count)

row_count=mysheet.max_row# number of rows in the sheet; range() excludes its end, so loops over rows need range(1, row_count+1)
print("横行 row ：",row_count)

最重要是这样一段代码，类似九九乘法表，这段代码以竖列column为单位，依次输出横行row中的数据。

# Like a multiplication table: walk the sheet column by column and print the
# cells of each column, top to bottom, on one line. Swapping the two ranges
# prints row by row instead.
for col in range(1, column_count + 1):
    column_values = [
        mysheet.cell(row=row, column=col).value
        for row in range(1, mysheet.max_row + 1)
    ]
    # Every value is followed by a comma, then the line is ended.
    print("".join(f"{value}," for value in column_values))

cell_value = mysheet.cell(row=row, column=col).value中，row和col可以依据实际需求替换为具体数字。比如调整为cell_value = mysheet.cell(row=1, column=4).value，这样就只是输出了栏目位置为（1，4）的数据。

掌握了这个语句，运用循环结构，就可以获取任意一行、任意一列、任意一个空格的数据。比如，可以将标题栏中的数据放到一个列表中。便于之后操作。代码如下：

# Collect the values of row 1 (the header row) into a list.
headList = [
    mysheet.cell(row=1, column=col).value
    for col in range(1, column_count + 1)
]
print(headList)

完整代码如下


import os
import openpyxl

# Raw string: "\s" in an ordinary literal is an invalid escape sequence.
os.chdir(r"E:\scoretest")
subList = os.listdir(os.getcwd())

# Pick the first real workbook in the folder; other files and Excel's "~$"
# lock files cannot be opened by load_workbook.
xlsxNames = [f for f in subList if f.lower().endswith(".xlsx") and not f.startswith("~$")]
if not xlsxNames:
    raise FileNotFoundError("no .xlsx file found in " + os.getcwd())
myxlsxName = xlsxNames[0]

print(myxlsxName)
wb = openpyxl.load_workbook(filename=myxlsxName)
mysheet = wb[wb.sheetnames[0]]  # first worksheet of the workbook


column_count = mysheet.max_column  # number of columns
print("竖列 column:", column_count)

# Report the real number of rows; the "+1" belongs only in the range() calls
# below, because range() excludes its end.
row_count = mysheet.max_row
print("横行 row:", row_count)


# Header: row 1, walking across every column.
for col in range(1, column_count + 1):
    cell_value = mysheet.cell(row=1, column=col).value
    print(cell_value, end=",")
print()


# Like a multiplication table: one output line per column, listing that
# column's cells from the first row to the last.
for col in range(1, column_count + 1):
    for row in range(1, row_count + 1):
        cell_value = mysheet.cell(row=row, column=col).value
        print(cell_value, end=",")
    print()


# Same output again, this time taking cell col-1 out of each row tuple.
for col in range(1, column_count + 1):
    for row in mysheet.rows:
        print(row[col - 1].value, end=",")
    print()

输出每列和每行数据的程序如下。下面这段代码是bingChat写的。

#使用openpyxl库可以实现读取Excel文件中的每列数据和每行数据。

#输出每列数据的程序如下：


import openpyxl

# Load the workbook from disk.
workbook = openpyxl.load_workbook("example.xlsx")

# Work on the active worksheet.
sheet = workbook.active

# Number of columns in the sheet.
column_count = sheet.max_column

# Build and print one list of cell values per column.
for col in range(1, column_count + 1):
    column_data = [
        sheet.cell(row=row, column=col).value
        for row in range(1, sheet.max_row + 1)
    ]
    print(f"第{col}列数据: {column_data}")

#输出每行数据的程序如下：


import openpyxl

# Load the workbook from disk.
workbook = openpyxl.load_workbook("example.xlsx")

# Work on the active worksheet.
sheet = workbook.active

# sheet.values yields every row as a tuple of plain cell values.
for row in sheet.values:
    print(f"行数据: {row}")

#需要注意的是，上述代码中的"example.xlsx"是要读取的Excel文件的路径，需要根据实际情况进行修改。

另外举例如下，用 openpyxl 处理具体的 xlsx 文件信息。

import openpyxl

rname = input("请输入excel文件：")  # e.g. "huiguanAll.xlsx"
book = openpyxl.load_workbook(rname, data_only=True)

# List the worksheets with their index so that one can be chosen.
print("Excel 文件中表格sheet如下：", end=" ")
for sn, ws in enumerate(book.worksheets):
    print(sn, end=" ")
    print(ws.title, end="；")
print()
sheetNum = int(input("要分析的第n个表格，请输入n："))
sheet = book.worksheets[sheetNum]
rowNum = int(sheet.max_row)
colNum = int(sheet.max_column)

# Column number -> header title taken from row 1.
myDict = {m: sheet.cell(row=1, column=m).value for m in range(1, colNum + 1)}
for (k, v) in myDict.items():
    print(k, v)

# Column numbers chosen by the user. The first one is the filter column;
# entering 0 ends the input.
outList = []
while True:
    out = int(input("请输入目标列数字，第1个为判断项，输入0则退出："))
    if out == 0:
        break
    outList.append(out)
if not outList:
    # Without a filter column there is nothing to export.
    raise SystemExit("未选择任何列，程序结束。")

filterCol = outList[0]
otherCols = outList[1:]

# Output name: titles of the other columns, then the filter column's title.
name = "".join(str(myDict[c]) + " " for c in otherCols) + str(myDict[filterCol])

geshi = ".txt"
outfileName = rname + "-" + name + geshi

wbook = openpyxl.Workbook()  # output workbook; not to be confused with the input one
wsheet = wbook.active
wsheet.title = name

with open(outfileName, "w", encoding="utf-8") as wfile:
    # max_row is inclusive, so the range has to end at rowNum + 1.
    for i in range(1, rowNum + 1):
        col0 = sheet.cell(row=i, column=filterCol).value
        if col0 is None:  # rows with an empty filter cell are left out
            continue
        myline = [sheet.cell(row=i, column=c).value for c in otherCols]
        myline.append(col0)  # the filter column goes last
        print(*myline, sep=",", file=wfile)
        wsheet.append(myline)
outxls = rname + "-" + name + ".xlsx"
wbook.save(outxls)

openpyxl写入xlsx文件

输入列表中的数据到xlsx文件中

'''
要将列表信息按照元素快速输入到各个xlsx表格中的单元格中，可以使用openpyxl库来操作xlsx文件。下面是一个示例代码：
'''

from openpyxl import Workbook

# Start from an empty workbook (Workbook is a class: capital W, and it must be called).
workbook = Workbook()

# Use the worksheet that is active in the new workbook.
sheet = workbook.active

# Items to be written, one per row.
data = ['apple', 'banana', 'orange', 'grape']

# Fill column 1 from row 1 downwards, one item per cell.
for row_number, item in enumerate(data, start=1):
    sheet.cell(row=row_number, column=1, value=item)

# Write the workbook to disk.
workbook.save('output.xlsx')

写入单元格数据，具体代码如下：

#使用openpyxl库可以生成和写入xlsx文件。
#首先，需要导入openpyxl库：
from openpyxl import Workbook

# Create a Workbook object.
wb = Workbook()

# wb.active gives the active worksheet.
ws = wb.active

# Assigning to a coordinate sets the cell's value; "A1" is row 1, column 1.
ws["A1"] = "Hello, World!"

# wb.save(filename) stores the workbook as an xlsx file, here example.xlsx.
wb.save("example.xlsx")

#完整的代码示例：
from openpyxl import Workbook

# Build the workbook and take its active worksheet.
wb = Workbook()
ws = wb.active

# Put the text into the cell at row 1, column 1.
ws.cell(row=1, column=1, value="Hello, World!")

# Store the result as an xlsx file.
wb.save("example.xlsx")

#执行以上代码后，将生成一个名为example.xlsx的xlsx文件，并在第1行、第1列的单元格中写入了字符串“Hello, World!”。

使用openpyxl安装包

import openpyxl
workBook = openpyxl.Workbook()  # Workbook is a class: capital W, and the call parentheses are required
sheet = workBook.active
sheet.title = 'new title'

sheet["a1"] = "漫威宇宙"
row = ['灭霸', '响指', '敲']
rows = [['美国队长', '钢铁侠', '蜘蛛侠'], ['是', '漫威', '宇宙', '经典', '人物']]

# append() adds each list as a new row below the rows already used.
for i in [row, *rows]:
    sheet.append(i)


workBook.save("Marvel.xlsx")

即可完成。

又有如下案例：

import openpyxl

wb2 = openpyxl.load_workbook(filename="积累文档-QQ音乐链接简练版.xlsx")

sheetRanges = wb2[wb2.sheetnames[0]]  # first worksheet of the workbook

# Values of the first two cells of every row.
myList = [[row[0].value, row[1].value] for row in sheetRanges.rows]

newLine = []
for title, link in myList:
    # Only text titles can be processed; rows whose first cell is empty or
    # not text are left out explicitly instead of via a swallowed exception.
    if not isinstance(title, str):
        continue
    if len(title) >= 20:
        # Over-long titles are replaced by a short fixed label.
        newLine.append(["关于腾讯 ", link])
    elif not isinstance(link, str):
        continue
    elif "http" not in link:
        # Links stored without a scheme get "http:" put in front.
        newLine.append([title, "http:" + link])
    else:
        newLine.append([title, link])

# The output file is opened only after the workbook was read successfully, so
# a failed load does not leave behind an emptied file; the with-block closes it.
with open(r"e:\qqMusic_Href.txt", "w", encoding="utf-8") as file1:
    for i in newLine:
        print(i[0], file=file1)
        print(i[1], file=file1)

openpyxl读取文件后分割文本

举例如下，将 xlsx 文件读取后，按照需要分成不同部分，以便于计算。

import openpyxl
rname = "huiguanAll.xlsx-会馆名 省籍 百度Y 百度X 序号.xlsx"
rbook = openpyxl.load_workbook(rname)
for sn, ws in enumerate(rbook.worksheets):
    print(sn, end=":")
    print(ws.title, end="；")

rsheet = rbook.worksheets[0]
rowNum = rsheet.max_row
colNum = rsheet.max_column


# Column number -> header title taken from row 1.
titleDict = {m: rsheet.cell(row=1, column=m).value for m in range(1, colNum + 1)}

myDivsionCol = 2  # number of the column whose value decides the group

# Group the data rows by the value of the division column in a single pass.
# Row 1 is the header (it feeds titleDict), so data start at row 2, and
# max_row is inclusive, so the range ends at rowNum + 1.
shengDict = {}
for i in range(2, rowNum + 1):
    sheng = rsheet.cell(row=i, column=myDivsionCol).value
    # One list per sheet row, holding the values of all its columns.
    myline = [rsheet.cell(row=i, column=c).value for c in range(1, colNum + 1)]
    shengDict.setdefault(sheng, []).append(myline)

# One text file per group: the header line, then the group's rows, all
# comma-separated. The with-block closes each file.
for (k, vSheng) in shengDict.items():
    wname = str(k) + ".txt"
    with open(wname, "w", encoding="utf-8") as wfile:
        print(*titleDict.values(), sep=",", file=wfile)
        for huiguan in vSheng:
            print(*huiguan, sep=",", file=wfile)

openpyxl生成并合并xlsx文件

同时运用os，



实践：excel文件
生成多份excel文件，并将这些文件合并在一起。代码如下：


import os

from random import choice,randrange
from random import *
from openpyxl import Workbook,load_workbook

import sqlite3

def GRD():
    """Create ten workbooks, xlsxs0.xlsx to xlsxs9.xlsx, in the current directory.

    Each workbook gets the header row a-e followed by a random number
    (0 to 99) of rows made of five random single characters.
    """
    header = ["a", "b", "c", "d", "e"]
    for index in range(10):
        # randrange(100) decides how many data rows this workbook receives.
        line_count = randrange(100)
        wb = Workbook()
        ws = wb.worksheets[0]
        ws.append(header)

        for _ in range(line_count):
            # chr(randint(50, 70)) is one character with a code point in 50..70.
            ws.append([chr(randint(50, 70)) for _ in range(5)])

        wb.save("xlsxs" + str(index) + ".xlsx")
def eachXlsx(xlsxFn):
    """Yield the rows below the header of the first sheet of xlsxFn as tuples of values."""
    wb = load_workbook(xlsxFn, read_only=True)
    try:
        ws = wb.worksheets[0]
        # min_row=2 skips the header row.
        for row in ws.iter_rows(min_row=2, values_only=True):
            yield row
    finally:
        wb.close()


print(os.getcwd())
# Raw string: "\社" in an ordinary literal is an invalid escape sequence.
os.chdir(r"E:\社团")
# Generate the workbooks inside the working folder so the scan below finds them.
GRD()
print(os.listdir(os.getcwd()))

# Only the generated workbooks are imported, not every file in the folder.
xlsxs = [fn for fn in os.listdir(".") if fn.startswith("xlsxs") and fn.endswith(".xlsx")]
conn = sqlite3.connect("dataxlsx.db")
try:
    cur = conn.cursor()
    # The target table must exist before rows can be inserted.
    cur.execute("CREATE TABLE IF NOT EXISTS fromxlsx (a, b, c, d, e)")
    sql = "INSERT INTO fromxlsx VALUES(?,?,?,?,?)"
    for xlsx in xlsxs:
        cur.executemany(sql, eachXlsx(xlsx))
        conn.commit()  # commit after each workbook
finally:
    conn.close()

即可完成。

又如以下例子：


import os

from random import choice,randrange
from random import *
from openpyxl import Workbook,load_workbook

import sqlite3

def GRD():
    """Create ten workbooks, xlsxs0.xlsx to xlsxs9.xlsx, in the current directory.

    Each workbook gets the header row a-e followed by a random number
    (0 to 99) of rows made of five random single characters.
    """
    header = ["a", "b", "c", "d", "e"]
    for index in range(10):
        # randrange(100) decides how many data rows this workbook receives.
        line_count = randrange(100)
        wb = Workbook()
        ws = wb.worksheets[0]
        ws.append(header)

        for _ in range(line_count):
            # chr(randint(50, 70)) is one character with a code point in 50..70.
            ws.append([chr(randint(50, 70)) for _ in range(5)])

        wb.save("xlsxs" + str(index) + ".xlsx")
def eachXlsx(xlsxFn):
    """Yield the rows below the header of the first sheet of xlsxFn as tuples of values."""
    wb = load_workbook(xlsxFn, read_only=True)
    try:
        ws = wb.worksheets[0]
        # min_row=2 skips the header row.
        for row in ws.iter_rows(min_row=2, values_only=True):
            yield row
    finally:
        wb.close()


print(os.getcwd())
# Raw string: "\社" in an ordinary literal is an invalid escape sequence.
os.chdir(r"E:\社团")
# Generate the workbooks inside the working folder so the scan below finds them.
GRD()
print(os.listdir(os.getcwd()))

# Only the generated workbooks are imported, not every file in the folder.
xlsxs = [fn for fn in os.listdir(".") if fn.startswith("xlsxs") and fn.endswith(".xlsx")]
conn = sqlite3.connect("dataxlsx.db")
try:
    cur = conn.cursor()
    # The target table must exist before rows can be inserted.
    cur.execute("CREATE TABLE IF NOT EXISTS fromxlsx (a, b, c, d, e)")
    sql = "INSERT INTO fromxlsx VALUES(?,?,?,?,?)"
    for xlsx in xlsxs:
        cur.executemany(sql, eachXlsx(xlsx))
        conn.commit()  # commit after each workbook
finally:
    conn.close()

利用os、random 等包.

xlrd→xls

用 xlrd 可以读取 xls 文件。如下代码所示：

def test():
    """Interactively export cells of one sheet of an xls file to Stats.txt.

    Prompts for the workbook file, the sheet index and an export mode.
    Mode "R" writes the chosen columns of every row, one row per line.
    Mode "C" writes every column without its first cell, one column per line.
    Values are comma-separated and appended to Stats.txt.
    """
    import xlrd

    fileName = input("请输入xls文件名称：")
    rbook = xlrd.open_workbook(fileName)
    sheetList = rbook.sheet_names()  # names of all sheets in the workbook
    print("该xls文件拥有的表格：", sheetList)
    for sheet in rbook.sheets():
        print("表格具体位置及名称：", sheet)

    num = int(input("请输入xls文件中想要处理的页面序号："))
    sheetName = sheetList[num]
    rsheet = rbook.sheet_by_name(sheetName)
    row = rsheet.nrows
    colum = rsheet.ncols
    print(f"选择的{sheetName}表格大小：{row}行 x {colum}列 = {row*colum} 格子")

    rowNum = 0
    columNum = 0

    choice = input("选择row行输入R，选择colum列输入C：")
    choice = choice.upper()

    # The with-block closes Stats.txt even if a prompt or a lookup fails.
    with open("Stats.txt", "a+", encoding="utf-8") as wfile:
        if choice == "R":
            # Show index and content of the cells of row 0 as a selection aid.
            for index, cell in enumerate(rsheet.row_slice(rowNum)):
                print(index, cell.value)

            outNume = []
            while True:
                out = input("请输入所求列，输入OK结束：")

                if out.upper() == "OK":
                    break
                outNume.append(int(out))

            if outNume:
                # xlrd rows are 0-based, so range(0, row) covers every row.
                for outRow in range(0, row):
                    contentList = rsheet.row_slice(outRow)
                    # One line per row; the line ends after the last selected
                    # column even when a column index was entered twice.
                    print(*(contentList[m].value for m in outNume), sep=",", file=wfile)
        elif choice == "C":
            # Show index and content of the cells of column 0.
            for index, cell in enumerate(rsheet.col_slice(columNum)):
                print(index, cell.value)
            for outCol in range(0, colum):
                contentList = rsheet.col_slice(outCol)
                # Cells 1..row-1 of the column; the line is ended after the
                # last of them so that each column gets its own line.
                values = [cell.value for cell in contentList[1:row]]
                if values:
                    print(*values, sep=",", file=wfile)

    print("已经输出文本到Stats.txt中。")


test()

xlrd 处理Proquest导出的xls文件数据

proquest图书馆导出适用于Excel的xls 文件。

ProQuest | Better research, better learning, better insights.

这里面有很多书籍，可以导出 Excel 格式的引文。下面的代码将 Excel 文件中的数据，用 txt 文本来呈现。

def test():
    """Write the first record of each sheet of a ProQuest xls export to a text file.

    For every sheet, row 0 is read as field names and row 1 as the values of
    one record. Selected fields are copied to the names listed in outList and
    written as name={value} lines to "<title> MyNoteExpress <n>.txt", where n
    counts the sheets from 1. Sheets with fewer than two rows are skipped.
    """
    import re
    import xlrd  # xlrd reads the xls file

    print("引文来源网站:https://www.proquest.com/")
    fileName = input("请下载proquest图书馆Excel 或 整合电脑文件：")

    book = xlrd.open_workbook(fileName)
    sheetList = book.sheet_names()

    # Field names that are written to the output file.
    outList = ["ISSN", "URL", "pages", "abstract", "author", "title", "number", "pages", "publisher", "journal", "volume", "year"]
    for n, sheet in enumerate(book.sheets()):
        sheetName = sheetList[n]
        print(f"工作表 {n}:", sheetName)
        print(sheet)

        row = sheet.nrows

        colum = sheet.ncols
        print(f"规格：{row}横行 x {colum}竖列")

        if row < 2:
            # Without a value row there is no record to export.
            continue

        tagList = sheet.row_slice(0)
        contentList = sheet.row_slice(1)
        # Field name -> value of the record.
        myDict = {tag.value: content.value for tag, content in zip(tagList, contentList)}

        # Copy source columns to the output names; a column missing from the
        # sheet yields an empty value instead of a KeyError.
        myDict["author"] = myDict.get("Authors", "")
        myDict["abstract"] = myDict.get("Abstract", "")
        myDict["URL"] = myDict.get("DocumentURL", "")
        myDict["title"] = myDict.get("Title", "")
        # The f-string also copes with non-text cell values.
        myDict["publisher"] = f'{myDict.get("placeOfPublication", "")}:{myDict.get("companies", "")}'
        myDict["journal"] = myDict.get("pubtitle", "")
        myDict["ISSN"] = myDict.get("issn", "")

        title = myDict.get("Title", "")
        # Characters that are not allowed in Windows file names are replaced.
        safeTitle = re.sub(r'[\\/:*?"<>|\r\n]', "_", str(title))
        with open(f"{safeTitle} MyNoteExpress {n + 1}.txt", "w", encoding="utf-8") as wfile:
            for i in myDict:
                if i in outList:
                    print(i, "=", "{", myDict[i], "}", sep="", file=wfile)
        print("已完成。")
test()

不过，ProQuest 也可以导出 BibTeX 格式的引文，在 NoteExpress 中可以导入 BibTeX 文档。如下

NoteExpress软件中，快捷键Ctrl +M 可导入题录。

变化BibTxt文档为NoteExpress文档

对于不同的引文，有时候想尝试一下转换，所以写了这样一段代码。将txt文本的数据，用不同的txt形式展现出来。

with open("10.2307_23732282.txt", "r", encoding="utf-8") as file:
    data = file.read()

import re
datalines = data.splitlines()
myDict = {}
myDict["{Reference Type}"] = "Journal Article"

# One field line: a tag, the first "=" and the value, with an optional trailing
# comma. Splitting at the first "=" only keeps values such as URLs with query
# strings in one piece.
fieldPattern = re.compile(r"^\s*([^=\s][^=]*?)\s*=\s*(.*?)\s*,?\s*$")

# The first line and the last two lines of the file are not parsed as fields.
for everyline in datalines[1:-2]:
    found = fieldPattern.match(everyline)
    if found is None:
        continue  # not a "tag = value" line
    tag = found.group(1).title()
    myTag = "{" + tag + "}"

    # Remove the braces that enclose the value; braces inside it are kept.
    myInfo = found.group(2).strip("{}")
    myDict[myTag] = myInfo

outList = ["{Reference Type}", "{Journal}", "{Pages}", "{Abstract}", "{Author}", "{Year}", "{Title}", "{Publisher}"]
with open("test.txt", "w", encoding="utf-8") as wfile:
    for (k, v) in myDict.items():
        if k in outList:
            print(k, ":", v, file=wfile)

xlrd处理国家统计局下载的xls文件数据

使用安装包xlrd，读取统计局下载的数据，而后输出到txt文本中。

import xlrd
fileName = input("请输入从国家统计局下载的数据xls文件：")
book = xlrd.open_workbook(fileName)
sheetList = book.sheet_names()  # names of all sheets in the workbook
for sheet in book.sheets():
    print(sheet)

sheetName = input("请输入xls文件中想要处理的页面：")
sheet = book.sheet_by_name(sheetName)
row = sheet.nrows
colum = sheet.ncols
print(f"选择的{sheetName}表格大小：{row}行 x {colum}列 = {row*colum} 格子")
print()

# Ask again until one of the two supported layouts is entered; the prompt has
# to be repeated inside the loop, otherwise an invalid answer never changes.
choice = input("时间在横行，选择TH；时间在竖列，选择TS：")
while choice not in ("TH", "TS"):
    choice = input("请重新输入TH 或者 TS：")

# NOTE(review): the fixed offsets below (row 2 or 3 as header, last 4 rows
# ignored) presumably match the layout of the downloaded sheets -- confirm.
with open("Stats.txt", "a+", encoding="utf-8") as wfile:
    if choice == "TH":
        yearHang = 2

        # Header line: first cell of row 2, then the other cells reversed.
        yearList = sheet.row_slice(yearHang)
        zhiBiao = yearList[0]
        print(zhiBiao.value, end=", ", file=wfile)
        yearList.reverse()
        for cell in yearList[:-1]:
            print(cell.value, end=", ", file=wfile)
        print("\n", file=wfile)

        # Data rows 3 .. row-5, written in the same reversed order.
        for hang in range(3, row - 4):
            ContentList = sheet.row_slice(hang)
            Biao = ContentList[0]
            print(Biao.value, end=",", file=wfile)
            ContentList.reverse()
            for cell in ContentList[:-1]:
                print(cell.value, end=",", file=wfile)
            print("\n", file=wfile)
    else:  # choice == "TS"
        yearLie = 0
        # First line: cell 3 of column 0, then the reversed column without
        # its first two and last four cells.
        yearList = sheet.col_slice(yearLie)
        zhiBiao = yearList[3]
        print(zhiBiao.value, end=",", file=wfile)

        yearList.reverse()
        for cell in yearList[2:-4]:
            print(cell.value, end=" ,", file=wfile)
        print("\n", file=wfile)

        # Every further column becomes one line, reversed in the same way.
        for lie in range(1, colum):
            contentList = sheet.col_slice(lie)
            print(contentList[3].value, end=",", file=wfile)
            contentList.reverse()
            for cell in contentList[2:row - 4]:
                print(cell.value, end=", ", file=wfile)
            print("\n", file=wfile)
        print("\n", file=wfile)

print("已经输出文本到Stats.txt中。")

即可完成。

也有一些函数公式，方便对xls文档进行操作。

'''
【基本操作】
df.info()
数据表的信息
df.shape
数据表的格式

但是，需要首先把数据excel表整理成标准格式。
df2=df1.dropna()#将所有含有nan项的row删除

print(df.shape)

print(df.columns)查看列的主要信息，竖直线上的端点信息

print(df.dtypes)

print(df.values)查看行的的主要信息，水平线上的端点信息。
print(df.head(3))
print(df.tail(3))

'''
    
import pandas as pd
Name = "中国同亚洲各国（地区）20年进出口总额年度数据【数据源】国家统计局.xlsx"

# Load the sheet and drop every row that contains a NaN, in one chained step.
df = pd.DataFrame(pd.read_excel(Name)).dropna()

# Show the column labels first, then the cell values as an array.
for part in (df.columns, df.values):
    print(part)

又如下面例子：

import pandas as pd
Name = "中国同亚洲各国（地区）20年进出口总额年度数据【数据源】国家统计局.xlsx"

# Read the workbook into a DataFrame and show it.
raw_table = pd.read_excel(Name)
df = pd.DataFrame(raw_table)
print(df)

读取xls文档名称后，可以进一步操作。

'''
【基本操作】
df.info()
数据表的信息

df.shape
数据表的格式

但是，需要首先把数据excel表整理成标准格式。
df2=df1.dropna()#将所有含有nan项的row删除

print(df.shape)

print(df.columns)查看列的主要信息，竖直线上的端点信息

print(df.dtypes)

print(df.values)查看行的的主要信息，水平线上的端点信息。

print(df.head(3))
print(df.tail(3))

df.to_excel('清理后的数据.xlsx', sheet_name='同亚洲各国（地区）20年进出口总额')#输入到excel表中


'''
    

import pandas as pd
Name = "中国同亚洲各国（地区）20年进出口总额年度数据【数据源】国家统计局.xlsx"

# read_excel already returns a DataFrame; dropna() removes every row that
# contains a NaN.
df = pd.read_excel(Name).dropna()

# Columns are written in reverse order, except that the sheet's first column
# (last after reversing) is moved to the front as the label column.
iList = list(df.columns)
iList.reverse()

# The with-block guarantees the output is flushed and closed.
with open("清理后的数据result.txt", "w", encoding="utf-8") as writefile:
    print(iList[-1], end=",", file=writefile)
    for i in iList[:-1]:
        print(i, end=",", file=writefile)
    print("\n", file=writefile)

    for row in list(df.values[1:]):  # the first remaining data row is not exported
        rowList = list(row)
        rowList.reverse()

        print(rowList[-1], end=",", file=writefile)
        # Every value column is written, so each row lines up with the header.
        for value in rowList[:-1]:
            print(value, end=",", file=writefile)
        print("\n", file=writefile)

又如下面例子：

'''
【基本操作】
df.info()
数据表的信息

df.shape
数据表的格式

但是，需要首先把数据excel表整理成标准格式。
df2=df1.dropna()#将所有含有nan项的row删除

print(df.shape)

print(df.columns)查看列的主要信息，竖直线上的端点信息

print(df.dtypes)

print(df.values)查看行的的主要信息，水平线上的端点信息。

print(df.head(3))
print(df.tail(3))

df.to_excel('清理后的数据.xlsx', sheet_name='同亚洲各国（地区）20年进出口总额')#输入到excel表中


'''
    

import pandas as pd
Name = "中国同亚洲各国（地区）20年进出口总额年度数据【数据源】国家统计局.xlsx"

# read_excel already returns a DataFrame; dropna() removes every row that
# contains a NaN.
df = pd.read_excel(Name).dropna()

countryList = ["缅甸", "泰国", "柬埔寨", "老挝", "越南", "菲律宾", "马来西亚", "新加坡", "文莱", "印度尼西亚", "东帝汶"]

# Columns are written in reverse order, except that the sheet's first column
# (last after reversing) is moved to the front as the label column.
# Presumably the source lists years newest-first, so this yields oldest-first
# -- TODO confirm against the downloaded sheet.
iList = list(df.columns)
iList.reverse()

# The with-block guarantees the output is flushed and closed.
with open("清理后的数据result-东南亚国家.txt", "w", encoding="utf-8") as writefile:
    print(iList[-1], end=",", file=writefile)
    for i in iList[:-1]:
        print(i, end=",", file=writefile)
    print("\n", file=writefile)

    for row in list(df.values[1:]):  # the first remaining data row is not exported
        rowList = list(row)
        rowList.reverse()
        label = rowList[-1]

        # Export the row once if its label mentions any listed country;
        # str() keeps a non-text label from raising TypeError.
        if not any(country in str(label) for country in countryList):
            continue
        print(label, end=",", file=writefile)
        for value in rowList[0:-1]:
            print(value, end=",", file=writefile)
        print("\n", file=writefile)

总之，有多种操作。