import tabula
# Extract the second column of every table that tabula finds on pages 12-19
# of the PDF and write each non-empty cell to a.txt, one per line, formatted
# as `"<cell>":"AA",`.
#
# NOTE(review): the original paste had lost its indentation; the nesting
# below is reconstructed from the statement order -- confirm it matches the
# intended script.
tables = tabula.read_pdf(r'中央财经大学期刊目录(2019版).pdf', encoding='utf-8', pages='12-19')
all_data = []
for df in tables:
    # Column index 1 (the second column) is the only one that is exported.
    for cell in df.iloc[:, 1].values:
        # Skip cells whose string form is "nan" (presumably empty cells that
        # were read as NaN -- TODO confirm against the extracted tables).
        if str(cell) == "nan":
            continue
        # Replace carriage returns inside a cell with a space so every entry
        # stays on a single output line.
        if isinstance(cell, str) and "\r" in cell:
            cell = cell.replace("\r", " ")
        all_data.append('"{}":"AA",'.format(cell))

# The context manager guarantees the file is flushed and closed. The explicit
# UTF-8 encoding keeps the output independent of the platform default, which
# may be unable to encode some of the extracted text.
with open("a.txt", "w", encoding="utf-8") as txt:
    txt.writelines(entry + "\n" for entry in all_data)
# 使用 Python 读取 PDF 中的表格
# 最新推荐文章于 2024-07-05 18:41:30 发布