本文给出一个批处理脚本,用于将 IEEE Xplore 导出的 IEEE TGRS 检索记录(CSV)批量转换为 PDF,方便阅读。此处给出导出结果,预览如下。
xplore导出的csv文件格式如下
最终的批处理生成效果如下
下面给出具体代码
generate.py
from reportlab.pdfbase import pdfmetrics # 注册字体
from reportlab.pdfbase.ttfonts import TTFont # 字体类
from reportlab.platypus import Table, SimpleDocTemplate, Paragraph, Image, PageBreak # 报告内容相关类
from reportlab.lib.pagesizes import letter # 页面的标志尺寸(8.5*inch, 11*inch)
from reportlab.lib.styles import getSampleStyleSheet # 文本样式
from reportlab.lib import colors # 颜色模块
from reportlab.graphics.charts.barcharts import VerticalBarChart # 图表类
from reportlab.graphics.charts.legends import Legend # 图例类
from reportlab.graphics.shapes import Drawing # 绘图工具
from reportlab.lib.units import cm # 单位:cm
# 注册字体(提前准备好字体文件, 如果同一个文件需要多种字体可以注册多个)
# pdfmetrics.registerFont(TTFont('SimSun', 'SimSun.ttf'))
class Graphs:
    """Static factory helpers that build styled ReportLab ``Paragraph`` objects.

    Each public method requests its own sample stylesheet, picks one named
    style from it, overrides a few attributes and wraps the given text in a
    ``Paragraph`` using that style.
    """

    @staticmethod
    def _styled_paragraph(text, style_name, **overrides):
        """Return ``Paragraph(text, style)`` for a customized sample style.

        Args:
            text: The paragraph text handed to ``Paragraph`` unchanged.
            style_name: Key of the style to fetch from the sample stylesheet.
            **overrides: Attribute name/value pairs assigned onto the style,
                in the order given, before the paragraph is created.
        """
        style = getSampleStyleSheet()[style_name]
        for attribute, value in overrides.items():
            setattr(style, attribute, value)
        return Paragraph(text, style)

    @staticmethod
    def draw_title(title):
        """Return *title* as a centered, black, 18 pt 'Heading1' paragraph."""
        # A CJK-capable font can be selected by also passing
        # fontName='SimSun' once that font has been registered.
        return Graphs._styled_paragraph(
            title,
            'Heading1',
            fontSize=18,
            leading=30,  # line spacing
            textColor=colors.black,
            alignment=1,  # centered
            # NOTE(review): `bold` is kept from the original code; it does not
            # look like an attribute ParagraphStyle reads -- confirm.
            bold=True,
        )

    @staticmethod
    def draw_subtitle(title):
        """Return *title* as a centered, black, 12 pt 'Heading1' paragraph."""
        return Graphs._styled_paragraph(
            title,
            'Heading1',
            fontSize=12,
            leading=20,  # line spacing
            textColor=colors.black,
            alignment=1,  # centered
            # NOTE(review): see draw_title -- `bold` may have no effect.
            bold=True,
        )

    @staticmethod
    def draw_abstract(text):
        """Return *text* as a left-aligned 12 pt 'Normal' body paragraph."""
        return Graphs._styled_paragraph(
            text,
            'Normal',
            fontSize=12,
            wordWrap='CJK',  # enables automatic line wrapping
            alignment=0,  # left-aligned
            firstLineIndent=32,  # indent of the first line
            leading=12,
        )
if __name__ == '__main__':
    # Demo run: a title and a short paragraph, a page break, then a second
    # title, written to report.pdf in letter page size.
    content = [
        Graphs.draw_title('????'),
        Graphs.draw_abstract('?'),
        PageBreak(),
        Graphs.draw_title('11'),
    ]
    SimpleDocTemplate('report.pdf', pagesize=letter).multiBuild(content)
process.py
import pandas as pd
import numpy as np
# Document Title Notice of Violation of IEEE Publication Princi...
# Authors M. Jingyi; T. Zhang; J. Guodong; Y. Wenjun; Y....
# Author Affiliations Gansu Branch, China Meteorological Administrat...
# Publication Title IEEE Access
# Date Added To Xplore 29 Sep 2020
# Publication Year 2020
# Volume 8
# Issue NaN
# Start Page 173949
# End Page 173960
# Abstract The recognition of ground-based cloud images h...
# ISSN 2169-3536
# ISBNs NaN
# DOI 10.1109/ACCESS.2020.3026364
# Funding Information Youth Science Fund Project: Research on Super-...
# PDF Link https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...
# Author Keywords NaN
# IEEE Terms NaN
# INSPEC Controlled Terms NaN
# INSPEC Non-Controlled Terms NaN
# Mesh_Terms NaN
# Article Citation Count 1.0
# Patent Citation Count NaN
# Reference Count 19.0
# License CCBY
# Online Date 24 Sep 2020
# Issue Date NaN
# Meeting Date NaN
# Publisher IEEE
# Document Identifier IEEE Journals
# Name: 0, dtype: object
# Xplore CSV exports that are read and merged, given as relative paths.
xplore_records = [
    '2023_3_27_TGRS_index/2020.csv',
    '2023_3_27_TGRS_index/2021.csv',
    '2023_3_27_TGRS_index/2023.csv',
    '2023_3_27_TGRS_index/2022_new_first.csv',
    '2023_3_27_TGRS_index/2022_old_first.csv',
]

# Three-letter English month abbreviation -> zero-padded month number string.
month_map = {
    abbreviation: '{:02d}'.format(number)
    for number, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
}
# Only rows whose 'Publication Title' equals this journal name are kept.
TARGET_JOURNAL = 'IEEE Transactions on Geoscience and Remote Sensing'


def _clean_text(value):
    """Return *value* as a string, mapping a missing CSV cell (NaN) to ''."""
    return '' if pd.isna(value) else str(value)


def _iso_date(online_date):
    """Convert a date such as '24 Sep 2020' to '2020-09-24'.

    Returns None when the cell is missing, is not made of exactly three
    whitespace-separated parts, or uses a month not present in ``month_map``.
    """
    if pd.isna(online_date):
        return None
    parts = str(online_date).split()
    if len(parts) != 3 or parts[1] not in month_map:
        return None
    day, month, year = parts
    return '{}-{}-{}'.format(year, month_map[month], day.zfill(2))


# DOI -> paper record. Keying by DOI means a paper that appears in several
# exports is stored once; the row read last overwrites earlier ones.
# NOTE(review): a missing DOI becomes the key 'nan', so all DOI-less rows
# would collapse into a single entry -- confirm this cannot happen in the data.
all_paper = {}
for csv_path in xplore_records:
    records = pd.read_csv(csv_path)
    for _, row in records.iterrows():
        if row['Publication Title'] != TARGET_JOURNAL:
            continue
        date = _iso_date(row['Online Date'])
        if date is None:
            # Without a date the record cannot be placed in the date-sorted
            # output, so report it and move on instead of crashing.
            print('skipping record without a usable Online Date:', row['DOI'])
            continue
        all_paper[str(row['DOI'])] = {
            'title': _clean_text(row['Document Title']),
            'abs': _clean_text(row['Abstract']),
            'authors': _clean_text(row['Authors']),
            'date': date,
        }

# Same records re-keyed as '<date>_<doi>' so that sorting the keys orders the
# papers by online date.
sorted_paper = {
    '{}_{}'.format(record['date'], doi): record
    for doi, record in all_paper.items()
}
import os

from reportlab.lib.pagesizes import letter
from reportlab.platypus import PageBreak, SimpleDocTemplate

from generate import Graphs

# Number of PDF files the sorted paper list is split into.
NUM_REPORTS = 12

# Directory that the 'output/report_...' paths below are written into.
OUTPUT_DIR = 'output'

# Titles that are skipped when building the reports.
SKIPPED_TITLES = frozenset([
    'IEEE Transactions on Geoscience and Remote Sensing institutional listings',
    'IEEE Transactions on Geoscience and Remote Sensing publication information',
    'IEEE Transactions on Geoscience and Remote Sensing information for authors',
    'Table of contents',
    '[Front cover]',
    'IEEE Access',
    'Introducing IEEE Collabratec',
    'Imagine a community hopeful for the future',
    'Front Cover',
    'Imagine a community hopeful for the future [Advertisement]',
    'TechRxiv: Share Your Preprint Research with the World!',
])

# The report files are written into this directory, so make sure it exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Keys look like '<date>_<doi>', so a plain sort orders them by date.
n = sorted(sorted_paper)

for chunk in np.array_split(n, NUM_REPORTS):
    # With fewer papers than NUM_REPORTS some chunks are empty; indexing
    # chunk[0] on those would raise IndexError.
    if len(chunk) == 0:
        continue

    # The date part of the first and last key names the output file.
    start, end = chunk[0].split('_')[0], chunk[-1].split('_')[0]
    print(start, end)

    content = []
    for key in chunk:
        paper = sorted_paper[str(key)]
        if paper['title'] in SKIPPED_TITLES:
            continue
        # NOTE(review): Paragraph is understood to parse its text as markup,
        # so a literal '<' or '&' in a title/abstract may need escaping --
        # confirm against real exports.
        content.append(Graphs.draw_title(paper['title']))
        content.append(Graphs.draw_subtitle(paper['authors']))
        content.append(Graphs.draw_subtitle(paper['date']))
        content.append(Graphs.draw_abstract(paper['abs']))
        # Page break after each paper's entry.
        content.append(PageBreak())

    doc = SimpleDocTemplate('output/report_{}to{}.pdf'.format(start, end), pagesize=letter)
    doc.multiBuild(content)