直接来点干货
先介绍一下大致的实现流程:
- 上传带有目录结构的pdf文件
- 对目录结构进行解析
- 解析结果保存到数据库
以下是具体的实现过程(从代码中的类名来看,PDF 解析部分使用 pdfminer,树形结构的入库使用 django-treebeard 的 MP_Node):
class PdfNode(object):
    """A node of the outline (bookmark) tree read from a PDF file.

    Attributes:
        children: child nodes, in the order they were appended.
        name: title of the outline entry.
        parent: the parent ``PdfNode``, or ``None`` for the root.
        top: element 3 of the entry's destination array (presumably the
            vertical offset of the target on the page -- TODO confirm).
        level: nesting depth; the synthetic root uses level 0.
        page: index of the target page as produced by ``enumerate`` over the
            document's pages (so it is zero-based for real entries).
        pk: identifier of the node; a random UUID when not supplied.
    """

    def __init__(self, name, parent, level, page, top, pk=None):
        """Create a node; ``pk`` defaults to a freshly generated UUID4."""
        self.children = []
        self.name = name
        # Keep the parent that the caller passed in. The previous code
        # discarded the argument and always stored None.
        self.parent = parent
        self.top = top
        self.level = level
        self.page = page
        # Identity check: only a missing pk triggers UUID generation.
        self.pk = uuid.uuid4() if pk is None else pk

    class Stack:
        """Minimal LIFO stack used while rebuilding the outline hierarchy."""

        def __init__(self):
            self.items = []

        def push(self, o):
            """Put ``o`` on top of the stack."""
            self.items.append(o)

        def pop(self):
            """Discard the top item; do nothing when the stack is empty.

            Always returns ``None`` -- use ``top()`` to read the item first.
            """
            if self.items:
                del self.items[-1]

        def top(self):
            """Return the top item without removing it, or ``None`` if empty."""
            if not self.items:
                return None
            return self.items[-1]

    @classmethod
    def get_outlines(cls, file, password=None):
        """Parse the outline of the PDF at path ``file`` into a node tree.

        Args:
            file: path of the PDF file; it is opened in binary mode.
            password: forwarded unchanged to ``PDFDocument``.
                NOTE(review): confirm that the PDF library accepts ``None``
                here for encrypted documents.

        Returns:
            A synthetic root ``PdfNode`` (empty name, level 0) whose
            descendants mirror the outline entries of the document.

        Errors raised by ``open`` or by the PDF library are not caught here
        and propagate to the caller.
        """
        root_node = cls('', None, 0, 1, 0)
        stack = cls.Stack()
        stack.push(root_node)

        # The context manager guarantees the file handle is released, even
        # when parsing fails. Everything that reads from the document must
        # therefore happen inside this block.
        with open(file, 'rb') as fp:
            parser = PDFParser(fp)
            document = PDFDocument(parser, password)
            outlines = document.get_outlines()
            # Map each page's object id to its position in the document.
            pages = {
                page.pageid: pageno
                for pageno, page in enumerate(PDFPage.create_pages(document))
            }
            for level, title, dest, _se, _a in outlines:
                # Assumes ``dest`` is an explicit destination array whose
                # first element references the page and whose element 3 is
                # the top offset -- TODO confirm for entries that use an
                # action or a named destination instead.
                page = pages[dest[0].objid]
                top = dest[3]

                # Unwind until the top of the stack is a strictly shallower
                # node; that node is the parent of the current entry.
                parent = stack.top()
                while parent.level >= level:
                    stack.pop()
                    parent = stack.top()

                new_node = cls(title, parent, level, page, top)
                stack.push(new_node)
                parent.children.append(new_node)
        return root_node
class FileOutlineUser(MP_Node):
    """Database model holding one entry of a file's outline tree.

    Rows are created through the ``add_root`` / ``add_child`` methods
    inherited from ``MP_Node``, so the hierarchy of the parsed outline is
    preserved in the table.
    """

    id = models.UUIDField(primary_key=True, editable=False)
    name = models.CharField('名称', max_length=50)
    isrequirement = models.NullBooleanField('是否是需求向', null=True)
    reason = models.CharField('需求基线的原因', max_length=300, null=True)
    top = models.FloatField('距离页面顶端的距离(单位像素)', null=True)
    page = models.IntegerField('所在页码', null=True)
    requirement_id = models.OneToOneField(
        RequirementUser,
        db_column='requirement_id',
        null=True,
        on_delete=models.CASCADE,
    )

    class Meta:
        db_table = 'FileOutlineUser'
        verbose_name = '文件轮廓信息'
        verbose_name_plural = '文件轮廓信息表'

    # NOTE(review): __unicode__ is only consulted on Python 2; on Python 3
    # Django uses __str__ instead -- confirm the target runtime.
    def __unicode__(self):
        """Return a label containing the outline entry's name."""
        return '文件轮廓名称: %s' % self.name

    @classmethod
    def insert_dbchildren(cls, parentNode, dbNode):
        """Recursively store the descendants of ``parentNode`` under ``dbNode``.

        ``parentNode`` is an in-memory node exposing ``children``, ``pk``,
        ``name``, ``top`` and ``page``; ``dbNode`` is the already stored row
        that corresponds to it.
        """
        for outline_node in parentNode.children:
            child_row = dbNode.add_child(
                id=outline_node.pk,
                name=outline_node.name,
                top=outline_node.top,
                page=outline_node.page,
            )
            child_row.save()
            # Descend only when this entry has sub-entries of its own.
            if outline_node.children:
                cls.insert_dbchildren(outline_node, child_row)

    @classmethod
    def insert_dbroot(cls, root_node, name, id):
        """Store ``root_node`` as a tree root, then store all its descendants.

        The root row takes its primary key from ``root_node.pk``, is given
        ``name``, fixed ``top``/``page`` values of 1, and ``id`` as the key
        of the linked requirement.
        """
        root_row = cls.add_root(
            pk=root_node.pk,
            name=name,
            top=1,
            page=1,
            requirement_id_id=id,
        )
        root_row.save()
        cls.insert_dbchildren(root_node, root_row)
# Example usage: parse the outline tree of a PDF file. The string literal
# below is a placeholder (it reads "file path") and is meant to be replaced
# with the real location of the uploaded PDF.
file = PdfNode.get_outlines("文件路径")
# Persist the parsed tree; the root row is stored with an empty name.
# NOTE(review): `requirementuser` is not defined in this snippet --
# presumably an existing RequirementUser instance; verify against the caller.
FileOutlineUser.insert_dbroot(file, '', requirementuser.requirement_id)