# 在 Word(.docx)文档中提取包含指定字符的内容,并保存到 Excel 表。
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
#导入对应库
import os
from docx import Document
import xlwt
def extract_text_between_fields(docx_path, field1, field2, output_path):
    """Save every paragraph of a Word document that contains ``field1``.

    The document is opened with ``Document``. The text of each paragraph
    that contains ``field1`` is collected, each followed by a newline, and
    the combined string is written to one cell of a new workbook, which is
    then saved to ``output_path``.

    Args:
        docx_path: Path of the Word document passed to ``Document``.
        field1: Substring a paragraph must contain to be extracted.
        field2: Not used by the current implementation; kept so existing
            callers that pass an end marker keep working.
        output_path: Path the workbook is saved to.
    """
    doc = Document(docx_path)

    # Build the result with a single join instead of repeated ``+=`` in the
    # loop; every kept paragraph is terminated by a newline, as before.
    extracted_text = "".join(
        f"{paragraph.text}\n"
        for paragraph in doc.paragraphs
        if field1 in paragraph.text
    )

    # Create the Excel workbook and worksheet.
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Extracted Text')

    # Header in the first row, the whole extracted text in the cell below it.
    worksheet.write(0, 0, 'Extracted Text')
    worksheet.write(1, 0, extracted_text)

    # Save the Excel workbook.
    workbook.save(output_path)
# 使用示例
def main():
    """Run the extraction with the example paths and field markers.

    Passes the hard-coded document path, the two field markers and the
    output path to ``extract_text_between_fields``.
    """
    # Raw strings: the Windows paths contain backslashes (e.g. ``\W``, ``\A``)
    # that are invalid escape sequences in ordinary string literals. The
    # resulting path values are unchanged.
    docx_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.docx"  # replace with the actual Word document path
    field1 = "[SWS_Os_"  # replace with the marker for field 1
    field2 = "⌋"  # replace with the marker for field 2
    output_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.xls"  # replace with the output Excel file path
    extract_text_between_fields(docx_path, field1, field2, output_path)
def print_hi(name):
    """Print a greeting of the form ``Hi, <name>`` to standard output."""
    greeting = f'Hi, {name}'
    print(greeting)
# Press the green button in the gutter to run the script.
if __name__ == "__main__":
    # Executed only when the file is run as a script, not when imported:
    # print the greeting first, then run the extraction.
    print_hi("PyCharm")
    main()
# See PyCharm help at https://www.jetbrains.com/help/pycharm/
# This is a sample Python script.
# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import os
from docx import Document
import xlwt
def extract_text_between_fields(docx_path, field1, field2, output_path):
    """Save every paragraph of a Word document that contains ``field1``.

    The document is opened with ``Document``. The text of each paragraph
    that contains ``field1`` is collected, each followed by a newline, and
    the combined string is written to one cell of a new workbook, which is
    then saved to ``output_path``.

    Args:
        docx_path: Path of the Word document passed to ``Document``.
        field1: Substring a paragraph must contain to be extracted.
        field2: Not used by the current implementation; kept so existing
            callers that pass an end marker keep working.
        output_path: Path the workbook is saved to.
    """
    doc = Document(docx_path)

    # Build the result with a single join instead of repeated ``+=`` in the
    # loop; every kept paragraph is terminated by a newline, as before.
    extracted_text = "".join(
        f"{paragraph.text}\n"
        for paragraph in doc.paragraphs
        if field1 in paragraph.text
    )

    # Create the Excel workbook and worksheet.
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Extracted Text')

    # Header in the first row, the whole extracted text in the cell below it.
    worksheet.write(0, 0, 'Extracted Text')
    worksheet.write(1, 0, extracted_text)

    # Save the Excel workbook.
    workbook.save(output_path)
# 使用示例
def main():
    """Run the extraction with the example paths and field markers.

    Passes the hard-coded document path, the two field markers and the
    output path to ``extract_text_between_fields``.
    """
    # Raw strings: the Windows paths contain backslashes (e.g. ``\W``, ``\A``)
    # that are invalid escape sequences in ordinary string literals. The
    # resulting path values are unchanged.
    docx_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.docx"  # replace with the actual Word document path
    field1 = "[SWS_Os_"  # replace with the marker for field 1
    field2 = "⌋"  # replace with the marker for field 2
    output_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.xls"  # replace with the output Excel file path
    extract_text_between_fields(docx_path, field1, field2, output_path)
def print_hi(name):
    """Print a greeting of the form ``Hi, <name>`` to standard output."""
    greeting = f'Hi, {name}'
    print(greeting)
# Press the green button in the gutter to run the script.
if __name__ == "__main__":
    # Executed only when the file is run as a script, not when imported:
    # print the greeting first, then run the extraction.
    print_hi("PyCharm")
    main()
# See PyCharm help at https://www.jetbrains.com/help/pycharm/