【chatGPT】用chatGPT写代码（一）----在doc文档里面提取指定字符之间的内容，并保存到excel表。

本文链接：https://blog.csdn.net/hellotzx/article/details/130644854

从 Word 文档（.docx）中提取指定起始字符与结束字符之间的内容，并保存到 Excel 表格（.xls）。

# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

#导入对应库
import os
from docx import Document
import xlwt

def extract_text_between_fields(docx_path, field1, field2, output_path):
    """Extract marked passages from a Word document into an .xls workbook.

    A passage opens at any paragraph containing ``field1`` and stays open
    until a paragraph containing ``field2`` is seen (which may be the very
    same paragraph). Every paragraph that belongs to a passage is collected,
    each followed by a newline.

    The workbook gets a single sheet named 'Extracted Text'. Cell A1 holds
    the header 'Extracted Text'; the collected text is written to A2 and,
    only when it is too long for one cell, continues in A3, A4, ...

    Args:
        docx_path: Path of the .docx file to read.
        field1: Start marker that opens a passage.
        field2: End marker that closes a passage.
        output_path: Path of the .xls file to write.
    """
    # xlwt rejects cell strings longer than this (the Excel per-cell limit).
    max_cell_chars = 32767

    collected = []
    is_between_fields = False
    for paragraph in Document(docx_path).paragraphs:
        text = paragraph.text
        start = text.rfind(field1)
        if start != -1:
            collected.append(text)
            # The passage stays open only when no end marker follows the
            # last start marker within this same paragraph.
            is_between_fields = field2 not in text[start + len(field1):]
        elif is_between_fields:
            collected.append(text)
            if field2 in text:
                is_between_fields = False

    extracted_text = "".join(text + "\n" for text in collected)

    # Create the Excel workbook and worksheet.
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Extracted Text')
    worksheet.write(0, 0, 'Extracted Text')

    # Split over-long text across consecutive rows; an empty result still
    # writes an empty cell to row 1.
    chunks = [
        extracted_text[offset:offset + max_cell_chars]
        for offset in range(0, len(extracted_text), max_cell_chars)
    ] or [""]
    for row, chunk in enumerate(chunks, start=1):
        worksheet.write(row, 0, chunk)

    # Save the Excel workbook.
    workbook.save(output_path)

# 使用示例
def main():
    """Run the extraction with the sample AUTOSAR OS specification paths."""
    # Raw string literals: sequences such as "\W" and "\A" are not valid
    # escapes, and Python deprecates them in ordinary string literals. The
    # resulting path values are unchanged.
    docx_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.docx"  # replace with the real Word document path
    field1 = "[SWS_Os_"  # replace with the start marker
    field2 = "⌋"  # replace with the end marker
    output_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.xls"  # replace with the output Excel workbook path

    extract_text_between_fields(docx_path, field1, field2, output_path)
def print_hi(name):
    """Print the greeting 'Hi, <name>' to standard output."""
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.

# Press the green button in the gutter to run the script.
# Script entry point: greet first, then run the extraction example.
if __name__ == '__main__':
    print_hi('PyCharm')
    main()

# See PyCharm help at https://www.jetbrains.com/help/pycharm/

# This is a sample Python script.

# Press Shift+F10 to execute it or replace it with your code.
# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
import os



from docx import Document
import xlwt

def extract_text_between_fields(docx_path, field1, field2, output_path):
    """Extract marked passages from a Word document into an .xls workbook.

    A passage opens at any paragraph containing ``field1`` and stays open
    until a paragraph containing ``field2`` is seen (which may be the very
    same paragraph). Every paragraph that belongs to a passage is collected,
    each followed by a newline.

    The workbook gets a single sheet named 'Extracted Text'. Cell A1 holds
    the header 'Extracted Text'; the collected text is written to A2 and,
    only when it is too long for one cell, continues in A3, A4, ...

    Args:
        docx_path: Path of the .docx file to read.
        field1: Start marker that opens a passage.
        field2: End marker that closes a passage.
        output_path: Path of the .xls file to write.
    """
    # xlwt rejects cell strings longer than this (the Excel per-cell limit).
    max_cell_chars = 32767

    collected = []
    is_between_fields = False
    for paragraph in Document(docx_path).paragraphs:
        text = paragraph.text
        start = text.rfind(field1)
        if start != -1:
            collected.append(text)
            # The passage stays open only when no end marker follows the
            # last start marker within this same paragraph.
            is_between_fields = field2 not in text[start + len(field1):]
        elif is_between_fields:
            collected.append(text)
            if field2 in text:
                is_between_fields = False

    extracted_text = "".join(text + "\n" for text in collected)

    # Create the Excel workbook and worksheet.
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Extracted Text')
    worksheet.write(0, 0, 'Extracted Text')

    # Split over-long text across consecutive rows; an empty result still
    # writes an empty cell to row 1.
    chunks = [
        extracted_text[offset:offset + max_cell_chars]
        for offset in range(0, len(extracted_text), max_cell_chars)
    ] or [""]
    for row, chunk in enumerate(chunks, start=1):
        worksheet.write(row, 0, chunk)

    # Save the Excel workbook.
    workbook.save(output_path)


# 使用示例
def main():
    """Run the extraction with the sample AUTOSAR OS specification paths."""
    # Raw string literals: sequences such as "\W" and "\A" are not valid
    # escapes, and Python deprecates them in ordinary string literals. The
    # resulting path values are unchanged.
    docx_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.docx"  # replace with the real Word document path
    field1 = "[SWS_Os_"  # replace with the start marker
    field2 = "⌋"  # replace with the end marker
    output_path = r"D:\WorkspaceTZX\实习期工作\AUTOSAR_SWS_OS.xls"  # replace with the output Excel workbook path

    extract_text_between_fields(docx_path, field1, field2, output_path)
def print_hi(name):
    """Write a one-line greeting for ``name`` to standard output."""
    # Build the message first so a breakpoint can inspect it before printing.
    greeting = f'Hi, {name}'
    print(greeting)


# Press the green button in the gutter to run the script.
# Only act when executed as a script: print the greeting, then run the example.
if __name__ == "__main__":
    print_hi('PyCharm')
    main()

# See PyCharm help at https://www.jetbrains.com/help/pycharm/