# Reference:
# https://blog.csdn.net/Jelly_Zhou/article/details/126852970
# Python version
import os
from bs4 import BeautifulSoup
# Walk every sub-directory of `folder`, print the text of each <p> element in
# the files found there, and save the same text to Output.txt in the current
# working directory.
folder = 'folder'

# Number of trailing characters cut from directory / file names before they
# are printed and recorded.
# NOTE(review): presumably a fixed-length ID suffix appended by whatever tool
# exported these files - confirm against the real data.
DIR_SUFFIX_LEN = 33
FILE_SUFFIX_LEN = 37

# Fragments are collected in a list and joined once at the end, instead of
# repeatedly concatenating onto an ever-growing string.
out_parts = []

subdirs = [entry for entry in os.listdir(folder)
           if os.path.isdir(os.path.join(folder, entry))]
for subdir in subdirs:
    subdir_path = os.path.join(folder, subdir)

    # Directory heading: preceded by an empty line in the output.
    heading = subdir[:-DIR_SUFFIX_LEN]
    print(heading)
    out_parts.append('\r\n' + heading + '\r\n')

    file_names = [entry for entry in os.listdir(subdir_path)
                  if os.path.isfile(os.path.join(subdir_path, entry))]
    for file_name in file_names:
        label = ' - ' + file_name[:-FILE_SUFFIX_LEN]
        print(label)
        out_parts.append(label + '\r\n')

        html_path = os.path.join(os.getcwd(), subdir_path, file_name)
        # Parse while the file is open; the handle is not needed afterwards.
        with open(html_path, 'r', encoding='utf-8') as html_file:
            soup = BeautifulSoup(html_file.read(), 'html.parser')

        for paragraph in soup.select('p'):
            print(paragraph.text)
            out_parts.append(paragraph.text + '\r\n')

# Kept as a module-level name in case later code reads the collected text.
out = ''.join(out_parts)

# encoding='utf-8' matches how the inputs are read, so any character that was
# read can also be written regardless of the system locale.
# newline='' writes the explicit '\r\n' sequences as-is; with the default
# newline translation they would become '\r\r\n' on Windows.
# `with` guarantees the file is closed even if the write fails.
with open("Output.txt", "w", encoding='utf-8', newline='') as text_file:
    text_file.write(out)