import pandas as pd
# Excel workbook to read; loaded below via pd.read_excel.
xls_path = r'C:\Users\xxx\Desktop\面经_搜索_socket.xlsx'
# Substring looked for in each text line (the line is lower-cased before the test).
key_word = 'socket'
# Destination text file; opened for writing as UTF-8 further down.
output_file = r'E:\myfile.txt'
# Number of matched lines per numbered group: a new numeric header line is
# written each time the running count reaches a multiple of this value.
# NOTE(review): despite the name, all output goes into the single output_file.
items_per_file = 20
# Load the 'datatable' sheet into a DataFrame; its rows are later read through
# the '正文' and '链接' columns.
df = pd.read_excel(xls_path, sheet_name='datatable')
def hasKeyWord(line, keyword=None):
    """Return True if *line* contains the keyword, ignoring case.

    Args:
        line: Text line to search.
        keyword: Substring to look for. When omitted, the module-level
            ``key_word`` setting is used, so the function still works as a
            one-argument predicate for ``filter``.

    Returns:
        bool: Whether the keyword occurs anywhere in the line.
    """
    if keyword is None:
        keyword = key_word
    # Lower-case both sides: lowering only the line would make a keyword
    # containing any uppercase character impossible to match.
    return keyword.lower() in line.lower()
# Export every text line that matches the keyword into a tab-indented outline:
#
#   <node index>
#   \t<matched line>
#   \t\t链接
#   \t\t\t<link of the row the line came from>
#
# A new node-index header is emitted after every `items_per_file` entries.

# Running count of matched lines written so far; drives the node numbering.
line_cnt = 0
# Index written as the current node header.
node_idx = 0
with open(output_file, 'w', encoding='utf-8') as f:
    # Header for the first node.
    f.write(f'{node_idx}\n')
    for _, row in df.iterrows():
        body = row['正文']
        # Empty Excel cells are read as NaN (a float), which has no .split();
        # such rows cannot contain the keyword, so skip them.
        if not isinstance(body, str):
            continue

        link = row['链接']
        # A missing or non-text link must not break string concatenation.
        if not isinstance(link, str):
            link = '' if pd.isna(link) else str(link)

        for line in filter(hasKeyWord, body.split('\n')):
            # Strip the line's own leading whitespace, then indent it one
            # level so it becomes a child of the current node header.
            entry = '\t' + (line + '\n\t\t链接\n\t\t\t' + link + '\n').lstrip()
            print(entry)
            f.write(entry)
            line_cnt += 1
            # NOTE: the next header is written right after a group fills up,
            # so the file may end with a header that has no entries under it.
            if line_cnt % items_per_file == 0:
                node_idx = line_cnt // items_per_file
                f.write(f'{node_idx}\n')
# 面试信息处理Python脚本 (Python script for processing interview information)
# 于 2023-05-30 15:19:03 首次发布 (first published 2023-05-30 15:19:03)