一、通过网络爬虫从网上抓取到多个xml小文件,现将这些文件合并并转换为txt文件。
#coding=utf-8
import os
from bs4 import BeautifulSoup
# Merge every scraped XML file found in one folder into a single text file.
# Each <item> element contributes one output line of the form
# "time,comment,like".

# 1. Folder that holds the XML files.
filePath = "G:\\DataCenter\\stoke_critic_data\\"
# 2. Names of all entries in that folder.
files = os.listdir(path=filePath)

# The output is opened in append mode, so running the script again adds to
# whatever the file already contains. Backslashes are escaped explicitly; the
# resulting path is the same as before.
with open("G:\\DataCenter\\comment_data.txt", "a", encoding="utf-8") as outF:
    for fileName in files:
        xmlPath = os.path.join(filePath, fileName)
        # os.listdir also returns sub-directories; open() would fail on them.
        if not os.path.isfile(xmlPath):
            continue

        # Parse the whole document while the file is open, then release the
        # handle before walking the parsed tree.
        with open(xmlPath, "r", encoding="utf-8") as xmlF:
            xmlContent = BeautifulSoup(xmlF, "xml")
        itemsList = xmlContent.find_all("item")

        # Write one line per <item> of the current XML file.
        for items in itemsList:
            time = items.time.text
            comment = items.comment.text
            like = items.like.text.strip()
            # A <like> value equal to this literal is recorded as "0".
            # NOTE(review): presumably the placeholder label scraped when a
            # comment has no likes yet -- confirm against the crawler output.
            if like == '点赞':
                like = "0"
            outContent = time.strip() + ',' + comment.strip() + ',' + like + '\n'
            outF.write(outContent)

print("all done!")