# coding=gb2312
import csv
import requests
import os
import re
# Download the raw HTML of the Tieba thread and decode it as UTF-8.
# The timeout keeps the script from hanging forever on an unresponsive server.
source = requests.get(
    'https://tieba.baidu.com/p/8638729826', timeout=10
).content.decode('utf-8')

# Keep a local copy of the downloaded page for inspection.
try:
    with open('test.txt', mode='w', encoding='utf-8') as f:
        f.write(source)
except FileNotFoundError:
    # Best-effort fallback kept from the original script: create an empty
    # file. Plain open() is used instead of os.mknod(), which is not
    # available on every platform.
    with open('test.txt', mode='a', encoding='utf-8'):
        pass
    print("文件创建成功")
else:
    print("转为txt文件成功")

# All scraped results, one {'username', 'content'} dict per matched post.
result_list = []

# Capture the text that follows each marker up to the next '<'.
# re.S lets '.' span newlines inside the captured fragment.
username_list = re.findall('p_author_name j_user_card(.*?)<', source, re.S)
content_list = re.findall('d_post_content j_d_post_content(.*?)<', source, re.S)

# zip() pairs usernames with contents and stops at the shorter list, so a
# mismatch in match counts no longer raises IndexError.
for raw_username, raw_content in zip(username_list, content_list):
    result = {
        # Strip everything from the first '"' through the last '>' on the line.
        'username': re.sub('"(.*)>', "", raw_username),
        # Same stripping, additionally consuming text up to the last space.
        'content': re.sub('"(.*)>(.*) ', "", raw_content),
    }
    result_list.append(result)

for result in result_list:
    print(result, '\n')

# newline='' is required by the csv module so that it controls line endings
# itself; without it, extra blank rows appear on Windows.
with open('data.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=['username', 'content'])
    writer.writeheader()
    writer.writerows(result_list)
# python学习之爬取百度贴吧
# 最新推荐文章于 2024-04-22 16:07:30 发布