I'm a complete beginner just starting to learn web scraping, and my very first practice script failed.
Hoping someone more experienced can explain what's going wrong.
import requests
from bs4 import BeautifulSoup
import pandas as pd

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0"
}

Id_data = []
Time_data = []
Content_data = []
data = pd.DataFrame()

# Scrape the first two pages of the thread
for page in range(1, 3):
    response = requests.get(f"https://tieba.baidu.com/p/6113854821?pn={page}", headers=headers)
    html = response.text
    soup = BeautifulSoup(html, "html.parser")

    # Collect author names, post times, and post bodies on this page
    All_id = soup.find_all("a", attrs={"class": "p_author_name j_user_card"})
    All_time = soup.find_all("span", attrs={"class": "tail-info"})
    All_content = soup.find_all("div", attrs={"class": "p_content"})

    for num in All_id:
        Id_data.append(num.text)
    for Time in All_time:
        Time_data.append(Time.text)
    for content in All_content:
        Content_data.append(content.text)

# Build the table and export it (the three lists must be the same length at this point)
data['id'] = Id_data
data['time'] = Time_data
data['content'] = Content_data
data.to_excel('tieba.xlsx', index=False)
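My own guess (just a guess, since I don't fully understand the error yet) is that the three lists come back with different lengths, for example if each post block contains more than one tail-info span, because pandas refuses to assign unequal-length lists as DataFrame columns. Below is a small check I'm thinking of appending after the script above to confirm whether that's the problem; the tieba_debug.xlsx filename is just something I made up for testing.

# Sanity check (my own debugging idea, not from the tutorial I followed):
# if these three numbers differ, the column assignments above will raise a ValueError.
print(len(Id_data), len(Time_data), len(Content_data))

# Possible workaround while I figure out the right selectors: zip() stops at the
# shortest list, so this at least writes out aligned rows instead of crashing.
rows = list(zip(Id_data, Time_data, Content_data))
debug_df = pd.DataFrame(rows, columns=['id', 'time', 'content'])
debug_df.to_excel('tieba_debug.xlsx', index=False)

Does that sound like the right direction, or is the problem somewhere else (wrong class names, missing encoding, etc.)?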