遇到的问题：如果不模拟登录，只能爬取 200 条评论；但是实现模拟登录之后，也只能爬取 500 条数据。
# -*- encoding:utf-8 -*- import requests from bs4 import BeautifulSoup import re import random import time #使用session来保存登陆信息 s = requests.session() #获取动态ip,防止ip被封 def get_ip_list(url, headers): web_data = requests.get(url, headers=headers) soup = BeautifulSoup(web_data.text, 'lxml') ips = soup.find_all('tr') ip_list = [] for i in range(1, len(ips)): ip_info = ips[i] tds = ip_info.find_all('td&#