1.User-Agent池
2.IP池
3.禁用COOKIE
#settings.py
COOKIES_ENABLED = False
4.下载延迟
#settings.py
DOWNLOAD_DELAY = 3
5.模拟登录
scrapy.FormRequest方法
#爬虫文件
# -*- coding: utf-8 -*-
import scrapy
class Sun2Spider(scrapy.Spider):
name = 'Sun2'
allowed_domains = ['sun0769.com']
#登录之后的个人主页
start_urls = ['http://sun0769.com/']
strcookies = 'uuid_tt_dd=10_23426688290-1581387665112-721872; dc_session_id=10_1581387665112.225774; searchHistoryArray=%255B%2522Kworky%2522%252C%2522Kwoky%2522%255D; UserName=m0_38055579; UserInfo=57c3962a9b0042e285cb3e108c4b285e; UserToken=57c3962a9b0042e285cb3e108c4b285e; UserNick=%E5%A4%A9%E5%AE%87%E6%8A%80%E5%B8%88; AU=115; UN=m0_38055579; BT=1