构造url
配置:
crawl_config = {
"headers" : headers,
"timeout" : 1000,
"cookies" : Cookie,
"proxy" : "192.168.1.1:8888"
}
# 示例
# Example configuration that only sets the request headers.
crawl_config = {
    "headers": {
        # Content types advertised to the server.
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        # Desktop Chrome user-agent string sent with each request.
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36",
    },
}
构造url:
#url后面带数字的
def __init__(self):
    """Initialise the pagination state used to build numbered page URLs."""
    # URL prefix; on_start concatenates a string onto it to form each URL.
    self.base_url = 'http://xxx...../p'
    # Page counter, starting at 1.
    self.page_num = 1
    # Upper bound of the page loop: on_start runs while
    # page_num <= total_num.
    self.total_num = 5
def on_start(self):
while self.page_num <= self.total_num:
url = self.base_url + str(se