Scenario
When writing crawlers with scrapy_redis, the spider only starts after an initial URL has been pushed into Redis. Once a project contains many spiders, doing this by hand gets tedious, so it is more convenient to handle it with a small script.
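For context, the consuming side is typically a RedisSpider that blocks on a Redis list until a start URL appears. A minimal sketch, assuming a spider named spider_name1 whose redis_key matches the key this script pushes to (the class name and parse body are illustrative, not from the original project):

from scrapy_redis.spiders import RedisSpider

class SpiderName1(RedisSpider):
    name = "spider_name1"
    # Must match the key that the push script writes to.
    redis_key = "spider_name1:start_url"

    def parse(self, response):
        # Placeholder parse logic.
        yield {"url": response.url}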
Usage
python script_name.py spider_name
Script
import sys

import redis

REDIS_HOST = "redis host"
REDIS_PORT = 6379
REDIS_DB = 0
REDIS_PWD = "redis password"

# Map each spider name to the start URL that should be queued for it.
spiders = {
    "spider_name1": "url1",
    "spider_name2": "url2",
    "spider_name3": "url3",
    "spider_name4": "url4",
}


def update(spider_name):
    client = redis.Redis(
        host=REDIS_HOST,
        port=REDIS_PORT,
        db=REDIS_DB,
        password=REDIS_PWD
    )
    # The key must match the spider's redis_key
    # (scrapy_redis defaults to "<spider name>:start_urls").
    client.lpush(
        "{}:start_url".format(spider_name),
        spiders[spider_name]
    )
    print("Task queued successfully")


if __name__ == '__main__':
    # Argument: the spider to queue a task for.
    if len(sys.argv) < 2:
        print("Missing argument: spider name")
        sys.exit(1)
    spider_name = sys.argv[1]
    if spider_name not in spiders:
        print("Invalid argument: unknown spider name")
        sys.exit(1)
    update(spider_name)
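After pushing, you can check that the URL actually landed in the list before starting the spider. A minimal verification sketch, reusing the same connection settings; the key name mirrors the one the script writes, so adjust it if your spiders use a different redis_key:

import redis

client = redis.Redis(host="redis host", port=6379, db=0, password="redis password")
# Inspect the queued start URLs for spider_name1.
print(client.llen("spider_name1:start_url"))
print(client.lrange("spider_name1:start_url", 0, -1))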