#!/bin/bash
# Configuration for the crawler management script, followed by the conda
# shell bootstrap that makes `conda activate` usable in this non-interactive shell.
CONDA_PATH="/root/anaconda2"                                 # conda installation directory
CONDA_VENV_NAME="douyin"                                     # conda virtual environment name
PROJECT_NAME="douyin"                                        # project name (arbitrary; used in file names)
PROJECT_PATH="/data/douyin/douyinChallenge/douyinChallenge"  # scrapy project path
SPIDER_NAME="douyin_challenge"                               # spider name
PID_FILE="${PROJECT_PATH}/logs/${PROJECT_NAME}.pid"          # PID file path
LOG_FILE="${PROJECT_PATH}/logs/${PROJECT_NAME}$(date +%Y-%m-%d).log"  # log file path (one per day)

# A conda virtual environment is used, so conda's shell integration has to be
# loaded before `conda activate` can work.
# The conda path must be double-quoted: inside $(...) single quotes would keep
# ${CONDA_PATH} literal, the command would never be found, and the hook would
# always be skipped in favour of the fallbacks below.
if __conda_setup="$("${CONDA_PATH}/bin/conda" 'shell.bash' 'hook' 2>/dev/null)"; then
  eval "$__conda_setup"
elif [ -f "${CONDA_PATH}/etc/profile.d/conda.sh" ]; then
  # Fallback 1: source conda's profile script directly.
  . "${CONDA_PATH}/etc/profile.d/conda.sh"
else
  # Fallback 2: at least put conda's bin directory on PATH.
  export PATH="${CONDA_PATH}/bin:$PATH"
fi
unset __conda_setup

# Load the user's shell settings when they exist (avoids an error if they don't).
if [ -f ~/.bashrc ]; then
  source ~/.bashrc
fi
#######################################
# Start the crawler processes in the background and record their PIDs.
# Refuses to start when the PID file already lists processes.
# Globals (read): PID_FILE, LOG_FILE, PROJECT_PATH, CONDA_VENV_NAME, SPIDER_NAME
# Arguments:
#   $2 - optional number of processes to launch (positive integer, default 4).
#        It is the second parameter, so the caller has to forward the script's
#        arguments (e.g. `start "$@"`, where $1 is the action name).
# Outputs: progress messages on stdout, errors on stderr; crawler output is
#          appended to LOG_FILE and one PID per line is appended to PID_FILE.
# Exits the script with status 1 when processes may already be running, the
# process number is invalid, or the environment cannot be prepared.
#######################################
start()
{
  local existing_pids process_num i pid

  if [[ -f "${PID_FILE}" ]]; then
    existing_pids=$(cat -- "${PID_FILE}")
    if [[ -n "${existing_pids}" ]]; then
      echo "maybe process is runing,please stop it first"
      exit 1
    fi
  fi

  process_num=${2:-4}
  if ! [[ "${process_num}" =~ ^[1-9][0-9]*$ ]]; then
    echo "invalid process number: ${process_num}" >&2
    exit 1
  fi

  echo "Starting"
  if ! conda activate "${CONDA_VENV_NAME}"; then
    echo "cannot activate conda environment ${CONDA_VENV_NAME}" >&2
    exit 1
  fi
  # scrapy must be launched from inside the project directory.
  if ! cd "${PROJECT_PATH}"; then
    echo "cannot enter project directory ${PROJECT_PATH}" >&2
    exit 1
  fi
  # The redirections below fail if the log/PID directories are missing.
  if ! mkdir -p -- "$(dirname -- "${PID_FILE}")" "$(dirname -- "${LOG_FILE}")"; then
    echo "cannot create log directory" >&2
    exit 1
  fi

  for (( i = 0; i < process_num; i++ )); do
    nohup scrapy crawl "${SPIDER_NAME}" >>"${LOG_FILE}" 2>&1 &
    pid=$!
    echo "${pid}" >>"${PID_FILE}"
    echo "Started"
  done
}
#######################################
# Stop the crawler processes listed in the PID file.
# Sends signal 2 (SIGINT) to every PID in the file, then empties the file.
# Does nothing (and prints nothing) when the file is missing or empty.
# Globals (read): PID_FILE
# Outputs: "stop success" on stdout; per-PID warnings on stderr.
#######################################
stop()
{
  local pids pid

  [[ -f "${PID_FILE}" ]] || return 0
  pids=$(cat -- "${PID_FILE}")
  [[ -n "${pids}" ]] || return 0

  # Word splitting is intentional: the file holds one PID per line.
  for pid in ${pids}; do
    # Only plain positive integers are passed to kill; values such as
    # "-1" or "0" would otherwise address every process / the whole group.
    if ! [[ "${pid}" =~ ^[1-9][0-9]*$ ]]; then
      echo "skip invalid pid entry: ${pid}" >&2
      continue
    fi
    if ! kill -2 "${pid}" 2>/dev/null; then
      echo "process ${pid} could not be signalled (not running?)" >&2
    fi
  done

  : >"${PID_FILE}"
  echo "stop success"
}
#######################################
# Check whether a single process is alive.
# Globals (read): SPIDER_NAME
# Arguments:
#   $1 - PID to check.
# Outputs: a timestamped status line on stdout.
# Returns:
#   0 when the process can be signalled, or when no PID was given;
#   1 otherwise (no such process, no permission to signal it, or the
#     argument is not a positive integer).
#######################################
checkProcessStatus()
{
  local target_pid=$1

  if [[ -z "${target_pid}" ]]; then
    return 0
  fi

  # `kill -0` delivers no signal; it only tests whether the exact PID can be
  # signalled. This avoids substring matches from `ps | grep`.
  if [[ "${target_pid}" =~ ^[1-9][0-9]*$ ]] \
    && kill -0 "${target_pid}" >/dev/null 2>&1; then
    echo "[$(date)] ${SPIDER_NAME}: Process is alive"
    return 0
  fi

  echo "[$(date)] ${SPIDER_NAME}: Process ${target_pid} is not exists"
  return 1
}
#######################################
# Report, for every PID in the PID file, whether the crawler is running.
# Globals (read): PID_FILE, SPIDER_NAME
# Outputs: one status line per PID on stdout, or a single
#          "No Process Running." line when the file is missing or empty.
#######################################
status()
{
  # Lower-case locals: checkProcessStatus may assign the global PID, which
  # must not change the PID reported here.
  local pids="" pid

  if [[ -f "${PID_FILE}" ]]; then
    pids=$(cat -- "${PID_FILE}")
  fi
  if [[ -z "${pids}" ]]; then
    echo "${SPIDER_NAME} crawler: No Process Running."
    return
  fi

  # Word splitting is intentional: the file holds one PID per line.
  for pid in ${pids}; do
    if checkProcessStatus "${pid}" >/dev/null; then
      echo "${SPIDER_NAME} crawler:${pid} Running Normal."
    else
      echo "${SPIDER_NAME} crawler:${pid} Have Stopped ...."
    fi
  done
}
#######################################
# Stop the crawler processes, then start them again.
# Arguments:
#   "$@" - passed through unchanged to start, so that an optional process
#          count given to restart reaches start in the same position.
# Outputs: progress messages on stdout, plus whatever stop/start print.
#######################################
restart()
{
  echo "Stoping ... "
  stop
  echo "Staring ..."
  start "$@"
}
# Entry point: dispatch on the first command-line argument.
# The full argument list is forwarded to start/restart: start reads the
# optional process count from its own second parameter, so calling it without
# arguments would make that count unreachable.
case "$1" in
  start)
    start "$@"
    RETVAL=$?
    ;;
  stop)
    stop
    RETVAL=$?
    ;;
  status)
    status
    RETVAL=$?
    ;;
  restart)
    restart "$@"
    RETVAL=$?
    ;;
  *)
    echo $"Usage: $0 {start|stop|restart|status}"
    RETVAL=1
    ;;
esac
exit "${RETVAL}"
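# Simple management script for a Scrapy (Python) crawler running in a conda environment.
# (Source page note: latest recommended article published 2023-12-31 13:25:11.)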