#!/usr/bin/bash
# by newrain
# time 2019-11-05 21:50:00
# download baidu images
# tag 1.0
. /etc/init.d/functions
# Pre-flight environment report: prints the current time, probes network
# reachability with a single ping to www.baidu.com, and verifies that
# `yum repolist` works.
# Globals:   time, ip (both written)
# Outputs:   status lines on stdout
# Note: echo_success / echo_failure are not defined in this file; they are
# presumably provided by the sourced /etc/init.d/functions. The text message
# after `&&` / `||` is printed depending on the helper's return status.
function check(){
    time=$(date "+%F %X")
    echo "当前时间:$time"

    # From `ip a`, keep the lines mentioning both "inet" and
    # "scope global dynamic", cut everything from the first "/" onwards and
    # print the second whitespace-separated field (the address itself).
    ip=$(ip a | awk '/inet/ && /scope global dynamic/ { sub(/\/.*/, ""); print $2 }')

    # One probe packet, one second deadline.
    if ping -w1 -c1 www.baidu.com &>/dev/null; then
        echo_success && echo "网络正常 当前IP:$ip"
    else
        echo_failure || echo "网络异常,请检查"
    fi

    if ! yum repolist &>/dev/null; then
        echo_failure || echo "yum异常,请检查"
    fi
}
# Search image.baidu.com for a keyword and download every matching .jpg.
# Arguments:
#   $1 - search keyword; also used as the file-name prefix
#   $2 - destination directory (created when missing)
# Outputs:
#   one progress line per candidate URL on stdout, errors on stderr;
#   writes $2/url2.txt (de-duplicated URL list) and $2/<keyword><n>.jpg
# Returns:
#   200 when the crawl ran to completion (callers compare against 200),
#   1   when the directory cannot be created or the search page cannot
#       be fetched
function down(){
    local w data a ok line code target

    # Only try to install wget when it is actually missing.
    command -v wget &>/dev/null || yum install -y wget &>/dev/null

    if ! mkdir -p -- "$2"; then
        echo "无法创建目录 $2" >&2
        return 1
    fi

    # Percent-encode every byte of the keyword. od wraps its output every
    # 16 bytes and (without -v) collapses repeated lines into "*", so force
    # full output and join the lines before turning the separators into "%".
    w=$(printf '%s' "$1" | od -An -v -tx1 | tr -d '\n' | tr -s ' ' '%')

    if ! data=$(curl -s "https://image.baidu.com/search/index?tn=baiduimage&word=$w"); then
        echo "获取搜索页面失败" >&2
        return 1
    fi

    # Quote the page so the shell neither word-splits nor glob-expands it.
    # URLs cannot contain raw quotes or whitespace, so stop a match there;
    # the list is rewritten (not appended) so stale entries are never reused.
    printf '%s\n' "$data" \
        | grep -oP 'https://ss[^"[:space:]]*?\.jpg' \
        | sort -r -u > "$2/url2.txt"

    a=0     # running index, used for the file name
    ok=0    # number of files actually downloaded
    while IFS= read -r line; do
        sleep 1   # be polite: at most one request cycle per second
        target="$2/$1${a}.jpg"
        # Ask curl for the numeric status instead of slicing the status
        # line, whose layout differs between HTTP/1.1 and HTTP/2.
        code=$(curl -s -o /dev/null -I -w '%{http_code}' "$line" 2>/dev/null)
        if [[ "$code" == 200 ]]; then
            if wget -O "$target" "$line" &>/dev/null; then
                echo "下载完成 $line"
                ok=$((ok + 1))
            else
                rm -f -- "$target"   # do not keep a truncated file
                echo "下载失败 $line"
            fi
        else
            echo "无效资源 $line"
        fi
        a=$((a + 1))
    done < "$2/url2.txt"

    echo "全部下载完成,下载资源${ok}个"
    return 200
}
# Pack the download directory into ./image.tar.gz and remove the temporary
# URL list from it.
# Arguments:
#   $1 - directory to archive
# Outputs:
#   progress message and tar's verbose listing on stdout, errors on stderr
# Returns:
#   0 on success, 1 when no directory was given, otherwise tar's exit status
function clea(){
    local rc=0

    # An empty argument would make the rm below target "/url2.txt".
    if [[ -z "$1" ]]; then
        echo "clea: missing directory argument" >&2
        return 1
    fi

    echo "正在打包,请稍后"
    sleep 2
    # Quoted so that directories containing spaces are archived as one path.
    tar -cvzf image.tar.gz -- "$1" || rc=$?
    # url2.txt is a plain file; the list is removed even if packing failed.
    rm -f -- "$1/url2.txt"
    return "$rc"
}
# Run the whole pipeline: environment check, download, then packaging.
# Arguments:
#   $1 - search keyword
#   $2 - destination directory
# Returns:
#   100 when the download step reported 200 and packaging was started,
#   101 otherwise (the destination directory is removed in that case)
function main(){
    local rc

    check
    # Quoted so keywords and paths containing spaces stay single arguments.
    down "$1" "$2"
    rc=$?

    if [[ $rc -eq 200 ]]; then
        clea "$2"
        return 100
    fi

    echo "下载异常,正在清理环境"
    # Never call rm -rf with an empty or word-split path.
    if [[ -n "$2" ]]; then
        rm -rf -- "$2"
    fi
    return 101
}
# Interactive entry point: show the banner, ask for the keyword and the
# destination directory, then hand both to main and report its result.
clear
echo -e "本程序功能为爬取百度网站: image.baidu.com 图片\n礼貌爬取,快乐你我他\n程序设计目的:baidu image 测试"
sleep 1
# -r keeps backslashes typed by the user literal.
read -r -p "请输入要爬取的图片关键字,默认为中国): " words
read -r -p "请输入资源存放路径,默认为 /tmp/image : " path

# Fall back to the advertised defaults when the user just presses Enter.
words=${words:-中国}
path=${path:-/tmp/image}

# Create the destination for custom paths too; -p tolerates an existing one.
if ! mkdir -p -- "$path"; then
    echo "无法创建目录 $path" >&2
    exit 1
fi

main "$words" "$path"
# main returns 100 on success and 101 on failure.
if [ $? -eq 100 ]; then
    echo "程序退出"
else
    echo "程序异常退出"
fi
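# Even a small shell script can do great things.
# (Web-page footer captured with the script, not part of the program: "latest recommended article published 2022-03-31 14:41:41".)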