爬豆瓣1000个用户电影信息

获取1000个用户看的电影信息


#!/bin/bash

# name:         get_see_movies.sh
# version:      1.0
# createTime:   2018-08-12
# description:  输入豆瓣用户id,获取所有看过的电影以及评分,标签等,并保存到当前目录下以"用户名_用户id"命名的文件中
# author:       mengyanhuangchao
# email:        406993906@qq.com

# description:  判断输入参数是否合法

#INFO打印
info_log(){
    echo -e "[INFO]$1"
}

#SUCCESS打印

success_log(){
    echo -e "\033[32m[SUCCESS]\033[0m$1"
}

#ERROR打印
error_log(){
    echo -e "\033[31m[ERROR]\033[0m$1"
}

if [ $# -eq 1 ];then
    if [ -n "$(echo $1| sed -n "/^[0-9]\+$/p")" ];then
        info_log "The user id you searched for is $1"
    else
        error_log "The user id must number"
        exit 1
    fi
else
    error_log "Usage: bash $0 162545416";
    exit 1
fi
user_id=$1

num=1
while [ $num  -lt 20 ];do
    movie_number=`curl -s https://movie.douban.com/people/$user_id/collect|egrep "看过的电影"|awk -F '(' '{print $2}' |awk -F ')' '{print $1}'|uniq`
    user_name=`curl -s https://movie.douban.com/people/$user_id/collect|egrep "看过的电影"|awk -F '看过的电影' '{print $1}'|awk -F '>' '{print $2}'|tail -n1`
    if [  -n "$user_name" ];then
        if [  -n "$movie_number" ];then
            if [ $movie_number -gt 2 ];then
                info_log "$user_name see $movie_number movies"
                info_log 'Please wait a moment....'
                echo "============" >>movietable
                echo "$user_name" >>movietable
                echo "============" >>movietable
                for i in `seq 0 15 $movie_number`;do
                    curl -s https://movie.douban.com/people/$user_id/collect?start=$i > html
                    cat html |egrep "<em>"| egrep -v '= title'|awk -F '>' '{print $2}' |awk -F '<' '{print $1}'|awk -F '/ ' '{print $1}'  > moviename
                    #for moviename in `cat moviename`;do
                     while read line;do
                        moviename=$line
                        cat html| egrep -A13 "<em>$moviename" >test1
                        rating=`cat test1|egrep 'rating' |awk -F 'rating' '{print $2}' |awk -F '-' '{print $1}'`
                        date=`cat test1|egrep 'date' |awk -F '>' '{print $2}' |awk -F '<' '{print $1}'`
                        comment=`cat test1|egrep 'comment' |awk -F '>' '{print $2}' |awk -F '<' '{print $1}'`
                        tags=`cat test1|egrep 'tags' |awk -F '>' '{print $2}' |awk -F '<' '{print $1}'`
                        echo -e "|$moviename |$date |$comment |$tags |$rating" >>movietable
                    done < moviename
                done
                name="$user_name"_"$user_id"
                mv movietable $name
                rm html  moviename  test1
                success_log "all info save $PWD/$name"
                num=$[ $num + 1 ]
                user_id=$[ $user_id + 1 ]
                sleep 3
                echo $user_id
            else
                user_id=$[ $user_id + 1 ]
                sleep 3
                echo $user_id
            fi
        else
            user_id=$[ $user_id + 1 ]
            sleep 3
            echo $user_id
        fi
    else
        user_id=$[ $user_id + 1 ]
        sleep 3
        echo $user_id
    fi
done

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值