#!/bin/bash
#####################################################################
# Script: autoPicSmth.sh
# Author: cheyo
# Email: icheyo at Gmail dot com
# From: www.icheyo.net
# Date: 2008-02-22
#
# Description:
# This script is used for downloading pictures from the MyPhoto
# board in the newsmth.net automatically.
#
#####################################################################
# Usage: autoPicSmth.sh [days]
#        days: download all pictures of recent /days/ days
# For Example: ./autoPicSmth.sh 3

WORKING_DIR=working            # scratch dir, removed at end of run
PIC_OUT_DIR=photo              # downloaded pictures land in ../photo
DAYS_TO_DOWN=3                 # default look-back window (days)
QUERY_FILE=QueryResult.tmp
THREAD_FILE=ThreadUrl.tmp
FORMAT_FILE=ThreadInfo.tmp
CURR_THREAD_FILE=CurrThread.tmp
PIC_URL_FILE=PicUrl.tmp
PIC_DOWN_LOG=PicDown.log
PIC_INFO_FILE1=PicInfo1.tmp
PIC_INFO_FILE2=PicInfo2.tmp
PIC_INFO_FILE3=PicInfoFinal.tmp

# ------------------------------------------------------------------ #
# ShowUsage()
# Show the usage of this script
# ------------------------------------------------------------------ #
ShowUsage()
{
    echo "This script is used for automatic downloading pictures from MyPhoto board in the newsmth.net"
    echo " Usage:   autoPicSmth.sh [days]"
    echo "          days: download all pictures of recent /days/ days. 3 for default."
    echo " Example: ./autoPicSmth.sh 3"
}

# Check arguments: at most one optional [days] argument is accepted.
if [ $# -gt 1 ]; then
    ShowUsage
    exit 1                     # fix: was the single token 'exit1'
elif [ $# -eq 1 ]; then
    DAYS_TO_DOWN=$1
fi

mkdir -p "$WORKING_DIR"        # fix: was 'mkdir-p'
cd "$WORKING_DIR" || exit 1    # abort if scratch dir is unusable

# Get the thread search result HTML page to local.
SearchUrl="http://bbs4.newsmth.net/bbsbfind.php?q=1&board=MyPhoto&dt=${DAYS_TO_DOWN}&ag=1"
curl "${SearchUrl}" -o "${QUERY_FILE}"    # fix: space before -o was missing

# Create a file to store all thread URLs.
# The search page links look like <a href="bbscon.php?bid=...">; the
# awk field separator splits on < > and " so $9 is the relative URL.
# fix: original egrep pattern had mangled quoting and an unescaped '?'.
egrep '<a href="bbscon\.php\?bid=' "$QUERY_FILE" |
    awk -F'[<>"]' '{print "http://bbs4.newsmth.net/"$9}' > "$THREAD_FILE"

ThreadCount=$(wc -l < "$THREAD_FILE")
echo "Total ${ThreadCount} threads are found."
# Create a file to store all BoardId and ThreadId.
# Thread URLs look like ...bbscon.php?bid=<BoardId>&id=<ArticleId>,
# so splitting on '=' and '&' puts BoardId in $2 and ArticleId in $4.
awk -F'[=&]' '{print $2,$4}' "$THREAD_FILE" > "$FORMAT_FILE"

# Create a file to store all pictures information.
# Format: BoardId ArticleId FileName FileSize FileId
echo "# BoardId ArticleId FileName FileSize FileId" > "$PIC_INFO_FILE1"

# fix: was 'cat file|whileread ...'; read directly from the file so no
# 'cat' is needed and the loop body runs in the current shell.
while read BoardId ArticleId
do
    ThreadUrl="http://bbs4.newsmth.net/bbscon.php?bid=$BoardId&id=$ArticleId"
    curl "$ThreadUrl" -o "$CURR_THREAD_FILE"    # fix: missing space before -o
    # Each attachment appears in an 'attach(...)' call in the page source;
    # split on quote/space/comma/paren to pull out name ($2), size ($5), id ($7).
    # fix: 'grep"attach"', 'tr ");"") "' and the unquoted -F[' ,)] were mangled.
    grep "attach" "$CURR_THREAD_FILE" | tr ");" ") " | grep "attach" |
        awk -F"[' ,)]" -v BoardId="$BoardId" -v ArticleId="$ArticleId" \
            '{print BoardId,ArticleId,$2,$5,$7}' >> "$PIC_INFO_FILE1"
done < "$FORMAT_FILE"

# Create a file to store all pictures info with file extension name
# but not full file name.
# Format: BoardId ArticleId FileExt FileSize FileId
echo "# BoardId ArticleId FileExt FileSize FileId" > "$PIC_INFO_FILE2"
awk -F'[. ]' '$0~/^[^#]/ {print $1,$2,$4,$5,$6}' "$PIC_INFO_FILE1" >> "$PIC_INFO_FILE2"

# Remove the records which don't contain enough info;
# in the normal case there should be 5 columns in the file.
# fix: filename and redirection were glued to the awk program text.
awk '$5~/^[^$]/ {print $0}' "$PIC_INFO_FILE2" > "$PIC_INFO_FILE3"

# Create a file to store all picture URLs.
# Attachments over 50 KiB are served under the 'p' path, smaller ones
# under 's' (as in the original script) — TODO confirm against the site.
grep '^[^#]' "$PIC_INFO_FILE3" | while read BoardId ArticleId FileExt FileSize FileId
do
    if [ "$FileSize" -gt 51200 ]; then    # fix: '$FileSize-gt' missing space
        FileType="p"
    else
        FileType="s"
    fi
    PicUrl="http://att.newsmth.net/att.php?$FileType.$BoardId.$ArticleId.$FileId.$FileExt"
    echo "$PicUrl" >> "$PIC_URL_FILE"
done

# Remove all duplicated URLs from the file.
mv "${PIC_URL_FILE}" "${PIC_URL_FILE}.tmp"
sort -dfu "${PIC_URL_FILE}.tmp" > "${PIC_URL_FILE}"    # fix: was 'sort-dfu'
rm "${PIC_URL_FILE}.tmp"

# Remove the URLs which have been downloaded before, by diffing against
# the URL column ($3) of the previous download log.
if [ -f "../${PIC_OUT_DIR}/${PIC_DOWN_LOG}" ]; then
    cp "../$PIC_OUT_DIR/${PIC_DOWN_LOG}" .
    awk '{print $3}' "${PIC_DOWN_LOG}" > "${PIC_URL_FILE}.history"
    sort -dfu "${PIC_URL_FILE}.history" > "${PIC_URL_FILE}.tmp"
    mv "${PIC_URL_FILE}.tmp" "${PIC_URL_FILE}.history"
    # Keep only lines unique to the new URL list (column 2 of comm).
    # fix: 'comm -1-3' is not a valid option spelling; use -13.
    comm -13 "${PIC_URL_FILE}.history" "${PIC_URL_FILE}" > "${PIC_URL_FILE}.tmp"
    mv "${PIC_URL_FILE}.tmp" "${PIC_URL_FILE}"
    rm "${PIC_URL_FILE}.history"
fi

# Download all pictures from server to local.
PicCount=$(wc -l < "$PIC_URL_FILE")
PicIndex=1
mkdir -p "../$PIC_OUT_DIR"    # fix: was 'mkdir-p'
echo "Total number of pictures to be downloaded: $PicCount"

# fix: 'cat file|whileread' ran the loop in a subshell, so the PicIndex
# increment was lost; redirecting into the loop keeps it in this shell.
while read CurrUrl
do
    # The att.php query string doubles as the local file name.
    FileName=$(echo "$CurrUrl" | awk -F'[?]' '{print $2}')
    echo "[$PicIndex/$PicCount] Start to download $CurrUrl"
    curl "$CurrUrl" -o "../$PIC_OUT_DIR/$FileName"    # fix: missing space before -o

    # Write download log to log file: "<date> <time> <url>".
    CurrTime=$(date +"%Y-%m-%d %H:%M:%S")
    echo "$CurrTime $CurrUrl" >> "../$PIC_OUT_DIR/$PIC_DOWN_LOG"

    echo "[$PicIndex/$PicCount] Download finished."
    echo ""
    # fix: 'expr $PicIndex+1' (no spaces) returns the literal string and
    # never increments; use shell arithmetic instead.
    PicIndex=$((PicIndex + 1))
done < "$PIC_URL_FILE"

#mv $PIC_URL_FILE ../$PIC_OUT_DIR/PicUrl.list
#mv $PIC_INFO_FILE3 ../$PIC_OUT_DIR/PicInfo.list

cd ..
rm -r "$WORKING_DIR"    # drop all scratch files
echo "All Pictures Downloading finished."
写了一个脚本,用于自动从水木社区(Newsmth.net)的MyPhoto版下载图片到本地。运行该脚本后,将自动把MyPhoto版最近N天(默认为3天,可通过第一个参数指定)内的所有图片下载到本地的photo目录下。用法:1、把以下代码保存为autoPicSmth.sh;2、为脚本增加可执行权限,并运行脚本。CHEYO:~/auto # chmod +x aut