写了一个脚本,用于自动从水木社区(Newsmth.net)的MyPhoto版下载图片到本地。
运行该脚本后,将自动从水木社区的MyPhoto版下载最近N(默认为3,可通过第一个参数指定)天内的所有图片到本地的photo目录下。
用法:
1、把以下代码保存为autoPicSmth.sh
2、为脚本增加可执行权限,并运行脚本。
CHEYO:~/auto # chmod +x autoPicSmth.sh
CHEYO:~/auto # ./autoPicSmth.sh
CHEYO:~/auto # ./autoPicSmth.sh 5
脚本写得比较粗糙,欢迎优化改进。
源码:
#
!/bin/bash
#####################################################################
# Script: autoPicSmth.sh
# Author: cheyo
# Email: icheyo at Gmail dot com
# From: www.icheyo.net
# Date: 2008-02-22
#
# Description:
# This script is used for downloading pictures from the MyPhoto
# board in the newsmth.net automatically.
#
#####################################################################
# Usage: autoPicSmth.sh [days]
# days: download all pictures of recent /days/ days
# For Example: ./autoPicSmth.sh 3
WORKING_DIR = working
PIC_OUT_DIR = photo
DAYS_TO_DOWN = 3
QUERY_FILE = QueryResult . tmp
THREAD_FILE = ThreadUrl . tmp
FORMAT_FILE = ThreadInfo . tmp
CURR_THREAD_FILE = CurrThread . tmp
PIC_URL_FILE = PicUrl . tmp
PIC_DOWN_LOG = PicDown . log
PIC_INFO_FILE1 = PicInfo1 . tmp
PIC_INFO_FILE2 = PicInfo2 . tmp
PIC_INFO_FILE3 = PicInfoFinal . tmp
# ------------------------------------------------------------------ #
# ShowUsage()
# Print a short help text describing how to invoke this script.
# Outputs: usage text on stdout. Returns: 0.
# ------------------------------------------------------------------ #
ShowUsage()
{
    echo "This script is used for automatic downloading pictures from MyPhoto board in the newsmth.net"
    echo "Usage: autoPicSmth.sh [days]"
    echo "  days: download all pictures of recent /days/ days. 3 for default."
    echo "Example: ./autoPicSmth.sh 3"
}
# Parse arguments: at most one optional argument (the day count).
# More than one argument is an error; exactly one overrides the default.
if [ $# -gt 1 ]; then
    ShowUsage
    exit 1
elif [ $# -eq 1 ]; then
    DAYS_TO_DOWN=$1
fi
mkdir -p $WORKING_DIR
cd $WORKING_DIR || exit 1

# Fetch the board search-result page listing all threads from the
# last $DAYS_TO_DOWN days.
SearchUrl="http://bbs4.newsmth.net/bbsbfind.php?q=1&board=MyPhoto&dt=${DAYS_TO_DOWN}&ag=1"
curl "${SearchUrl}" -o ${QUERY_FILE}

# Extract every thread link (bbscon.php?...) from the result page and
# turn it into an absolute URL, one per line.
grep -E "<a href=\"bbscon.php\?bid=" $QUERY_FILE \
    | awk -F'[<>"]' '{print "http://bbs4.newsmth.net/"$9}' > $THREAD_FILE
ThreadCount=$(wc -l < $THREAD_FILE)
echo "Total ${ThreadCount} threads are found."

# Reduce each thread URL to "BoardId ArticleId" (fields 2 and 4 when
# split on '=' and '&').
awk -F'[=&]' '{print $2,$4}' $THREAD_FILE > $FORMAT_FILE

# Collect attachment info from every thread page.
# Format: BoardId ArticleId FileName FileSize FileId
echo "# BoardId ArticleId FileName FileSize FileId" > $PIC_INFO_FILE1
while read BoardId ArticleId
do
    ThreadUrl="http://bbs4.newsmth.net/bbscon.php?bid=$BoardId&id=$ArticleId"
    curl "$ThreadUrl" -o $CURR_THREAD_FILE
    # Put each attach(...) entry on its own line, then pull out the
    # file name ($2), size ($5) and id ($7).
    # NOTE(review): the ';' -> newline translation is reconstructed from
    # a mangled original ('tr " ); " " ) "') -- verify against a live page.
    grep "attach" $CURR_THREAD_FILE | tr ";" "\n" | grep "attach" \
        | awk -F"[',)]" -v BoardId=$BoardId -v ArticleId=$ArticleId \
              '{print BoardId, ArticleId, $2, $5, $7}' >> $PIC_INFO_FILE1
done < $FORMAT_FILE
# Reduce each record to the file EXTENSION instead of the full name.
# Splitting on '.' and ' ' assumes the file name contains exactly one
# dot -- names with several dots lose fields. TODO confirm acceptable.
# Format: BoardId ArticleId FileExt FileSize FileId
awk -F'[. ]' '$0 ~ /^[^#]/ {print $1,$2,$4,$5,$6}' $PIC_INFO_FILE1 > $PIC_INFO_FILE2

# Keep only records that yielded all 5 columns; partial parses from the
# previous step are dropped.
awk 'NF == 5' $PIC_INFO_FILE2 > $PIC_INFO_FILE3

# Build the attachment URL for every picture.  Attachments over 50 KB
# use type "p", smaller ones type "s" (server-side naming convention).
grep "^[^#]" $PIC_INFO_FILE3 | while read BoardId ArticleId FileExt FileSize FileId
do
    if [ "$FileSize" -gt 51200 ]; then
        FileType="p"
    else
        FileType="s"
    fi
    echo "http://att.newsmth.net/att.php?$FileType.$BoardId.$ArticleId.$FileId.$FileExt" >> $PIC_URL_FILE
done
# Remove all duplicated URLs from the file (sort in place).
sort -dfu ${PIC_URL_FILE} -o ${PIC_URL_FILE}

# Drop URLs that were already downloaded in an earlier run, as recorded
# in the download log kept next to the pictures.
if [ -f "../${PIC_OUT_DIR}/${PIC_DOWN_LOG}" ]; then
    # Log line format: "YYYY-mm-dd HH:MM:SS URL" -> the URL is field 3.
    awk '{print $3}' "../${PIC_OUT_DIR}/${PIC_DOWN_LOG}" \
        | sort -dfu > ${PIC_URL_FILE}.history
    # comm -1 -3 keeps only lines unique to the new URL list.
    comm -1 -3 ${PIC_URL_FILE}.history ${PIC_URL_FILE} > ${PIC_URL_FILE}.tmp
    mv ${PIC_URL_FILE}.tmp ${PIC_URL_FILE}
    rm ${PIC_URL_FILE}.history
fi
# Download every remaining URL into ../$PIC_OUT_DIR and append a log
# record for each so future runs can skip already-fetched pictures.
PicCount=$(wc -l < $PIC_URL_FILE)
PicIndex=1
mkdir -p ../$PIC_OUT_DIR
echo "Total number of pictures to be downloaded: $PicCount"
while read CurrUrl
do
    # Local file name = the URL query string ("p.<bid>.<aid>.<fid>.<ext>").
    FileName=${CurrUrl#*\?}
    echo "[$PicIndex/$PicCount] Start to download $CurrUrl"
    curl "$CurrUrl" -o ../$PIC_OUT_DIR/$FileName
    # Log format must stay "date time URL": the history filter reads $3.
    CurrTime=$(date +"%Y-%m-%d %H:%M:%S")
    echo "$CurrTime $CurrUrl" >> "../$PIC_OUT_DIR/$PIC_DOWN_LOG"
    echo "[$PicIndex/$PicCount] Download finished."
    echo ""
    PicIndex=$((PicIndex + 1))
done < $PIC_URL_FILE

# mv $PIC_URL_FILE ../$PIC_OUT_DIR/PicUrl.list
#mv $PIC_INFO_FILE3 ../$PIC_OUT_DIR/PicInfo.list

# Clean up the scratch directory.
cd ..
rm -r $WORKING_DIR
echo "All Pictures Downloading finished."
#####################################################################
# Script: autoPicSmth.sh
# Author: cheyo
# Email: icheyo at Gmail dot com
# From: www.icheyo.net
# Date: 2008-02-22
#
# Description:
# This script is used for downloading pictures from the MyPhoto
# board in the newsmth.net automatically.
#
#####################################################################
# Usage: autoPicSmth.sh [days]
#   days: download all pictures of recent /days/ days
# For Example: ./autoPicSmth.sh 3

# Scratch directory (removed at the end of the run).
WORKING_DIR=working
# Output directory for downloaded pictures, relative to the start dir.
PIC_OUT_DIR=photo
# Default look-back window in days; may be overridden by argument 1.
DAYS_TO_DOWN=3
# Intermediate files, all created inside $WORKING_DIR.
QUERY_FILE=QueryResult.tmp
THREAD_FILE=ThreadUrl.tmp
FORMAT_FILE=ThreadInfo.tmp
CURR_THREAD_FILE=CurrThread.tmp
PIC_URL_FILE=PicUrl.tmp
PIC_DOWN_LOG=PicDown.log
PIC_INFO_FILE1=PicInfo1.tmp
PIC_INFO_FILE2=PicInfo2.tmp
PIC_INFO_FILE3=PicInfoFinal.tmp

# ------------------------------------------------------------------ #
# ShowUsage()
# Print a short help text describing how to invoke this script.
# ------------------------------------------------------------------ #
ShowUsage()
{
    echo "This script is used for automatic downloading pictures from MyPhoto board in the newsmth.net"
    echo "Usage: autoPicSmth.sh [days]"
    echo "  days: download all pictures of recent /days/ days. 3 for default."
    echo "Example: ./autoPicSmth.sh 3"
}

# Parse arguments: at most one optional argument (the day count).
if [ $# -gt 1 ]; then
    ShowUsage
    exit 1
elif [ $# -eq 1 ]; then
    DAYS_TO_DOWN=$1
fi

mkdir -p $WORKING_DIR
cd $WORKING_DIR || exit 1

# Fetch the board search-result page listing all threads from the
# last $DAYS_TO_DOWN days.
SearchUrl="http://bbs4.newsmth.net/bbsbfind.php?q=1&board=MyPhoto&dt=${DAYS_TO_DOWN}&ag=1"
curl "${SearchUrl}" -o ${QUERY_FILE}

# Extract every thread link from the result page as an absolute URL.
grep -E "<a href=\"bbscon.php\?bid=" $QUERY_FILE \
    | awk -F'[<>"]' '{print "http://bbs4.newsmth.net/"$9}' > $THREAD_FILE
ThreadCount=$(wc -l < $THREAD_FILE)
echo "Total ${ThreadCount} threads are found."

# Reduce each thread URL to "BoardId ArticleId".
awk -F'[=&]' '{print $2,$4}' $THREAD_FILE > $FORMAT_FILE

# Collect attachment info from every thread page.
# Format: BoardId ArticleId FileName FileSize FileId
echo "# BoardId ArticleId FileName FileSize FileId" > $PIC_INFO_FILE1
while read BoardId ArticleId
do
    ThreadUrl="http://bbs4.newsmth.net/bbscon.php?bid=$BoardId&id=$ArticleId"
    curl "$ThreadUrl" -o $CURR_THREAD_FILE
    # Put each attach(...) entry on its own line, then pull out the
    # file name ($2), size ($5) and id ($7).
    # NOTE(review): the ';' -> newline translation is reconstructed from
    # a mangled original -- verify against a live page.
    grep "attach" $CURR_THREAD_FILE | tr ";" "\n" | grep "attach" \
        | awk -F"[',)]" -v BoardId=$BoardId -v ArticleId=$ArticleId \
              '{print BoardId, ArticleId, $2, $5, $7}' >> $PIC_INFO_FILE1
done < $FORMAT_FILE

# Reduce each record to the file EXTENSION instead of the full name.
# Format: BoardId ArticleId FileExt FileSize FileId
awk -F'[. ]' '$0 ~ /^[^#]/ {print $1,$2,$4,$5,$6}' $PIC_INFO_FILE1 > $PIC_INFO_FILE2

# Keep only records that yielded all 5 columns.
awk 'NF == 5' $PIC_INFO_FILE2 > $PIC_INFO_FILE3

# Build the attachment URL for every picture.  Attachments over 50 KB
# use type "p", smaller ones type "s" (server-side naming convention).
grep "^[^#]" $PIC_INFO_FILE3 | while read BoardId ArticleId FileExt FileSize FileId
do
    if [ "$FileSize" -gt 51200 ]; then
        FileType="p"
    else
        FileType="s"
    fi
    echo "http://att.newsmth.net/att.php?$FileType.$BoardId.$ArticleId.$FileId.$FileExt" >> $PIC_URL_FILE
done

# Remove all duplicated URLs from the file (sort in place).
sort -dfu ${PIC_URL_FILE} -o ${PIC_URL_FILE}

# Drop URLs that were already downloaded in an earlier run.
if [ -f "../${PIC_OUT_DIR}/${PIC_DOWN_LOG}" ]; then
    # Log line format: "YYYY-mm-dd HH:MM:SS URL" -> the URL is field 3.
    awk '{print $3}' "../${PIC_OUT_DIR}/${PIC_DOWN_LOG}" \
        | sort -dfu > ${PIC_URL_FILE}.history
    comm -1 -3 ${PIC_URL_FILE}.history ${PIC_URL_FILE} > ${PIC_URL_FILE}.tmp
    mv ${PIC_URL_FILE}.tmp ${PIC_URL_FILE}
    rm ${PIC_URL_FILE}.history
fi

# Download every remaining URL into ../$PIC_OUT_DIR and log each one.
PicCount=$(wc -l < $PIC_URL_FILE)
PicIndex=1
mkdir -p ../$PIC_OUT_DIR
echo "Total number of pictures to be downloaded: $PicCount"
while read CurrUrl
do
    # Local file name = the URL query string.
    FileName=${CurrUrl#*\?}
    echo "[$PicIndex/$PicCount] Start to download $CurrUrl"
    curl "$CurrUrl" -o ../$PIC_OUT_DIR/$FileName
    # Log format must stay "date time URL": the history filter reads $3.
    CurrTime=$(date +"%Y-%m-%d %H:%M:%S")
    echo "$CurrTime $CurrUrl" >> "../$PIC_OUT_DIR/$PIC_DOWN_LOG"
    echo "[$PicIndex/$PicCount] Download finished."
    echo ""
    PicIndex=$((PicIndex + 1))
done < $PIC_URL_FILE

# mv $PIC_URL_FILE ../$PIC_OUT_DIR/PicUrl.list
#mv $PIC_INFO_FILE3 ../$PIC_OUT_DIR/PicInfo.list

# Clean up the scratch directory.
cd ..
rm -r $WORKING_DIR
echo "All Pictures Downloading finished."
http://write.blog.csdn.net/postedit