#!/bin/bash
# args.txt — demo of positional-parameter handling plus a small
# config-driven page scraper.  Usage: ./cmd.sh <config-file>
#
# NOTE(review): the shebang must be the very first line of the file; the
# stray "args.txt" label that preceded it has been folded into this comment.

# Require a config-file argument that exists; fail loudly otherwise.
if [ -z "$1" ] || [ ! -e "$1" ]; then
  echo "Usage: $0 input" >&2
  exit 1
fi
# --- positional-parameter demos -------------------------------------------
# Script name as invoked.
echo "$0"

# Each positional argument on its own line.  "$@" keeps every argument
# intact even when it contains spaces; the original unquoted $* re-split them.
for num in "$@"; do
  echo "$num"
done

# Count from -3 up to the number of arguments.
for i in $(seq -3 "$#"); do
  echo "$i"
done

# A literal brace range.
for i in {0..5}; do
  echo "$i"
done

# All arguments on one line, a C-style loop, then this shell's PID.
echo "$@"
for ((i = 4; i < 7; i++)); do
  echo "$i"
done
echo "all:$$"
# sed expression that strips leading/trailing blanks from a line.
trimReg="s/\(^ *\)\(.*[^ ]\)\( *$\)/\2/"

# Scratch file for the de-duplication step later on.  mktemp creates the
# file safely instead of using a bare uuid name in the current directory
# (the old /proc/sys/kernel/random/uuid trick was also Linux-only).
tmpfile=$(mktemp) || exit 1

# Parse "key = value" lines from the config file given as $1.
# IFS= and -r keep leading blanks and backslashes in each line intact.
while IFS= read -r line; do
  value=${line#*=}   # everything after the first '=' (values may contain '=')
  key=${line%%=*}    # everything before the first '='
  key=$(printf '%s\n' "$key" | sed -e "$trimReg")
  value=$(printf '%s\n' "$value" | sed -e "$trimReg")
  case "$key" in
    url)       url=$value ;;
    beginwith) beginwith=$value ;;
    endwith)   endwith=$value ;;
    pagereg)   pagereg=$value ;;
    savepath)  savepath=$value ;;
    prefix)    prefix=$value ;;
    proxy)     proxy=$value ;;
  esac
done < "$1"
# Report every parsed setting (plus the scratch file) for debugging.
# ${!cfg_key} is bash indirect expansion: the value of the variable whose
# name is stored in cfg_key.  Order matches the original report.
for cfg_key in url beginwith pagereg endwith prefix proxy savepath tmpfile; do
  printf '%s:%s\n' "$cfg_key" "${!cfg_key}"
done
# --- fetch and extract ----------------------------------------------------
# Download the page (optionally through the configured proxy) and convert
# it from GBK to UTF-8.
if [ -z "$proxy" ]; then
  content=$(curl -s "$url" | iconv -f gbk -t utf-8)
else
  content=$(curl -x "$proxy" -s "$url" | iconv -f gbk -t utf-8)
fi
echo "download:${#content} byte(s)"

# Keep only the region between the beginwith and endwith markers.
# The markers are quoted inside the expansion so they match literally
# instead of being treated as glob patterns.
content=${content#*"$beginwith"}
content=${content%%"$endwith"*}
echo "after filter:${#content} byte(s)"

# Pull out every match of the configured Perl regex (grep -P), drop
# adjacent duplicates, then let awk remove the remaining ones.  printf with
# a quoted expansion avoids the glob expansion an unquoted echo risked.
printf '%s\n' "$content" | grep -Po -- "$pagereg" | uniq > "$savepath"
awk '{a[$0]++} END {for (m in a) print m}' "$savepath" > "$tmpfile"

# Optionally prepend the configured prefix to every extracted line.
# NOTE(review): $prefix is interpolated into the sed program; a prefix
# containing '/' or '&' would break or alter the substitution — confirm
# config values are trusted.
if [ -n "$prefix" ]; then
  sed "s/^/$prefix/g" "$tmpfile" > "$savepath"
else
  cp "$tmpfile" "$savepath"
fi
rm -f "$tmpfile"
rm -f $tmpfile
# --- string-manipulation demos --------------------------------------------
str="0000012345456789000000"
echo "$str"
#str= expr substr $str 1 2
#str=${str:2:3}
# Strip the shortest leading match of '*0' — i.e. just the first zero.
str=${str#*0}
echo "$str"

# Trim a padded string with the sed expression defined earlier in the file.
str=" s = "
str=$(echo "$str" | sed -e "${trimReg}")
# Quoting $str keeps its inner spacing intact; the unquoted echo [$str]
# word-split the value and printed a bogus extra space inside the brackets.
echo "[$str]"
echo "$str" | sed -e "${trimReg}"
url = focus.news.163.com
beginwith = <ul class="focuslist-1" id="focusTab-1">
pagereg = (?<=href=\\")http://focus\\.news\\.163\\.com/[\\d]+.+?(?=\\")
endwith = <div class="con-4" area clearfix">
savepath = 163.txt