# This script is used to dig href URLs out of an HTML file.
# For every input line it prints the http:// URLs found in the href attribute
# of <a ...> tags, separated by single spaces; lines that yield no URL are
# dropped.
# Every command sits on its own line on purpose: sed treats everything from
# "#" up to the newline as a comment, so a command sharing the comment's line
# would never run.

# Mark every <a ...> tag with placeholders so it survives the next step.
s/<a \([^>]*\)>/--SYN--\1--FIN--/g
# Drop all remaining angle brackets, which neutralises every non-anchor tag.
s/[><]//g
# Turn the placeholders back into brackets: only the anchors' attribute lists
# are now enclosed in <...>.
s/--FIN--/>/g
s/--SYN--/</g
# Wrap the line in ">" ... "<" so leading and trailing text also sits between
# a ">" and a "<" ...
s/^\(.*\)$/>\1</
# ... then delete all text found between a ">" and the next "<".
s/>[^<]*</></g
# Replace each anchor that carries an http:// href with @URL@ ...
s#<[^>]*href=[^a-zA-Z>]*\(http://[^ >]*\)[^>]*>#@\1@#g
# ... and remove the anchors that carry none.
s/<[^>]*>//g
# Two neighbouring URLs meet as "@@": turn that into the separating space.
s/@@/ /g
# Strip the leftover markers, brackets and quote characters.
s/[><'"@]//g
# Discard lines that are empty or consist only of spaces.
/^ *$/d
# sed script 2:
# Print every http:// URL found in the href attribute of an <a ...> tag, one
# URL per line. Nothing else is printed, so the -n option is not needed.
# Uses the GNU sed extension \s. A tag that is split across several input
# lines is handled by pulling the following line into the pattern space.
:a
# Keep a copy of the pattern space: the extraction below overwrites it.
h
# If the first "<" on the line opens an <a ...> tag with an http:// href,
# replace the whole pattern space with that URL and print it.
s@^[^<]*<a\s*[^>]*\s*href\s*=\s*['"]*\(http://[^> "']*\)[^>]*>.*@\1@p
# Restore the saved copy.
g
# Consume the leading text plus the first tag, then examine what follows.
# The match is anchored so that a tag is only removed once it has been the
# first "<" on the line, i.e. after the extraction above had its chance.
s@^[^<]*<[a-zA-Z/][a-zA-Z]*[^>]*>@@
t a
# The first "<" does not open a tag (e.g. "<!DOCTYPE", "<!--", "1 < 2"):
# drop just that "<" so it cannot hide a later anchor from the extraction.
s@^[^<]*<\([^a-zA-Z/]\)@\1@
t a
# What remains may end in a tag that is still open: join the next line and
# retry. On the last line there is nothing to join, and N would make GNU sed
# print the leftover fragment, so it is skipped there.
\@<[a-zA-Z/][a-zA-Z]*[^>]*$@{
    $!{
        N
        b a
    }
}
# Nothing more to extract from this input.
d