下面是一个简单的 Python 应用,主要使用了 Python 的 urllib、re 等标准库。代码非常简单,可以作为其他 Python 网络应用的模板(已在 Python 3 下测试)。
#!/usr/bin/env python
import http.client
import re
import sys
import urllib.request
from urllib.parse import urljoin, urlparse
def download(url, flag):
    """Fetch ``url`` and save the response body to a file in the current directory.

    The file is named after the last segment of the URL path; when that
    segment is empty (for example ``http://host/dir/``) the name
    ``index.html`` is used.  The body is written byte-for-byte, so the saved
    copy does not depend on the platform's default text encoding.

    Args:
        url: Address of the page to fetch.
        flag: ``1`` marks ``url`` as an index page: every ``a href="..."``
            target on it that does not start with ``http`` is resolved
            against ``url`` and downloaded as well (with ``flag`` 0, so the
            crawl stops after one level).  Any other value saves only this
            page.

    Returns:
        None.  A page that cannot be fetched or saved is reported on stderr
        and skipped, so one bad link does not stop the remaining downloads.
    """
    try:
        page_url = urlparse(url).geturl()
        # Take the name from the path component only, so a query string or
        # fragment never ends up in the file name; fall back to a fixed name
        # for directory-style URLs whose last path segment is empty.
        file_name = urlparse(page_url).path.rsplit('/', 1)[-1] or 'index.html'
        with urllib.request.urlopen(url) as response:
            raw_page = response.read()
        print("Downloading: ", page_url, ";Saving: ", file_name)
        with open(file_name, 'wb') as out_file:
            out_file.write(raw_page)
    except (OSError, ValueError, http.client.HTTPException) as exc:
        # OSError covers URLError/HTTPError, socket and file-system failures;
        # ValueError covers malformed URLs; HTTPException covers protocol
        # errors raised by http.client.
        print("Skipping", url, "-", exc, file=sys.stderr)
        return

    if flag != 1:
        return

    # Index page: collect the link targets, then download each relative one.
    # Decoding is only needed to search for links, so undecodable bytes are
    # replaced instead of aborting the whole page.
    page_text = raw_page.decode('gb2312', errors='replace')
    for item in re.findall(r'a href="([^"]+)"', page_text):
        if item.startswith('http'):
            continue
        try:
            # urljoin resolves "sub.html", "/abs.html" and "../up.html"
            # correctly relative to the index page.
            target = urljoin(page_url, item.strip())
        except ValueError:
            continue  # malformed href; nothing sensible to fetch
        print("!!!!....", target)
        download(target, 0)
if __name__ == "__main__":
    # Script entry point: expects the index page URL as the first
    # command-line argument and mirrors that page plus its relative links.
    if len(sys.argv) < 2:
        # Without a URL there is nothing to download; exit with a usage
        # message (written to stderr, exit status 1) instead of an IndexError.
        sys.exit("Usage: " + sys.argv[0] + " URL")
    url = sys.argv[1]  # Get the URL address
    download(url, 1)