#! /usr/bin/env python
from sys import argv
from os import makedirs,unlink,sep
from os.path import dirname,exists,isdir,splitext
from string import replace,find,lower
from htmllib import HTMLParser
from urllib import urlretrieve
from urlparse import urlparse,urljoin
from formatter import DumbWriter,AbstractFormatter
from cStringIO import StringIO
class Retriever(object): # download web page
def __init__(self, url):
    """Remember the URL and the file name derived from it.

    url -- the address handed to this retriever; stored as ``self.url``.

    ``self.file`` is set to whatever ``self.filename(url)`` returns
    (presumably the local path the page is saved under -- confirm
    against the full ``filename`` implementation).
    """
    # Store the URL first: filename() is called afterwards, so it can
    # rely on self.url already being set.
    self.url = url
    target = self.filename(url)
    self.file = target
def filename(self,url,deffile='index.html'):
parsedurl = urlparse(url,'http',0) # parse path
path = parsedurl[1] + parsedurl[2]
ext = splitext(path)
if ext[1] == '': # no file,use default
if path[-1] == '/':
path += deffile