#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fetch a GBK-encoded web page, print it, and print the text of its <title>.

This file merges two small listings that originally (a) downloaded and printed
a page and (b) downloaded the same page again and printed its title using an
SGML parser.  The page is now downloaded once and reused for both steps.

The code runs on Python 2 and Python 3: ``sgmllib`` no longer exists on
Python 3, so the title extraction is built on ``HTMLParser`` while keeping the
original ``start_title`` / ``end_title`` hook names.
"""
from contextlib import closing

try:  # Python 3
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2
    from HTMLParser import HTMLParser
    from urllib import urlopen

# Page to download and the character encoding used to decode its bytes.
URL = "http://www.bitunion.org/"
ENCODING = "gbk"


def fetch_page(url=URL, encoding=ENCODING):
    """Download *url* and return its body decoded with *encoding*.

    The connection is closed even if reading fails.  Decoding is strict, so
    bytes that are invalid in *encoding* raise ``UnicodeDecodeError``.
    """
    # closing() is used because the Python 2 response object is not a
    # context manager on its own.
    with closing(urlopen(url)) as sock:
        raw = sock.read()
    return raw.decode(encoding)


class Parse(HTMLParser):
    """Print (and collect) the text found inside ``<title>`` elements.

    Attributes set by ``reset``:
        found_title: nesting depth of currently open ``<title>`` tags.
        titles: list of text chunks seen while ``found_title`` > 0.
    """

    def reset(self):
        """Clear the title state, then reset the underlying parser."""
        self.found_title = 0
        self.titles = []
        # Explicit base call (not super()): the Python 2 base class is
        # old-style, where super() does not work.
        HTMLParser.reset(self)

    def handle_starttag(self, tag, attrs):
        """Route ``<title>`` start tags to ``start_title``."""
        if tag == "title":
            self.start_title(attrs)

    def handle_endtag(self, tag):
        """Route ``</title>`` end tags to ``end_title``."""
        if tag == "title":
            self.end_title()

    def start_title(self, attrs):
        """Record that a ``<title>`` element has been opened."""
        self.found_title += 1

    def end_title(self):
        """Record that a ``<title>`` element has been closed."""
        self.found_title -= 1

    def handle_data(self, text):
        """Print and store *text* when it appears inside ``<title>``."""
        if self.found_title > 0:
            self.titles.append(text)
            # Single-argument print() gives the same output on Python 2 and 3.
            print(' Title: %s ' % text)


def main():
    """Download the page, print it, then print its title."""
    html = fetch_page()
    print(html)

    parser = Parse()
    parser.feed(html)
    # Flush any text the parser is still buffering.
    parser.close()


if __name__ == "__main__":
    main()

# The original listing ended with a short trailing note (roughly "final
# comment") that had no further content.