# -*- coding: gbk -*-
import urllib2
from sgmllib import SGMLParser

class HotExtract(SGMLParser):
   

    def __init__(self):
        SGMLParser.__init__(self)
        self.is_a = ""
        self.hot = []
       
    def start_a(self, attrs):
        if len(attrs) == 0:
            pass
        else:
            for (variable, value) in attrs:
                if variable == "mon" and value == "ct=1&a=30":
                    self.is_a = 1
                    break
               
    def end_a(self):
        self.is_a = ""
       
    def handle_data(self, text):
        if self.is_a == 1:
            self.hot.append(text)


def getHtml(url):
    """Download ``url`` and return the response body.

    Args:
        url: Address to fetch; passed unchanged to ``urllib2.urlopen``.

    Returns:
        The body as returned by the response's ``read()``.

    Raises:
        Whatever ``urllib2.urlopen`` or ``read()`` raises; errors are not
        caught here.
    """
    response = urllib2.urlopen(url)
    # Python 2 responses are not context managers, so close explicitly to
    # release the connection even if read() fails.
    try:
        return response.read()
    finally:
        response.close()

def extract_hot(html):
    """Return the texts collected by ``HotExtract`` from ``html``.

    Args:
        html: Markup to parse.

    Returns:
        The parser's ``hot`` list.
    """
    hotExtract = HotExtract()
    hotExtract.feed(html)
    # feed() only processes complete constructs; close() forces any data
    # still buffered at the end of the input to be handled as well.
    hotExtract.close()
    return hotExtract.hot

# Script body: download the page, pull out the hot entries and print
# each one on its own line.
html = getHtml("http://news.baidu.com/")
hot_list = extract_hot(html)
for hot in hot_list:
    # Parenthesised single argument: same output as the bare print statement.
    print(hot)