好久没碰 Python 了,写一个简单的小脚本热热手。
"""Download QQ news articles for today's date and save each one as text.

Created on 2014.2.25 for QQnews

@author: accyao

The script targets Python 2: it relies on ``urllib2`` and on
``sys.setdefaultencoding``.
"""
import sys
import urllib2
import urllib
import re
import os
import time

# Python 2 idiom: make UTF-8 the interpreter-wide default encoding used for
# implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf-8')

# Request headers sent with every download (a desktop Firefox User-Agent).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
}

# Today's local date as YYYYMMDD; it is the directory component of the URL.
# strftime() without a time tuple formats the current local time.
tm = time.strftime('%Y%m%d')
print(tm)

# Compiled once at import time instead of on every call.
# Non-greedy so that a later "</title>" on the same line is not swallowed.
_TITLE_RE = re.compile(r'<title>.*?</title>')
# Deliberately greedy: dlNews() turns every inner "</P>" into a newline, so a
# run of paragraphs sharing one line is captured as a single match.
_PARAGRAPH_OPEN = '<P style="TEXT-INDENT: 2em">'
_PARAGRAPH_CLOSE = '</P>'
_TEXT_RE = re.compile(_PARAGRAPH_OPEN + r'.*' + _PARAGRAPH_CLOSE)
_TAG_RE = re.compile(r'<[^>]*>')


def getTitle(page):
    """Return every ``<title>...</title>`` element found in *page*.

    The matches still include the surrounding tags.  ``.`` does not match a
    newline here, so only titles that sit on a single line are found.  An
    empty list means no title was found.
    """
    return _TITLE_RE.findall(page)


def getText(page):
    """Return the indented-paragraph runs found in *page*.

    Each match starts at ``<P style="TEXT-INDENT: 2em">`` and extends to the
    last ``</P>`` on the same line, tags included.  An empty list means the
    page has no such paragraph.
    """
    return _TEXT_RE.findall(page)


def dlNews(idx):
    """Download article number *idx* for today and save it as ``<title>.txt``.

    The article URL is ``http://news.qq.com/a/<YYYYMMDD>/<idx, 6 digits>.htm``.
    Nothing is written when the download fails, when the page has no title or
    no article paragraph, or when the target file already exists.  Download
    failures are reported by printing the error reason.
    """
    url = 'http://news.qq.com/a/%s/%06d.htm' % (tm, idx)
    request = urllib2.Request(url=url, headers=headers)

    try:
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            response.close()
    except urllib2.URLError as e:
        # HTTPError is a subclass of URLError, so this also covers HTTP
        # status errors in addition to connection-level failures.
        print(e.reason)
        return

    titles = getTitle(html)
    if not titles:
        print('no <title> found in ' + url)
        return
    title = titles[0].replace('<title>', '').replace('</title>', '')

    # NOTE(review): the title comes from the remote page and is used verbatim
    # as a file name; a title containing path separators would escape the
    # current directory.  Consider sanitising it.
    filename = title + '.txt'
    if os.path.isfile(filename):
        return  # already downloaded

    # Extract the text before opening the file so that a page without any
    # article paragraph does not leave an empty file behind.
    paragraphs = getText(html)
    if not paragraphs:
        print('no article text found in ' + url)
        return

    text = paragraphs[0]
    text = text.replace(_PARAGRAPH_OPEN, '')
    text = text.replace(_PARAGRAPH_CLOSE, '\n')
    text = _TAG_RE.sub(' ', text)  # blank out any remaining inline markup

    # "with" guarantees the handle is flushed and closed.
    with open(filename, 'w') as fl:
        fl.write(text)


def main(start=1803, stop=1804):
    """Download the articles numbered ``start`` up to, not including, ``stop``.

    The defaults reproduce the original hard-coded range (article 1803 only).
    """
    for i in range(start, stop):
        dlNews(i)


if __name__ == '__main__':
    main()