没有好用的美国新闻 API,只好从网页上抓取。
可以获得新闻列表和对应的 URL(大约 80 条),并可以获取每一条新闻的具体文字内容:
'''
Author : Peizhong Ju
Date : Apr. 29, 2016
Function : Get the news from WashingtonPost [python2.7]
'''
import urllib2
import HTMLParser
import random
class GetList(HTMLParser.HTMLParser): #get the url list of all articles
def __init__(self):
HTMLParser.HTMLParser.__init__(self)
self.outputFlag = False
self.count = 0
self.link = ''
self.linkList = []
def handle_starttag(self, tag, attrs):
if tag == 'a':
for key, value in attrs:
if key == 'data-pb-field':
if value == 'web_headline':
self.outputFlag = True
if key == 'href':
self.link = value
d