# Python: news aggregation (新闻聚合)

import re
import textwrap
from email import message_from_string
from nntplib import NNTP
from time import localtime, strftime, time
from urllib import urlopen

# Length of one day in seconds (hours * minutes * seconds).
day = 24 * 60 * 60

def wrap(string, max=70):
    """Wrap *string* to lines of at most *max* characters.

    The wrapped lines are joined with newlines and the result always ends
    with a trailing newline (so an empty input yields ``'\\n'``).

    :param string: the text to wrap.
    :param max: maximum line width handed to ``textwrap.wrap``; the default
        of 70 equals ``textwrap.wrap``'s own default width.
    :return: the wrapped text as a single string.
    """
    # The width must be forwarded explicitly; otherwise *max* is ignored and
    # every call wraps at textwrap's default width.
    return '\n'.join(textwrap.wrap(string, max)) + '\n'

class NewsAgent:
    """Distribute news items from registered sources to registered destinations.

    A source is any object with a ``getItems()`` method returning an iterable
    of items; a destination is any object with a ``receiveItems(items)``
    method.
    """

    def __init__(self):
        self.sources = []
        self.destinations = []

    def addSource(self, source):
        """Register *source* so that ``distribute`` pulls items from it."""
        self.sources.append(source)

    def addDestination(self, dest):
        """Register *dest* so that ``distribute`` hands the items to it."""
        self.destinations.append(dest)

    def distribute(self):
        """Collect the items of every source and pass them to every destination.

        All items are gathered into one list first, so each destination
        receives the complete list.
        """
        items = []
        for source in self.sources:
            items.extend(source.getItems())
        for dest in self.destinations:
            dest.receiveItems(items)


class NewsItem:
    """A single news item consisting of a title and a body."""

    def __init__(self, title, body):
        self.title = title
        self.body = body


class NNTPSource:
    """News source that reads the recent articles of one group from an NNTP server."""

    def __init__(self, servername, group, window):
        self.servername = servername
        self.group = group
        # Look-back period; multiplied by the module-level ``day`` (seconds
        # per day) when the start time is computed.
        self.window = window

    def getItems(self):
        """Yield a ``NewsItem`` for every article posted within the window.

        The connection is closed when iteration finishes, and also when the
        generator is closed early or an error occurs while fetching.
        """
        start = localtime(time() - self.window * day)
        date = strftime('%y%m%d', start)
        hour = strftime('%H%M%S', start)

        server = NNTP(self.servername)
        try:
            article_ids = server.newnews(self.group, date, hour)[1]
            for article_id in article_ids:
                lines = server.article(article_id)[3]
                message = message_from_string('\n'.join(lines))
                title = message['subject']
                body = message.get_payload()
                if message.is_multipart():
                    # For multipart messages only the first part is used.
                    body = body[0]
                yield NewsItem(title, body)
        finally:
            server.quit()


class SimpleWebSource:
    """News source that extracts titles and bodies from a web page with regexes."""

    def __init__(self, url, titlePattern, bodyPattern):
        self.url = url
        self.titlePattern = re.compile(titlePattern)
        self.bodyPattern = re.compile(bodyPattern)

    def getItems(self):
        """Yield a ``NewsItem`` per (title, body) pair found in the page.

        Titles and bodies are matched independently and paired in order of
        appearance; surplus matches of either kind are dropped by ``zip``.
        Each body is re-wrapped with the module-level ``wrap`` helper.
        """
        text = urlopen(self.url).read()
        titles = self.titlePattern.findall(text)
        bodies = self.bodyPattern.findall(text)
        for title, body in zip(titles, bodies):
            yield NewsItem(title, wrap(body))


class PlainDestination:
    """Destination that prints every item as plain text on standard output."""

    def receiveItems(self, items):
        """Print each item's title, a dashed underline of equal length, and its body."""
        for item in items:
            # Single-argument print(...) behaves the same as a print
            # statement under Python 2 and as a function call under Python 3.
            print(item.title)
            print('-' * len(item.title))
            print(item.body)


class HTMLDestination:
    """Destination that writes all items to one HTML file."""

    def __init__(self, filename):
        self.filename = filename

    def receiveItems(self, items):
        """Write an index of item titles followed by every item's title and body.

        The file named by ``self.filename`` is overwritten.
        """
        # Materialize once: the items are traversed twice (index, then
        # content), and a generator would be exhausted after the first pass.
        items = list(items)

        # NOTE(review): the HTML tags of these literals were stripped in the
        # listing this file was recovered from; the markup below is a
        # reconstruction (each format string needs two placeholders for
        # ``(number, item.title)``) -- confirm against the intended layout.
        # NOTE(review): titles and bodies are inserted without HTML escaping.
        lines = [
            '<html>',
            '  <head>',
            "    <title>Today's News</title>",
            '  </head>',
            '  <body>',
            "  <h1>Today's News</h1>",
            '<ul>',
        ]
        for number, item in enumerate(items, 1):
            lines.append('<li><a href="#%i">%s</a></li>' % (number, item.title))
        lines.append('</ul>')
        for number, item in enumerate(items, 1):
            lines.append('<h2><a name="%i">%s</a></h2>' % (number, item.title))
            lines.append('<pre>%s</pre>' % item.body)
        lines.extend(['  </body>', '</html>'])

        # ``with`` guarantees the file is flushed and closed.
        with open(self.filename, 'w') as out:
            out.write('\n'.join(lines) + '\n')

def runDefaultSetup():
    """Build a default agent (one web source, one NNTP source) and run it.

    Items are printed to standard output and also written to ``news.html``.
    """
    agent = NewsAgent()

    # Web source.
    # NOTE(review): the HTML tags inside these two patterns were stripped in
    # the listing this file was recovered from, and the ``bbc = ...``
    # assignment was missing although ``bbc`` is used below; the tags are a
    # reconstruction -- confirm them against the markup of the target page.
    bbc_url = 'http://news.bbc.co.uk/text_only.stm'
    bbc_title = r'(?s)a href="[^"]*">\s*<b>\s*(.*?)\s*</b>'
    bbc_body = r'(?s)</a>\s*<br />\s*(.*?)\s*<'
    bbc = SimpleWebSource(bbc_url, bbc_title, bbc_body)
    agent.addSource(bbc)

    # NNTP source; the window of 1 is multiplied by ``day`` inside NNTPSource.
    clpa_server = 'news2.neva.ru'
    clpa_group = 'alt.sex.telephone'
    clpa_window = 1
    clpa = NNTPSource(clpa_server, clpa_group, clpa_window)
    agent.addSource(clpa)

    agent.addDestination(PlainDestination())
    agent.addDestination(HTMLDestination('news.html'))

    agent.distribute()


if __name__ == '__main__':
    runDefaultSetup()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值