#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# 程序:XML解析器
# 版本:01.0
# 作者:mupeng
# 日期:2013-12-18
# 语言:Python 2.7
# 功能:将xml解析成对应的html
# 注解:该程序用xml.sax模块的parse函数解析XML,并生成事件
# 继承ContentHandler并重写其事件处理函数
# Dispatcher主要用于相应标签的起始、结束事件的派发
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse
class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    For an element named ``foo`` the start event is sent to ``startFoo`` and
    the end event to ``endFoo``. If no such method exists, ``defaultStart`` /
    ``defaultEnd`` is tried instead. An event with no matching handler is
    silently ignored.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Invoke the handler for ``prefix`` + capitalised ``name``.

        ``prefix`` is ``'start'`` or ``'end'``; ``name`` is the element name.
        ``attrs`` is accepted for interface compatibility but is not
        forwarded: every handler is called without arguments.
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler; fall back to defaultStart/defaultEnd.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs=None):
        """SAX callback for an opening tag.

        The parser calls this as ``startElement(name, attrs)``. ``attrs``
        defaults to None so a call that passes only the name still works.
        """
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback for a closing tag."""
        self.dispatch('end', name)
class Website(Dispatcher, ContentHandler):
    """SAX content handler that writes its output to ``ddt_SAX.html``."""

    def __init__(self):
        """Open the output file and reset the parse state."""
        # ContentHandler is an old-style class on Python 2, so super() is not
        # available; call the base initialiser directly.
        ContentHandler.__init__(self)
        # Output file; opened here and written to by the element handlers.
        self.fout = open('ddt_SAX.html', 'w')
        # Boolean parse-state flags.
        self.imagein = False
        self.desflag = False
        self.item = False
        # Text fields, all starting out empty.
        self.title = ''
        self.link = ''
        self.guid = ''
        self.url = ''
        self.pubdate = ''
        self.description = ''
        # Buffer that collects character data between element events.
        self.temp = ''
        self.prx = ''
def startChannel(self):
    """Write the opening fragment of the page to ``self.fout``.

    NOTE(review): the literal looks like it has lost its HTML markup; it is
    reproduced exactly as found.
    """
    self.fout.write('''\n
\n RSS-''')


def endChannel(self):
    """Write the closing script text and close ``self.fout``.

    The literal defines the JavaScript functions ``GetTimeDiff`` and
    ``GetSpanText`` and ends with a call to ``GetSpanText()``.

    NOTE(review): the non-ASCII text in the literal appears mis-decoded and
    any surrounding markup is missing; it is reproduced exactly as found.
    """
    self.fout.write('''
function GetTimeDiff(str)
{
if(str == '')
{
return '';
}
var pubDate = new Date(str);
var nowDate = new Date();
var diffMilSeconds = nowDate.valueOf()-pubDate.valueOf();
var days = diffMilSeconds/86400000;
days = parseInt(days);
diffMilSeconds = diffMilSeconds-(days*86400000);
var hours = diffMilSeconds/3600000;
hours = parseInt(hours);
diffMilSeconds = diffMilSeconds-(hours*3600000);
var minutes = diffMilSeconds/60000;
minutes = parseInt(minutes);
diffMilSeconds = diffMilSeconds-(minutes*60000);
var seconds = diffMilSeconds/1000;
seconds = parseInt(seconds);
var returnStr = "±±¾©・¢²¼Ê±¼ä£º" + pubDate.toLocaleString();
if(days > 0)
{
returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + days + "Ìì" + hours + "Сʱ" + minutes + "・ÖÖÓ£©";
}
else if (hours > 0)
{
returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + hours + "Сʱ" + minutes + "・ÖÖÓ£©";
}
else if (minutes > 0)
{
returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + minutes + "・ÖÖÓ£©";
}
return returnStr;
}
function GetSpanText()
{
var pubDate;
var pubDateArray;
var spanArray = document.getElementsByTagName("span");
for(var i = 0; i < spanArray.length; i++)
{
pubDate = spanArray[i].innerHTML;
document.getElementsByTagName("span")[i].innerHTML = GetTimeDiff(pubDate);
}
}
GetSpanText();
''')
    # The channel element is the last thing written, so release the file.
    self.fout.close()
def characters(self, chars):
    """Append a chunk of character data to ``self.temp``.

    Chunks that consist only of whitespace are skipped; any other chunk is
    appended unchanged, including its own leading and trailing whitespace.
    """
    if not chars.strip():
        return
    self.temp = self.temp + chars
def startTitle(self):
    """Write a separator to ``self.fout`` when ``self.item`` is set.

    Nothing is written when ``self.item`` is false.
    """
    if not self.item:
        return
    self.fout.write('''
\n\n''')
# Handlers endTitle, startImage, endImage and startLink.
#
# NOTE(review): this span has lost its indentation, several statements are
# fused onto single lines, and the string literals appear to be missing their
# HTML markup (non-ASCII text also looks mis-decoded). The code is left
# exactly as found; restore it from the original file before relying on it.
#
# What the visible statements do:
#   endTitle   - copies the buffered text in self.temp to self.title, clears
#                the buffer and writes the title encoded as gb2312. One path
#                also writes literal text that defines the JavaScript
#                functions copyLink() and subscibeLink().
#   startImage - sets self.imagein to True.
#   endImage   - sets self.imagein to False.
#   startLink  - branches on self.imagein and self.item. Which of the
#                trailing writes (self.link, self.title, self.description)
#                belong to which branch, or to handlers whose headers are
#                missing, cannot be determined from this text.
def endTitle(self):
if not self.imagein and not self.item:
self.title = self.temp
self.temp = ''
self.fout.write(self.title.encode('gb2312'))
#self.title = self.temp
self.fout.write('''
\n\n
\nfunction copyLink()
{
clipboardData.setData("Text",window.location.href);
alert("RSSÁ´½ÓÒѾ¸´ÖƵ½¼ôÌù°å");
}
function subscibeLink()
{
var str = window.location.pathname;
while(str.match(/^\//))
{
str = str.replace(/^\//,"");
}
window.open("http://RSS.sina.com.cn/my_sina_web_RSS_news.html?url=" + str,"_self");
}
\n
\n ''') if self.item: self.title = self.temp self.temp = '' self.fout.write(self.title.encode('gb2312')) self.fout.write(''' | |
''') def startImage(self): self.imagein = True def endImage(self): self.imagein = False def startLink(self): if self.imagein: self.fout.write('''\n ''') elif self.item: #self.link = self.temp pass else: self.fout.write(self.link) self.fout.write(''' " target=" _blank "> ''') self.fout.write(self.title.encode('gb2312')) self.fout.write(''' | |
''') self.fout.write(self.description.encode('gb2312')) self.fout.write(''' | |
¸´ÖÆ´ËÒ³Á´½Ó ÎÒҪǶÈë¸ÃÐÂÎÅÁÐ±íµ½ÎÒµÄÒ³Ã棨¼òµ¥¡¢¿ìËÙ¡¢ÊµÊ±¡¢Ãâ・Ñ£© |
''')
# Handler startUrl.
#
# NOTE(review): this span has lost its indentation and parts of its string
# literals; the triple quotes no longer pair up, so it does not parse as
# written. The code is left exactly as found.
#
# Visible intent: startUrl tests self.imagein and writes literal text to
# self.fout. The later writes of self.guid and self.pubdate presumably belong
# to other handlers whose "def" lines are missing - confirm against the
# original file.
def startUrl(self):
if self.imagein:
self.fout.write('''\n
self.fout.write(self.guid)
self.fout.write('''
''')self.fout.write(self.pubdate)
self.fout.write('''
''')#Program entry point
if __name__ == '__main__':
    # Script entry point: run the SAX parser over ddt.xml with a Website
    # handler, which writes its output as the parse events arrive.
    handler = Website()
    parse('ddt.xml', handler)