# txt2htm.py
# -*- coding: utf-8 -*-
import os, sys
import glob
# Page header; %s receives the input path without its extension.
# The title prefix is Chinese for "New Concept English".
HTML_HEAD = """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title> 新概念英语 %s </title>
</head>
<body>
"""

# Audio player; %s receives the same extension-less path, so the .mp3 is
# expected to sit next to the .txt it belongs to.
AUDIO_TAG = """
<audio controls="controls">
<source src="./%s.mp3" type="audio/mp3" />
</audio>
"""

# Page footer.  No "</p>" here: the page never opens a <p> element, so a
# closing tag would be a stray end tag.
HTML_FOOT = "</body>\n</html>\n"

# A line starting with this marker ("corresponding audio") is replaced by
# the audio player.
AUDIO_MARKER = "对应音频"

# Lines starting with one of these open a new section and get a horizontal
# rule in front.  The last marker is Chinese for "reference translation".
SECTION_MARKERS = ("New Word", "New word", "Notes on", "参考译文")

# Conversion stops at the first line starting with one of these.  The first
# marker is Chinese for "online listening address"; the second looks like
# leftover page script in the source text (presumably scraped -- confirm).
STOP_MARKERS = ("在线收听地址", "document.onclick")


def open_text(path, mode):
    """Open *path* as UTF-8 text in the given *mode*.

    On Python 3 the encoding is passed explicitly so the result does not
    depend on the locale.  On Python 2 the file is opened as plain bytes,
    which matches the UTF-8 byte-string literals used for the markers.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, encoding='utf-8')
    return open(path, mode)


def render_body(lines, name):
    """Convert the text *lines* of one lesson into HTML body fragments.

    lines -- iterable of text lines (line endings are kept as they are).
    name  -- extension-less file name used in the audio ``src`` attribute.

    Returns a list of strings to be written between the page header and
    footer.  Blank lines are dropped.  The first non-blank line becomes an
    <h3> heading with its first space-separated word removed; if it has no
    space the whole line is used.  Text is inserted verbatim (it is not
    HTML-escaped).
    """
    fragments = []
    seen = 0  # number of non-blank lines processed so far
    for line in lines:
        if not line.strip():
            continue
        seen += 1
        if seen == 1:
            parts = line.strip().split(' ', 1)
            # A one-word title line has nothing after the first word;
            # use it whole instead of failing with IndexError.
            title = parts[1] if len(parts) > 1 else parts[0]
            fragments.append('<h3>' + title + '</h3>')
        elif line.startswith(AUDIO_MARKER):
            fragments.append(AUDIO_TAG % (name,))
        elif line.startswith(SECTION_MARKERS):
            fragments.append('<hr>\n<br>' + line)
        elif line.startswith(STOP_MARKERS):
            break
        else:
            fragments.append('<br>' + line)
    return fragments


def convert_file(src_path):
    """Convert one ``.txt`` file into a ``.htm`` file next to it.

    Prints an error and exits with status 4 if *src_path* does not end in
    ``.txt``.  Returns the path of the file that was written.
    """
    base, ext = os.path.splitext(src_path)
    if ext != '.txt':
        print('Error: %s is not txt file ' % src_path)
        sys.exit(4)
    dst_path = base + '.htm'
    # Render everything before opening the output, so a failure while
    # reading does not leave a half-written .htm behind.
    with open_text(src_path, 'r') as src:
        fragments = render_body(src, base)
    with open_text(dst_path, 'w') as dst:
        dst.write(HTML_HEAD % (base,))
        dst.writelines(fragments)
        dst.write(HTML_FOOT)
    return dst_path


def main(argv):
    """Convert every file matching the glob pattern in ``argv[1]``.

    Prints a usage message and exits with status 1 unless exactly one
    argument follows the program name.
    """
    if len(argv) != 2:
        print('usage: txt2htm.py *.txt ')
        print('generate ?????.htm ')
        sys.exit(1)
    for path in glob.glob(argv[1]):
        print(path)
        convert_file(path)


if __name__ == '__main__':
    main(sys.argv)