原 HTML 文件 tmpl.html：
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- tmpl.html: minimal XHTML 1.0 Transitional template with an empty body. -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- Declares the document as UTF-8 encoded text/html. -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- Placeholder title; the text means "Untitled document". -->
<title>无标题文档</title>
</head>
<body>
</body>
</html>
Python 代码：
from pyquery import PyQuery as pq
from lxml import etree
import urllib
# Load tmpl.html into a PyQuery document; parser='html' asks pyquery for its
# HTML parser rather than the XML one.
d = pq(filename='tmpl.html', parser='html')

# Serialize once and reuse the result for both the console and the file.
# NOTE(review): outerHtml() serializes the selected element(s); the DOCTYPE
# line of the template is presumably not part of the result - confirm.
html = d.outerHtml()
print(html)

# Binary mode plus an explicit UTF-8 encode writes the same bytes on
# Python 2 and Python 3, and the with-block closes the file even when
# write() raises.
with open('out.html', 'wb') as f:
    f.write(html.encode('utf-8'))