python转换word到html,Python实现批量将word转html并将html内容发布至网站的方法

#coding=utf-8

__author__ = 'zhm'

from win32com import client as wc

import os

import time

import random

import MySQLdb

import re

def wordsToHtml(dir):
    """Convert every .doc/.docx file found under *dir* to HTML.

    The directory tree is walked with ``os.walk``; each Word document is
    opened through the WPS COM automation object and saved next to the
    original as ``<name>.html``.

    Args:
        dir: Root directory to scan. Byte-string names (what ``os.walk``
            yields on Python 2 for a byte-string root) are decoded as GBK;
            names that are already text are used unchanged.
    """

    def _as_text(value):
        # Decode only real byte strings, so a text path passed by the
        # caller no longer fails with "decoding Unicode is not supported".
        return value.decode("gbk") if isinstance(value, bytes) else value

    # Kingsoft WPS automation: the early-access build uses the KWPS ProgID,
    # the official release uses WPS.
    word = wc.Dispatch('KWPS.Application')
    try:
        for path, subdirs, files in os.walk(dir):
            for wordFile in files:
                wordFile2 = _as_text(wordFile)
                dotIndex = wordFile2.rfind(".")
                if dotIndex == -1:
                    print('********************ERROR: 未取得后缀名!')
                    continue
                # Check the suffix BEFORE opening anything: only Word
                # documents are handed to the application, never images
                # or the HTML files this function generates itself.
                fileSuffix = wordFile2[dotIndex + 1:].lower()
                if fileSuffix not in ("doc", "docx"):
                    continue

                htmlName = wordFile2[:dotIndex] + ".html"
                htmlFullName = os.path.join(_as_text(path), htmlName)

                doc = word.Documents.Open(os.path.join(path, wordFile))
                try:
                    # 8 is wdFormatHTML in Word's WdSaveFormat enumeration;
                    # assumes WPS honours the same value -- TODO confirm.
                    doc.SaveAs(htmlFullName, 8)
                finally:
                    # Always release the document, even if saving failed.
                    doc.Close()
                print(u'生成了html文件:' + htmlFullName)
    finally:
        # Never leave the automation process running in the background.
        word.Quit()

    print("")
    print("Finished!")

# Captures the value of the src attribute of each <img> tag. It is a bytes
# pattern because the HTML is read and rewritten in binary mode.
_IMG_SRC_RE = re.compile(br"""<img\b[^>]*?\ssrc\s*=\s*["']?([^\s"'>]+)""", re.I)


def _decode_gbk(value):
    """Return *value* as text, decoding byte strings as GBK."""
    return value.decode("gbk") if isinstance(value, bytes) else value


def _unique_target(folder, extension):
    """Return a path inside *folder* that does not exist yet.

    The file name is the current time in milliseconds followed by a random
    number; generation is retried until the name is free, so an earlier
    published file is never overwritten.
    """
    while True:
        stamp = str(int(time.time() * 1000)) + str(random.randint(100, 10000))
        candidate = os.path.join(folder, stamp + extension)
        if not os.path.exists(candidate):
            return candidate


def _web_path(published_path):
    """Strip the drive prefix ('D:') so the path is relative to the web root."""
    return published_path.split(":", 1)[-1]


def _publish_images(content, source_dir, image_dir):
    """Copy images referenced by *content* to *image_dir* and rewrite their src.

    Returns the HTML bytes with every copied image's src replaced by the
    web path of the copy. References that do not resolve to a file under
    *source_dir* are reported and left untouched.
    """
    handled = set()
    for src in _IMG_SRC_RE.findall(content):
        if src in handled:
            # One replace() already rewrote every occurrence of this src.
            continue
        handled.add(src)

        local_image = os.path.join(source_dir, _decode_gbk(src))
        if not os.path.isfile(local_image):
            print("Skipped image not found on disk: %r" % (local_image,))
            continue

        if not os.path.exists(image_dir):
            os.makedirs(image_dir)
        published = _unique_target(image_dir, '.png')
        with open(local_image, 'rb') as reader:
            with open(published, 'wb') as writer:
                writer.write(reader.read())

        url = _web_path(published)
        if not isinstance(url, bytes):
            url = url.encode("ascii")
        content = content.replace(src, url)
    return content


def _iframe_wrapper(url):
    """Return the HTML snippet that embeds *url* in an auto-sizing iframe."""
    return '''
<script type="text/javascript" language="javascript">
function iFrameHeight() {
var ifm= document.getElementById("iframepage");
var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;
if(ifm != null && subWeb != null) {
ifm.height = subWeb.body.scrollHeight;
}
}
</script>
<iframe src="''' + url + '''" marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()" ></iframe>
'''


def html_add_to_db(dir):
    """Publish every .htm/.html file under *dir* and register it in MySQL.

    For each HTML file found by ``os.walk``:
      1. its referenced images are copied to ``D:/files/images/whzx/`` and
         the ``src`` attributes are rewritten to the copies;
      2. the rewritten page is written to ``D:/files/htmls/`` under a fresh
         timestamp-based name (``D:/files`` is the static directory
         configured on the web server);
      3. a row is inserted into ``common_article`` whose content is an
         iframe snippet pointing at the published page.

    All inserts are committed once, after the whole tree has been walked.
    A failed insert is reported and the walk continues.

    Args:
        dir: Root directory to scan. Byte-string names are decoded as GBK.
    """
    targetDir = 'D:/files/htmls/'
    imgTarget = 'D:/files/images/whzx/'
    sql = "insert into common_article(title,content) values(%s,%s)"

    conn = MySQLdb.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='root',
        db='test',
        charset='utf8'
    )
    try:
        cur = conn.cursor()
        try:
            for path, subdirs, files in os.walk(dir):
                textPath = _decode_gbk(path)
                for htmlFile in files:
                    htmlFile2 = _decode_gbk(htmlFile)
                    dotIndex = htmlFile2.rfind(".")
                    if dotIndex == -1:
                        print('********************ERROR: 未取得后缀名!')
                        continue
                    if htmlFile2[dotIndex + 1:].lower() not in ("htm", "html"):
                        continue

                    # Join the directory with the bare file name; joining it
                    # with an already-joined path duplicates the directory
                    # whenever *dir* is relative.
                    htmlFullName = os.path.join(textPath, htmlFile2)
                    with open(htmlFullName, 'rb') as htFile:
                        htmStrCotent = htFile.read()

                    htmStrCotent = _publish_images(htmStrCotent, textPath, imgTarget)

                    if not os.path.exists(targetDir):
                        os.makedirs(targetDir)
                    targetFile = _unique_target(targetDir, '.html')
                    with open(targetFile, "wb") as tmpTargetFile:
                        tmpTargetFile.write(htmStrCotent)

                    title = os.path.splitext(htmlFile2)[0]
                    param = (title, _iframe_wrapper(_web_path(targetFile)))
                    try:
                        cur.execute(sql, param)
                    except MySQLdb.Error as err:
                        # Best effort: report the failure, keep publishing.
                        print("Error: unable to insert data: %r" % (err,))
        finally:
            cur.close()
        conn.commit()
    finally:
        # Close the database connection even if the walk failed.
        conn.close()

if __name__ == '__main__':
    # Script entry point: convert the Word documents first, then publish
    # the HTML files produced by that step from the same directory.
    source_root = 'd:/word'
    wordsToHtml(source_root)
    html_add_to_db(source_root)

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值