JAVA版:
首先,需要jacob.jar这个包,我用的是jacob-1.15-M4这个版本,
1)把jacob.dll放入 Java\jdk1.5.0_06\jre\bin目录下.
2)把jacob-1.15-M4-x64.dll放入到Windows\System32目录下 (注意:64位机器用x64版本的dll,32位机器用x86版本的dll)
3) 把jacob-1.15-M4-x64.dll放入到Java\jdk1.5.0_06\bin目录下
下面建立的程序是将word转化为html,当然也可以是txt,不过最好是html格式
1、建立一个java工程,名为wordtohtml,并建立一个wordtohtml.java文件,粘贴上以下代码:
import com.jacob.com.*;
import com.jacob.activeX.*;
import java.io.*;
/**
 * Batch-converts Word ".doc" files to HTML by automating Microsoft Word
 * through the JACOB COM bridge.
 */
public class wordtohtml
{
    /** WdSaveFormat value passed to Document.SaveAs for standard HTML (wdFormatHTML). */
    private static final int WD_FORMAT_HTML = 8;

    /** File-name suffix that identifies a Word document. */
    private static final String DOC_SUFFIX = ".doc";

    /**
     * Converts every ".doc" file that sits directly inside {@code paths} to an
     * HTML file of the same base name inside {@code savepaths}. Sub-directories
     * are not visited.
     *
     * Word is started lazily (only when at least one document is found) and is
     * shared by all conversions. A failure on one file is reported and does not
     * stop the remaining files.
     *
     * @param paths     directory that contains the .doc files
     * @param savepaths directory that receives the generated .html files
     */
    public static void change(String paths, String savepaths)
    {
        File[] lists = new File(paths).listFiles();
        if (lists == null)
        {
            // listFiles() returns null when the path is not a readable directory.
            System.out.println("Cannot list directory: " + paths);
            return;
        }

        ActiveXComponent app = null;
        try
        {
            Dispatch docs = null;
            for (int i = 0; i < lists.length; i++)
            {
                File source = lists[i];
                if (!isDocFile(source))
                {
                    continue;
                }
                if (app == null)
                {
                    app = new ActiveXComponent("Word.Application"); // start Word
                    app.setProperty("Visible", new Variant(false)); // keep Word hidden
                    docs = app.getProperty("Documents").toDispatch();
                }

                String filename = source.getName();
                String basename = filename.substring(0, filename.length() - DOC_SUFFIX.length());
                // Absolute paths: Word resolves relative paths against its own
                // working directory, not the one of this JVM.
                String docpath = source.getAbsolutePath();
                String htmlpath = new File(savepaths, basename + ".html").getAbsolutePath();

                System.out.println("当前正在转换......");
                System.out.println(paths);    // source directory
                System.out.println(basename); // document name without extension
                System.out.println(docpath);
                System.out.println(htmlpath);

                if (convert(docs, docpath, htmlpath))
                {
                    System.out.println("转化完毕!");
                }
                else
                {
                    System.out.println("转化失败: " + docpath);
                }
            }
        }
        catch (Exception e)
        {
            // Word could not be started or its Documents collection is unavailable.
            e.printStackTrace();
        }
        finally
        {
            if (app != null)
            {
                try
                {
                    app.invoke("Quit", new Variant[]{});
                }
                catch (Exception e)
                {
                    // Quit itself can fail with a COM error; report it instead of
                    // letting it escape from the finally block.
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Tells whether {@code file} is a regular file whose name ends in ".doc"
     * (compared case-insensitively).
     */
    private static boolean isDocFile(File file)
    {
        if (!file.isFile())
        {
            return false;
        }
        String name = file.getName();
        int suffixLength = DOC_SUFFIX.length();
        return name.length() >= suffixLength
                && name.regionMatches(true, name.length() - suffixLength, DOC_SUFFIX, 0, suffixLength);
    }

    /**
     * Opens {@code inFile} read-only in Word, saves it as HTML to {@code tpFile}
     * and closes the document again.
     *
     * @param docs   Word's Documents collection
     * @param inFile absolute path of the Word document to convert
     * @param tpFile absolute path of the HTML file to write
     * @return true when the document was saved, false when Word reported an error
     */
    private static boolean convert(Dispatch docs, String inFile, String tpFile)
    {
        Dispatch doc = null;
        try
        {
            // Open(FileName, ConfirmConversions = false, ReadOnly = true)
            doc = Dispatch.invoke(docs, "Open", Dispatch.Method,
                    new Object[]{inFile, new Variant(false), new Variant(true)},
                    new int[1]).toDispatch();
            // SaveAs(FileName, FileFormat)
            Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
                    new Object[]{tpFile, new Variant(WD_FORMAT_HTML)},
                    new int[1]);
            return true;
        }
        catch (Exception e)
        {
            e.printStackTrace();
            return false;
        }
        finally
        {
            if (doc != null)
            {
                try
                {
                    // SaveChanges = false so that hidden Word never waits on a save prompt.
                    Dispatch.call(doc, "Close", new Variant(false));
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Converts the documents in D:\word, writing the HTML next to them. */
    public static void main(String[] args)
    {
        String paths = "D:\\word";
        String savepaths = "D:\\word";
        System.out.println("start:");
        change(paths, savepaths);
        System.out.println("it's over");
    }
}
------------
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1682)
at java.lang.Runtime.loadLibrary0(Runtime.java:822)
at java.lang.System.loadLibrary(System.java:992)
at com.jacob.com.Dispatch.<clinit>(Dispatch.java:36)
------------
Source: Microsoft Word
Description: RCHITECTURE=x86
at com.jacob.com.Dispatch.invokev(Dispatch.java:890)
at com.jacob.com.Dispatch.callN(Dispatch.java:526)
at com.jacob.com.Dispatch.call(Dispatch.java:606)
at WordControl.openDocument(Test06_08_01.java:3027)
……
at java.awt.EventDispatchThread.pumpEvents(EventDispatchThread.java:149)
at java.awt.EventDispatchThread.run(EventDispatchThread.java:110)
------------
At Invoke of: Documents
Description: An unknown COM error has occured.
at com.jacob.com.Dispatch.invokev(Native Method)
at com.jacob.activeX.ActiveXComponent.getProperty(ActiveXComponent.java)
at com.perlong.poa.common.dao.sqlserver.WordToHtml.change(WordToHtml.java:73)
at com.perlong.poa.common.dao.sqlserver.WordToHtml.main(WordToHtml.java:121)
com.jacob.com.ComFailException: A COM exception has been encountered:
Description: An unknown COM error has occured.
at com.jacob.com.Dispatch.invokev(Native Method)
at com.jacob.activeX.ActiveXComponent.invoke(ActiveXComponent.java)
at com.perlong.poa.common.dao.sqlserver.WordToHtml.change(WordToHtml.java:92)
at com.perlong.poa.common.dao.sqlserver.WordToHtml.main(WordToHtml.java:121)
Exception in thread "main"
------------
C/C++版
1、打开已有的word文件,进行转化为txt
先建立一个控制台程序,取名为word_to_txt
在word_to_txt.cpp中代码如下:
// word_to_txt.cpp : 定义控制台应用程序的入口点。
#include "stdafx.h"
void DocToTxt(BSTR bstrOpenFile, BSTR bstrSaveFile);

// Builds a VT_BOOL argument. Automation booleans must be VARIANT_TRUE (-1) or
// VARIANT_FALSE (0); the Win32 TRUE constant (1) is not a valid VARIANT_BOOL.
static VARIANT BoolArg(bool value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_BOOL;
    v.boolVal = value ? VARIANT_TRUE : VARIANT_FALSE;
    return v;
}

// Builds a VT_BSTR argument that borrows `value`; the caller keeps ownership
// of the string and must keep it alive until the call has returned.
static VARIANT BstrArg(BSTR value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_BSTR;
    v.bstrVal = value;
    return v;
}

// Builds a VT_I4 argument.
static VARIANT LongArg(long value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_I4;
    v.lVal = value;
    return v;
}

// Resolves the member `name` on `pDisp` and invokes it with `wFlags`
// (DISPATCH_METHOD or DISPATCH_PROPERTYGET). `pResult` may be NULL when the
// return value is not needed. Failures are reported on stderr and returned.
static HRESULT InvokeByName(IDispatch* pDisp, const OLECHAR* name, WORD wFlags,
                            DISPPARAMS* pParams, VARIANT* pResult)
{
    // GetIDsOfNames takes a non-const LPOLESTR* although it never writes to it.
    LPOLESTR szName = const_cast<LPOLESTR>(name);
    DISPID dispid = DISPID_UNKNOWN;
    HRESULT hr = pDisp->GetIDsOfNames(IID_NULL, &szName, 1,
                                      LOCALE_USER_DEFAULT, &dispid);
    if (SUCCEEDED(hr))
    {
        hr = pDisp->Invoke(dispid, IID_NULL, LOCALE_USER_DEFAULT, wFlags,
                           pParams, pResult, NULL, NULL);
    }
    if (FAILED(hr))
    {
        fprintf(stderr, "DocToTxt: '%ls' failed (hr=0x%08lX)\n",
                name, static_cast<unsigned long>(hr));
    }
    return hr;
}

// Invokes the member `name` and hands back the object it returns. On success
// *ppOut holds one reference that the caller must Release(); on failure it is
// NULL.
static HRESULT GetDispatchByName(IDispatch* pDisp, const OLECHAR* name, WORD wFlags,
                                 DISPPARAMS* pParams, IDispatch** ppOut)
{
    *ppOut = NULL;
    VARIANT vResult;
    ::VariantInit(&vResult);
    HRESULT hr = InvokeByName(pDisp, name, wFlags, pParams, &vResult);
    if (SUCCEEDED(hr))
    {
        if (vResult.vt == VT_DISPATCH && vResult.pdispVal != NULL)
        {
            *ppOut = vResult.pdispVal; // take over the VARIANT's reference
            return S_OK;
        }
        hr = DISP_E_TYPEMISMATCH;
        fprintf(stderr, "DocToTxt: '%ls' did not return an object\n", name);
    }
    ::VariantClear(&vResult);
    return hr;
}

// Opens the Word document `bstrOpenFile` (read-only) through OLE Automation
// and saves it as plain text (wdFormatText) to `bstrSaveFile`, then quits Word.
//
// Ownership: both BSTR arguments are consumed - they are released with
// SysFreeString before the function returns, on success and on failure alike.
// The caller must not use or free them afterwards.
//
// Errors are reported on stderr; the function never dereferences an interface
// pointer that could not be obtained.
void DocToTxt(BSTR bstrOpenFile, BSTR bstrSaveFile)
{
    DISPPARAMS dpNoArgs = {NULL, NULL, 0, 0};
    IDispatch* pDispApp = NULL;   // Word.Application
    IDispatch* pDispDocs = NULL;  // Documents collection
    IDispatch* pDispDoc = NULL;   // the opened Document
    BSTR bstrEmpty = NULL;

    // RPC_E_CHANGED_MODE means COM is already initialised on this thread with
    // another threading model: COM is usable, but this call must then not be
    // balanced with CoUninitialize.
    const HRESULT hrInit = ::CoInitialize(NULL);
    HRESULT hr = (hrInit == RPC_E_CHANGED_MODE) ? S_OK : hrInit;

    if (SUCCEEDED(hr) && (bstrOpenFile == NULL || bstrSaveFile == NULL))
    {
        hr = E_INVALIDARG;
    }

    // Create the Word application object.
    if (SUCCEEDED(hr))
    {
        CLSID clsid;
        hr = ::CLSIDFromProgID(L"Word.Application", &clsid);
        if (SUCCEEDED(hr))
        {
            hr = ::CoCreateInstance(clsid, NULL, CLSCTX_SERVER, IID_IDispatch,
                                    reinterpret_cast<void**>(&pDispApp));
        }
        if (FAILED(hr))
        {
            fprintf(stderr, "DocToTxt: cannot start Word (hr=0x%08lX)\n",
                    static_cast<unsigned long>(hr));
        }
    }

    // Application.Documents
    if (SUCCEEDED(hr))
    {
        hr = GetDispatchByName(pDispApp, OLESTR("Documents"),
                               DISPATCH_PROPERTYGET, &dpNoArgs, &pDispDocs);
    }

    if (SUCCEEDED(hr))
    {
        bstrEmpty = ::SysAllocString(OLESTR(""));
        if (bstrEmpty == NULL)
        {
            hr = E_OUTOFMEMORY;
        }
    }

    // Documents.Open - positional arguments are passed in reverse order.
    if (SUCCEEDED(hr))
    {
        VARIANT vArgsOpen[6];
        vArgsOpen[5] = BstrArg(bstrOpenFile); // FileName
        vArgsOpen[4] = BoolArg(false);        // ConfirmConversions
        vArgsOpen[3] = BoolArg(true);         // ReadOnly
        vArgsOpen[2] = BoolArg(false);        // AddToRecentFiles
        vArgsOpen[1] = BstrArg(bstrEmpty);    // PasswordDocument
        vArgsOpen[0] = BstrArg(bstrEmpty);    // PasswordTemplate
        DISPPARAMS dpOpen = {vArgsOpen, NULL, 6, 0};
        // Open returns the Document itself, so there is no need to go through
        // Application.ActiveDocument.
        hr = GetDispatchByName(pDispDocs, OLESTR("Open"), DISPATCH_METHOD,
                               &dpOpen, &pDispDoc);
    }

    // Document.SaveAs as plain text.
    if (SUCCEEDED(hr))
    {
        VARIANT vArgsSaveAs[11];
        vArgsSaveAs[10] = BstrArg(bstrSaveFile); // FileName
        vArgsSaveAs[9] = LongArg(2);             // FileFormat (wdFormatText = 2)
        vArgsSaveAs[8] = BoolArg(false);         // LockComments
        vArgsSaveAs[7] = BstrArg(bstrEmpty);     // Password
        vArgsSaveAs[6] = BoolArg(true);          // AddToRecentFiles
        vArgsSaveAs[5] = BstrArg(bstrEmpty);     // WritePassword
        vArgsSaveAs[4] = BoolArg(false);         // ReadOnlyRecommended
        vArgsSaveAs[3] = BoolArg(false);         // EmbedTrueTypeFonts
        vArgsSaveAs[2] = BoolArg(false);         // SaveNativePictureFormat
        vArgsSaveAs[1] = BoolArg(false);         // SaveFormsData
        vArgsSaveAs[0] = BoolArg(false);         // SaveAsAOCELetter
        DISPPARAMS dpSaveAs = {vArgsSaveAs, NULL, 11, 0};
        hr = InvokeByName(pDispDoc, OLESTR("SaveAs"), DISPATCH_METHOD,
                          &dpSaveAs, NULL);
    }

    // Always shut Word down once it was started, otherwise a hidden WINWORD
    // process is left behind. SaveChanges = wdDoNotSaveChanges (0) keeps the
    // invisible instance from waiting on a "save changes?" prompt.
    if (pDispApp != NULL)
    {
        VARIANT vArgsQuit[1];
        vArgsQuit[0] = LongArg(0); // SaveChanges
        DISPPARAMS dpQuit = {vArgsQuit, NULL, 1, 0};
        InvokeByName(pDispApp, OLESTR("Quit"), DISPATCH_METHOD, &dpQuit, NULL);
    }

    // Clean-up (SysFreeString accepts NULL).
    ::SysFreeString(bstrOpenFile);
    ::SysFreeString(bstrSaveFile);
    ::SysFreeString(bstrEmpty);
    if (pDispDoc != NULL)
    {
        pDispDoc->Release();
    }
    if (pDispDocs != NULL)
    {
        pDispDocs->Release();
    }
    if (pDispApp != NULL)
    {
        pDispApp->Release();
    }
    if (SUCCEEDED(hrInit))
    {
        ::CoUninitialize();
    }
}
int main(int argc, char* argv[])
{
BSTR str1,str2 ;
str1 = ::SysAllocString(OLESTR("D:\\word\\例3.doc"));
str2 = ::SysAllocString(OLESTR("D:\\word\\例3.txt"));
DocToTxt(str1,str2);
//DocToTxt(::SysAllocString(OLESTR("D:\\code\\data\\cvtxt.doc")), ::SysAllocString(OLESTR("D:\\code\\data\\cvtxt22.docx")));
return 0;
}
还需要增加stdafx.h头文件,内容如下:
// stdafx.h : 标准系统包含文件的包含文件,
// 或是经常使用但不常更改的
// 特定于项目的包含文件
//
#pragma once
#include "targetver.h"
#include <stdio.h>
#include <tchar.h>
#include <ole2.h>
// TODO: 在此处引用程序需要的其他头文件
以下为文件格式的定义:
wdFormatDocument 0 Microsoft Office Word format.
wdFormatDOSText 4 Microsoft DOS text format.
wdFormatDOSTextLineBreaks 5 Microsoft DOS text with line breaks preserved.
wdFormatEncodedText 7 Encoded text format.
wdFormatFilteredHTML 10 Filtered HTML format.
wdFormatHTML 8 Standard HTML format.
wdFormatRTF 6 Rich text format (RTF).
wdFormatTemplate 1 Word template format.
wdFormatText 2 Microsoft Windows text format.
wdFormatTextLineBreaks 3 Windows text format with line breaks preserved.
wdFormatUnicodeText 7 Unicode text format.
wdFormatWebArchive 9 Web archive format.
wdFormatXML 11 Extensible Markup Language (XML) format.
wdFormatDocument97 0 Microsoft Word 97 document format.
wdFormatDocumentDefault 16 Word default document file format. For Microsoft Office Word 2007, this is the DOCX format.
wdFormatPDF 17 PDF format.
wdFormatTemplate97 1 Word 97 template format.
wdFormatXMLDocument 12 XML document format.
wdFormatXMLDocumentMacroEnabled 13 XML document format with macros enabled.
wdFormatXMLTemplate 14 XML template format.
wdFormatXMLTemplateMacroEnabled 15 XML template format with macros enabled.
wdFormatXPS 18 XPS format.
2、自动生成一个word文档,并编辑其中的内容
#include <ole2.h>
#include <stdio.h>
// Builds a VT_BOOL argument. Automation booleans must be VARIANT_TRUE (-1) or
// VARIANT_FALSE (0); the Win32 TRUE constant (1) is not a valid VARIANT_BOOL.
static VARIANT MakeBoolVariant(bool value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_BOOL;
    v.boolVal = value ? VARIANT_TRUE : VARIANT_FALSE;
    return v;
}

// Builds a VT_BSTR argument that borrows `value`; the caller keeps ownership
// of the string and must keep it alive until the call has returned.
static VARIANT MakeBstrVariant(BSTR value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_BSTR;
    v.bstrVal = value;
    return v;
}

// Builds a VT_I4 argument.
static VARIANT MakeLongVariant(long value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_I4;
    v.lVal = value;
    return v;
}

// Resolves the DISPID of the member `name` on `pDisp`.
static HRESULT FindMember(IDispatch* pDisp, const OLECHAR* name, DISPID* pDispId)
{
    // GetIDsOfNames takes a non-const LPOLESTR* although it never writes to it.
    LPOLESTR szName = const_cast<LPOLESTR>(name);
    HRESULT hr = pDisp->GetIDsOfNames(IID_NULL, &szName, 1,
                                      LOCALE_USER_DEFAULT, pDispId);
    if (FAILED(hr))
    {
        fprintf(stderr, "Member '%ls' not found (hr=0x%08lX)\n",
                name, static_cast<unsigned long>(hr));
    }
    return hr;
}

// Resolves the member `name` on `pDisp` and invokes it with `wFlags`
// (DISPATCH_METHOD or DISPATCH_PROPERTYGET). `pResult` may be NULL when the
// return value is not needed. Failures are reported on stderr and returned.
static HRESULT CallMember(IDispatch* pDisp, const OLECHAR* name, WORD wFlags,
                          DISPPARAMS* pParams, VARIANT* pResult)
{
    DISPID dispid = DISPID_UNKNOWN;
    HRESULT hr = FindMember(pDisp, name, &dispid);
    if (SUCCEEDED(hr))
    {
        hr = pDisp->Invoke(dispid, IID_NULL, LOCALE_USER_DEFAULT, wFlags,
                           pParams, pResult, NULL, NULL);
        if (FAILED(hr))
        {
            fprintf(stderr, "Call to '%ls' failed (hr=0x%08lX)\n",
                    name, static_cast<unsigned long>(hr));
        }
    }
    return hr;
}

// Invokes the member `name` and hands back the object it returns. On success
// *ppOut holds one reference that the caller must Release(); on failure it is
// NULL.
static HRESULT GetObjectMember(IDispatch* pDisp, const OLECHAR* name, WORD wFlags,
                               DISPPARAMS* pParams, IDispatch** ppOut)
{
    *ppOut = NULL;
    VARIANT vResult;
    ::VariantInit(&vResult);
    HRESULT hr = CallMember(pDisp, name, wFlags, pParams, &vResult);
    if (SUCCEEDED(hr))
    {
        if (vResult.vt == VT_DISPATCH && vResult.pdispVal != NULL)
        {
            *ppOut = vResult.pdispVal; // take over the VARIANT's reference
            return S_OK;
        }
        hr = DISP_E_TYPEMISMATCH;
        fprintf(stderr, "'%ls' did not return an object\n", name);
    }
    ::VariantClear(&vResult);
    return hr;
}

// Types `text` at the selection and ends the paragraph, i.e. calls
// Selection.TypeText(text) followed by Selection.TypeParagraph().
// The BSTR for the argument is allocated here and stays alive until both
// calls have returned, so the argument VARIANT never refers to freed memory.
static HRESULT TypeLine(IDispatch* pDispSel, DISPID dispid_TypeText,
                        DISPID dispid_TypePara, const OLECHAR* text)
{
    BSTR bstrText = ::SysAllocString(text);
    if (bstrText == NULL)
    {
        return E_OUTOFMEMORY;
    }
    VARIANT vArgsTypeText[1];
    vArgsTypeText[0] = MakeBstrVariant(bstrText);
    DISPPARAMS dpTypeText = {vArgsTypeText, NULL, 1, 0};
    DISPPARAMS dpNoArgs = {NULL, NULL, 0, 0};

    HRESULT hr = pDispSel->Invoke(dispid_TypeText, IID_NULL,
                                  LOCALE_USER_DEFAULT, DISPATCH_METHOD,
                                  &dpTypeText, NULL, NULL, NULL);
    if (SUCCEEDED(hr))
    {
        hr = pDispSel->Invoke(dispid_TypePara, IID_NULL,
                              LOCALE_USER_DEFAULT, DISPATCH_METHOD,
                              &dpNoArgs, NULL, NULL, NULL);
    }
    ::SysFreeString(bstrText);
    return hr;
}

// Starts Word through OLE Automation, creates a new document, types the three
// paragraphs "One", "Two" and "Three", saves the document as c:\doc1.doc and
// quits Word. Returns 0 on success and 1 when any automation step failed.
int main(int argc, char* argv[])
{
    DISPPARAMS dpNoArgs = {NULL, NULL, 0, 0};

    // IDispatch pointers for Word's objects
    IDispatch* pDispApp = NULL;  // Application object
    IDispatch* pDispDocs = NULL; // Documents collection
    IDispatch* pDispDoc = NULL;  // Document returned by Documents.Add
    IDispatch* pDispSel = NULL;  // Selection object

    BSTR bstrEmptyString = NULL;
    BSTR bstrFileName = NULL;

    // RPC_E_CHANGED_MODE means COM is already initialised on this thread with
    // another threading model: COM is usable, but this call must then not be
    // balanced with CoUninitialize.
    const HRESULT hrInit = ::CoInitialize(NULL);
    HRESULT hr = (hrInit == RPC_E_CHANGED_MODE) ? S_OK : hrInit;

    // Create an instance of the Word application.
    if (SUCCEEDED(hr))
    {
        CLSID clsid;
        hr = ::CLSIDFromProgID(L"Word.Application", &clsid);
        if (SUCCEEDED(hr))
        {
            hr = ::CoCreateInstance(clsid, NULL, CLSCTX_SERVER, IID_IDispatch,
                                    reinterpret_cast<void**>(&pDispApp));
        }
    }

    // Application.Documents
    if (SUCCEEDED(hr))
    {
        hr = GetObjectMember(pDispApp, OLESTR("Documents"),
                             DISPATCH_PROPERTYGET, &dpNoArgs, &pDispDocs);
    }

    // Documents.Add - all of its arguments are optional, so none are passed.
    // The returned Document is kept (and released later) instead of being
    // dropped and looked up again through Application.ActiveDocument.
    if (SUCCEEDED(hr))
    {
        hr = GetObjectMember(pDispDocs, OLESTR("Add"), DISPATCH_METHOD,
                             &dpNoArgs, &pDispDoc);
    }

    // Application.Selection
    if (SUCCEEDED(hr))
    {
        hr = GetObjectMember(pDispApp, OLESTR("Selection"),
                             DISPATCH_PROPERTYGET, &dpNoArgs, &pDispSel);
    }

    // Look the two Selection methods up once; they are invoked three times.
    DISPID dispid_TypeText = DISPID_UNKNOWN;
    DISPID dispid_TypePara = DISPID_UNKNOWN;
    if (SUCCEEDED(hr))
    {
        hr = FindMember(pDispSel, OLESTR("TypeText"), &dispid_TypeText);
    }
    if (SUCCEEDED(hr))
    {
        hr = FindMember(pDispSel, OLESTR("TypeParagraph"), &dispid_TypePara);
    }

    // Type the three paragraphs.
    if (SUCCEEDED(hr))
    {
        hr = TypeLine(pDispSel, dispid_TypeText, dispid_TypePara, OLESTR("One"));
    }
    if (SUCCEEDED(hr))
    {
        hr = TypeLine(pDispSel, dispid_TypeText, dispid_TypePara, OLESTR("Two"));
    }
    if (SUCCEEDED(hr))
    {
        hr = TypeLine(pDispSel, dispid_TypeText, dispid_TypePara, OLESTR("Three"));
    }

    // Document.SaveAs (11 positional arguments, passed in reverse order).
    if (SUCCEEDED(hr))
    {
        bstrEmptyString = ::SysAllocString(OLESTR(""));
        bstrFileName = ::SysAllocString(OLESTR("c:\\doc1.doc"));
        if (bstrEmptyString == NULL || bstrFileName == NULL)
        {
            hr = E_OUTOFMEMORY;
        }
    }
    if (SUCCEEDED(hr))
    {
        VARIANT vArgsSaveAs[11];
        vArgsSaveAs[10] = MakeBstrVariant(bstrFileName);   // FileName
        vArgsSaveAs[9] = MakeLongVariant(0);               // FileFormat (wdFormatDocument)
        vArgsSaveAs[8] = MakeBoolVariant(false);           // LockComments
        vArgsSaveAs[7] = MakeBstrVariant(bstrEmptyString); // Password
        vArgsSaveAs[6] = MakeBoolVariant(true);            // AddToRecentFiles
        vArgsSaveAs[5] = MakeBstrVariant(bstrEmptyString); // WritePassword
        vArgsSaveAs[4] = MakeBoolVariant(false);           // ReadOnlyRecommended
        vArgsSaveAs[3] = MakeBoolVariant(false);           // EmbedTrueTypeFonts
        vArgsSaveAs[2] = MakeBoolVariant(false);           // SaveNativePictureFormat
        vArgsSaveAs[1] = MakeBoolVariant(false);           // SaveFormsData
        vArgsSaveAs[0] = MakeBoolVariant(false);           // SaveAsAOCELetter
        DISPPARAMS dpSaveAs = {vArgsSaveAs, NULL, 11, 0};
        hr = CallMember(pDispDoc, OLESTR("SaveAs"), DISPATCH_METHOD,
                        &dpSaveAs, NULL);
    }

    // Always shut Word down once it was started, otherwise a hidden WINWORD
    // process is left behind. SaveChanges = wdDoNotSaveChanges (0) keeps the
    // invisible instance from waiting on a "save changes?" prompt when an
    // earlier step failed and the new document is still unsaved.
    if (pDispApp != NULL)
    {
        VARIANT vArgsQuit[1];
        vArgsQuit[0] = MakeLongVariant(0); // SaveChanges
        DISPPARAMS dpQuit = {vArgsQuit, NULL, 1, 0};
        CallMember(pDispApp, OLESTR("Quit"), DISPATCH_METHOD, &dpQuit, NULL);
    }

    if (FAILED(hr))
    {
        fprintf(stderr, "Word automation failed (hr=0x%08lX)\n",
                static_cast<unsigned long>(hr));
    }

    // Clean-up (SysFreeString accepts NULL).
    ::SysFreeString(bstrFileName);
    ::SysFreeString(bstrEmptyString);
    if (pDispSel != NULL)
    {
        pDispSel->Release();
    }
    if (pDispDoc != NULL)
    {
        pDispDoc->Release();
    }
    if (pDispDocs != NULL)
    {
        pDispDocs->Release();
    }
    if (pDispApp != NULL)
    {
        pDispApp->Release();
    }
    if (SUCCEEDED(hrInit))
    {
        ::CoUninitialize();
    }
    return SUCCEEDED(hr) ? 0 : 1;
}
从上面的程序可以看到,无论是自动生成word还是打开已有的word,只是调用的方法不同而已:前者使用Documents集合的Add方法,后者使用Open方法。