之前的程序可以通过“另存为”的方式,把 Word 文档的内容导出到 txt 文件中。
直接使用同一个程序,也可以反过来把 txt 转成 doc 文件;但如果要把 txt 转成 docx(Word 2007 格式),沿用原来的保存参数就会出问题。
这时只需修改 SaveAs 的保存参数(文件格式)即可:
vArgsSaveAs[10].vt = VT_BSTR;
vArgsSaveAs[10].bstrVal = bstrSaveFile; //Filename
vArgsSaveAs[9].vt = VT_I4;
vArgsSaveAs[9].lVal = 16; //FileFormat(wdFormatDocumentDefault = 16)
关键是把 vArgsSaveAs[9].lVal 设置为 16,即 wdFormatDocumentDefault(在 Word 2007 中对应 DOCX 格式)。
性能:初步测试,转换处理10万行,大小15MB的txt文件约用时20秒,和机器配置有关
需要注意字符编码:用这个程序处理 ANSI 编码的 txt 时,中文会出现乱码;使用 UTF-8 编码的 txt 则没有问题。
另外,新生成的word使用的是默认模板,如果需要使用特定的模板可以到相应的路径下修改默认模板
附:完整程序
#include <ole2.h>
#include <stdio.h>
void DocToTxt(BSTR bstrOpenFile, BSTR bstrSaveFile);

// Prints a diagnostic for a failed automation step to stderr.
static void WordReportFailure(const OLECHAR* szStep, HRESULT hr)
{
    fprintf(stderr, "DocToTxt: %ls failed (HRESULT 0x%08lX)\n",
            szStep, static_cast<unsigned long>(hr));
}

// Builds a VT_BSTR argument that borrows `bstr` (the VARIANT does not own it,
// so it must NOT be passed to VariantClear).
static VARIANT WordBstrArg(BSTR bstr)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_BSTR;
    v.bstrVal = bstr;
    return v;
}

// Builds a VT_BOOL argument using the Automation truth values
// (VARIANT_TRUE is -1, not the Win32 TRUE of 1).
static VARIANT WordBoolArg(bool value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_BOOL;
    v.boolVal = value ? VARIANT_TRUE : VARIANT_FALSE;
    return v;
}

// Builds a VT_I4 argument.
static VARIANT WordLongArg(long value)
{
    VARIANT v;
    ::VariantInit(&v);
    v.vt = VT_I4;
    v.lVal = value;
    return v;
}

// Reads the object-valued property `szName` of `pDisp`. On success *ppObject
// holds a reference the caller must Release(); on failure it is NULL.
static HRESULT WordGetObject(IDispatch* pDisp, const OLECHAR* szName,
                             IDispatch** ppObject)
{
    *ppObject = NULL;

    LPOLESTR name = const_cast<LPOLESTR>(szName);
    DISPID dispid = 0;
    HRESULT hr = pDisp->GetIDsOfNames(IID_NULL, &name, 1,
                                      LOCALE_USER_DEFAULT, &dispid);
    if (SUCCEEDED(hr))
    {
        DISPPARAMS dpNoArgs = {NULL, NULL, 0, 0};
        VARIANT vResult;
        ::VariantInit(&vResult);
        hr = pDisp->Invoke(dispid, IID_NULL, LOCALE_USER_DEFAULT,
                           DISPATCH_PROPERTYGET, &dpNoArgs, &vResult,
                           NULL, NULL);
        if (SUCCEEDED(hr))
        {
            if (vResult.vt == VT_DISPATCH && vResult.pdispVal != NULL)
            {
                // Take over the reference held by the result VARIANT.
                *ppObject = vResult.pdispVal;
            }
            else
            {
                ::VariantClear(&vResult);
                hr = E_UNEXPECTED;
            }
        }
    }

    if (FAILED(hr))
        WordReportFailure(szName, hr);
    return hr;
}

// Looks up method `szName` on `pDisp` and invokes it with `pParams`,
// discarding any return value.
static HRESULT WordCallMethod(IDispatch* pDisp, const OLECHAR* szName,
                              DISPPARAMS* pParams)
{
    LPOLESTR name = const_cast<LPOLESTR>(szName);
    DISPID dispid = 0;
    HRESULT hr = pDisp->GetIDsOfNames(IID_NULL, &name, 1,
                                      LOCALE_USER_DEFAULT, &dispid);
    if (SUCCEEDED(hr))
    {
        hr = pDisp->Invoke(dispid, IID_NULL, LOCALE_USER_DEFAULT,
                           DISPATCH_METHOD, pParams, NULL, NULL, NULL);
    }

    if (FAILED(hr))
        WordReportFailure(szName, hr);
    return hr;
}

// Opens `bstrOpenFile` through Application.Documents.Open and saves the
// resulting ActiveDocument to `bstrSaveFile` with Document.SaveAs.
// Both strings are borrowed; nothing passed in is freed here.
static HRESULT WordOpenAndSaveAs(IDispatch* pDispApp, BSTR bstrOpenFile,
                                 BSTR bstrSaveFile)
{
    BSTR bstrEmpty = ::SysAllocString(OLESTR(""));
    if (bstrEmpty == NULL)
    {
        WordReportFailure(OLESTR("SysAllocString"), E_OUTOFMEMORY);
        return E_OUTOFMEMORY;
    }

    IDispatch* pDispDocs = NULL;      // Documents collection
    IDispatch* pDispActiveDoc = NULL; // ActiveDocument object

    HRESULT hr = WordGetObject(pDispApp, OLESTR("Documents"), &pDispDocs);

    if (SUCCEEDED(hr))
    {
        // IDispatch positional arguments are passed in REVERSE order:
        // the last array element is the first parameter.
        VARIANT vArgsOpen[6];
        vArgsOpen[5] = WordBstrArg(bstrOpenFile); // FileName
        vArgsOpen[4] = WordBoolArg(false);        // ConfirmConversions
        vArgsOpen[3] = WordBoolArg(true);         // ReadOnly
        vArgsOpen[2] = WordBoolArg(false);        // AddToRecentFiles
        vArgsOpen[1] = WordBstrArg(bstrEmpty);    // PasswordDocument
        vArgsOpen[0] = WordBstrArg(bstrEmpty);    // PasswordTemplate

        DISPPARAMS dpOpen = {vArgsOpen, NULL, 6, 0};
        hr = WordCallMethod(pDispDocs, OLESTR("Open"), &dpOpen);
    }

    if (SUCCEEDED(hr))
        hr = WordGetObject(pDispApp, OLESTR("ActiveDocument"), &pDispActiveDoc);

    if (SUCCEEDED(hr))
    {
        // FileFormat 16 is wdFormatDocumentDefault (DOCX on Word 2007);
        // other values: wdFormatDocument = 0, wdFormatText = 2.
        VARIANT vArgsSaveAs[11];
        vArgsSaveAs[10] = WordBstrArg(bstrSaveFile); // FileName
        vArgsSaveAs[9]  = WordLongArg(16);           // FileFormat
        vArgsSaveAs[8]  = WordBoolArg(false);        // LockComments
        vArgsSaveAs[7]  = WordBstrArg(bstrEmpty);    // Password
        vArgsSaveAs[6]  = WordBoolArg(true);         // AddToRecentFiles
        vArgsSaveAs[5]  = WordBstrArg(bstrEmpty);    // WritePassword
        vArgsSaveAs[4]  = WordBoolArg(false);        // ReadOnlyRecommended
        vArgsSaveAs[3]  = WordBoolArg(false);        // EmbedTrueTypeFonts
        vArgsSaveAs[2]  = WordBoolArg(false);        // SaveNativePictureFormat
        vArgsSaveAs[1]  = WordBoolArg(false);        // SaveFormsData
        vArgsSaveAs[0]  = WordBoolArg(false);        // SaveAsAOCELetter

        DISPPARAMS dpSaveAs = {vArgsSaveAs, NULL, 11, 0};
        hr = WordCallMethod(pDispActiveDoc, OLESTR("SaveAs"), &dpSaveAs);
    }

    if (pDispActiveDoc != NULL)
        pDispActiveDoc->Release();
    if (pDispDocs != NULL)
        pDispDocs->Release();
    ::SysFreeString(bstrEmpty);
    return hr;
}

// Converts a file with Word automation: starts Word.Application, opens
// bstrOpenFile read-only, saves it as bstrSaveFile in Word's default document
// format (FileFormat 16), then quits Word.
//
// Despite the historical name, the direction is determined by the input file
// and the FileFormat value, e.g. txt -> docx as used by main().
//
// Ownership: this function TAKES OWNERSHIP of both BSTR arguments and frees
// them with SysFreeString before returning, on success and on failure.
// Failures are reported on stderr; the function itself returns nothing.
void DocToTxt(BSTR bstrOpenFile, BSTR bstrSaveFile)
{
    HRESULT hr = S_OK;
    bool comInitialized = false;
    IDispatch* pDispApp = NULL;

    if (bstrOpenFile == NULL || bstrSaveFile == NULL)
    {
        hr = E_INVALIDARG;
        WordReportFailure(OLESTR("argument check (NULL file name)"), hr);
    }

    if (SUCCEEDED(hr))
    {
        hr = ::CoInitialize(NULL);
        comInitialized = SUCCEEDED(hr);
        if (hr == RPC_E_CHANGED_MODE)
        {
            // COM is already initialized on this thread with another model:
            // usable, but this call must not be balanced by CoUninitialize.
            hr = S_OK;
        }
        if (FAILED(hr))
            WordReportFailure(OLESTR("CoInitialize"), hr);
    }

    if (SUCCEEDED(hr))
    {
        CLSID clsid;
        hr = ::CLSIDFromProgID(L"Word.Application", &clsid);
        if (FAILED(hr))
        {
            WordReportFailure(OLESTR("CLSIDFromProgID(Word.Application)"), hr);
        }
        else
        {
            hr = ::CoCreateInstance(clsid, NULL, CLSCTX_SERVER, IID_IDispatch,
                                    reinterpret_cast<void**>(&pDispApp));
            if (FAILED(hr))
            {
                pDispApp = NULL;
                WordReportFailure(OLESTR("CoCreateInstance(Word.Application)"), hr);
            }
        }
    }

    if (SUCCEEDED(hr))
        hr = WordOpenAndSaveAs(pDispApp, bstrOpenFile, bstrSaveFile);

    if (pDispApp != NULL)
    {
        // Always shut Word down, even after a failure, so no hidden
        // WINWORD.EXE is left running. SaveChanges = 0 (wdDoNotSaveChanges)
        // keeps Quit from waiting on a save prompt.
        VARIANT vSaveChanges = WordLongArg(0);
        DISPPARAMS dpQuit = {&vSaveChanges, NULL, 1, 0};
        WordCallMethod(pDispApp, OLESTR("Quit"), &dpQuit);
        pDispApp->Release();
    }

    // Clean-up: the caller handed these strings over (SysFreeString accepts NULL).
    ::SysFreeString(bstrOpenFile);
    ::SysFreeString(bstrSaveFile);

    if (comInitialized)
        ::CoUninitialize();
}
int main(int argc, char* argv[])
{
    // Source and destination are fixed paths; the command-line arguments are
    // not consulted. Other conversions work the same way, for example
    //   D:\code\data\cvtxt.doc -> D:\code\data\cvtxt22.docx
    BSTR inputPath = ::SysAllocString(OLESTR("D:\\code\\data\\cvUTF.txt"));
    BSTR outputPath = ::SysAllocString(OLESTR("D:\\code\\data\\cvtxt1113.docx"));

    // DocToTxt releases both strings with SysFreeString, so they are not
    // freed again here.
    DocToTxt(inputPath, outputPath);

    return 0;
}
参数意义见下面定义
http://blog.csdn.net/youthon/article/details/7019388
以下为文件格式的定义
wdFormatDocument | 0 | Microsoft Office Word format. |
wdFormatDOSText | 4 | Microsoft DOS text format. |
wdFormatDOSTextLineBreaks | 5 | Microsoft DOS text with line breaks preserved. |
wdFormatEncodedText | 7 | Encoded text format. |
wdFormatFilteredHTML | 10 | Filtered HTML format. |
wdFormatHTML | 8 | Standard HTML format. |
wdFormatRTF | 6 | Rich text format (RTF). |
wdFormatTemplate | 1 | Word template format. |
wdFormatText | 2 | Microsoft Windows text format. |
wdFormatTextLineBreaks | 3 | Windows text format with line breaks preserved. |
wdFormatUnicodeText | 7 | Unicode text format. |
wdFormatWebArchive | 9 | Web archive format. |
wdFormatXML | 11 | Extensible Markup Language (XML) format. |
wdFormatDocument97 | 0 | Microsoft Word 97 document format. |
wdFormatDocumentDefault | 16 | Word default document file format. For Microsoft Office Word 2007, this is the DOCX format. |
wdFormatPDF | 17 | PDF format. |
wdFormatTemplate97 | 1 | Word 97 template format. |
wdFormatXMLDocument | 12 | XML document format. |
wdFormatXMLDocumentMacroEnabled | 13 | XML document format with macros enabled. |
wdFormatXMLTemplate | 14 | XML template format. |
wdFormatXMLTemplateMacroEnabled | 15 | XML template format with macros enabled. |
wdFormatXPS | 18 | XPS format. |