采用 OLE 自动化的方式,将 Word 文档转换为文本等格式(Delphi 实现):
{ Converts a Word document to a plain-text file through OLE automation.

  Starts a hidden 'Word.Basic' automation object, opens sDocFile, and saves
  it next to the original with the extension replaced by '.txt'.

  Parameters:
    sDocFile - path of the Word document to convert.
               NOTE(review): presumably this should be an absolute path,
               since the automation server may resolve relative paths
               against its own working directory - confirm.

  Returns:
    The path of the text file that was written (sDocFile with a '.txt'
    extension).

  Raises:
    Any exception raised by CreateOleObject (e.g. Word is not installed) or
    by the automation calls (open/save failures) propagates to the caller. }
function WordToTxt( sDocFile: string ): string;
var
  WordApp: Variant;
  sTxtFile: string;
begin
  Result := '';
  // CreateOleObject raises on failure instead of returning an empty Variant,
  // so no VarIsEmpty check is needed after this line.
  WordApp := CreateOleObject( 'Word.Basic' );
  try
    WordApp.visible := False;
    WordApp.FileOpen( sDocFile );
    sTxtFile := ChangeFileExt( sDocFile, '.txt' );
    WordApp.FileSaveAs( sTxtFile, 2 ); // 2 = wdFormatText (plain text); see the format table below
    // Only report the output path once the save has actually succeeded.
    Result := sTxtFile;
  finally
    // Always ask the automation server to quit, even when opening or saving
    // failed; otherwise the hidden instance started above is left behind.
    try
      WordApp.Quit;
    finally
      // Release the COM reference regardless of whether Quit succeeded.
      WordApp := Unassigned;
    end;
  end;
end;
以下为 Word 保存格式常量(WdSaveFormat 枚举)的定义,各列依次为:常量名 | 数值 | 说明
wdFormatDocument | 0 | Microsoft Office Word format. |
wdFormatDOSText | 4 | Microsoft DOS text format. |
wdFormatDOSTextLineBreaks | 5 | Microsoft DOS text with line breaks preserved. |
wdFormatEncodedText | 7 | Encoded text format. |
wdFormatFilteredHTML | 10 | Filtered HTML format. |
wdFormatHTML | 8 | Standard HTML format. |
wdFormatRTF | 6 | Rich text format (RTF). |
wdFormatTemplate | 1 | Word template format. |
wdFormatText | 2 | Microsoft Windows text format. |
wdFormatTextLineBreaks | 3 | Windows text format with line breaks preserved. |
wdFormatUnicodeText | 7 | Unicode text format. |
wdFormatWebArchive | 9 | Web archive format. |
wdFormatXML | 11 | Extensible Markup Language (XML) format. |
wdFormatDocument97 | 0 | Microsoft Word 97 document format. |
wdFormatDocumentDefault | 16 | Word default document file format. For Microsoft Office Word 2007, this is the DOCX format. |
wdFormatPDF | 17 | PDF format. |
wdFormatTemplate97 | 1 | Word 97 template format. |
wdFormatXMLDocument | 12 | XML document format. |
wdFormatXMLDocumentMacroEnabled | 13 | XML document format with macros enabled. |
wdFormatXMLTemplate | 14 | XML template format. |
wdFormatXMLTemplateMacroEnabled | 15 | XML template format with macros enabled. |
wdFormatXPS | 18 | XPS format. |