通过COM组件在Office和PDF文档中查找关键字

这段代码实现了一个工具类,通过COM组件在Word、Excel、PowerPoint和PDF文档中搜索关键字。它利用Microsoft Office的COM接口打开文档并读取文本,并通过is_case参数支持可选的区分大小写搜索。对于PDF文件,它依赖Adobe Acrobat SDK提供的COM对象(AcroExch.App、AcroExch.AVDoc)来查找关键字。
摘要由CSDN通过智能技术生成
#ifndef __KEYSEARCHER_UTIL_HPP__
#define __KEYSEARCHER_UTIL_HPP__

#pragma  warning(disable:4259)

#import   "C:\Program Files\Common Files\Microsoft Shared\OFFICE12\MSO.DLL"   rename("RGB", "RGBMSO")
#import   "C:\Program Files\Common Files\Microsoft Shared\VBA\VBA6\VBE6EXT.OLB"   
#import   "D:\Program Files\Microsoft Office\Office12\msword.olb"  rename("FindText","_FindText"), rename("ExitWindows","_ExitWindows")  
#import	  "D:\Program Files\Microsoft Office\Office12\excel.exe"  rename("ReplaceText", "ReplaceTextXL") rename("DialogBox", "DialogBoxXL") rename("RGB", "RBGXL") rename("DocumentProperties", "DocumentPropertiesXL") rename("CopyFile", "CopyFileXL") exclude("IFont","IPicture") 
#import	  "D:\Program Files\Microsoft Office\Office12\msppt.olb" rename("RGB", "RBGPPT")

#include <string>
#include <iostream>
#include <atlbase.h>
#include <atlconv.h>
#include <boost/algorithm/string.hpp>
#include "acrobat.h"

using namespace Office;
using namespace Word;
using namespace Excel;
using namespace PowerPoint;


class key_searcher_util
{
public:

	static bool is_contains(const std::string& content, const std::string& key, bool is_case)
	{
		bool ret = false;
		std::string key_tofind(key);
		std::string content_tosearch(content);
		if (!is_case)
		{
			boost::to_lower(key_tofind);
			boost::to_lower(content_tosearch);
		}

		if (!content_tosearch.empty()&& std::string::npos != content_tosearch.find(key_tofind))
		{
			ret = TRUE;
		}
		return ret;
	}

	static bool search_in_word(const std::string& file_path, const std::string& key, bool is_case = false)
	{
		USES_CONVERSION;
		bool key_found = false;
		HRESULT   hr = S_OK; 

		Word::_ApplicationPtr pWordApp;
		Word::_ApplicationPtr   pWordApplication = NULL; 
		Word::DocumentsPtr   pDocsPtr = NULL; 
		Word::_DocumentPtr   pDocPtr = NULL; 
		Word::_DocumentPtr pDoc =NULL;

		CoInitialize(NULL);
		try
		{ 
			CLSID clsid;
			CLSIDFromProgID(TEXT("Word.Application"), &clsid); 
			pWordApp.CreateInstance (clsid,NULL);
			pWordApp ->PutVisible (VARIANT_FALSE);
			//open file
			_variant_t bstrFileName = file_path.c_str();
			pDoc = pWordApp ->GetDocuments() ->Open (&bstrFileName);
			_bstr_t doc_content = pDoc->Content->Text;
			const char*  szData= (const char*)doc_content;
			if( szData && strlen(szData) >0 ) 
			{
				key_found = is_contains(szData, key, is_case);
			}
		} 
		catch(_com_error & error) 
		{ 
			wprintf(L"PowerPoint throws the error: %s\n", error.ErrorMessage());
			wprintf(L"Description: %s\n", (LPCWSTR) error.Description());
		} 

		if (pDoc)
		{
			pDoc->Close();
		}
		
		if (pWordApp)
		{
			pWordApp->Quit();
		}
		
		CoUninitialize();

		return key_found; 
	}

	static bool  search_in_excel(const std::string& file_path, const std::string& key,  bool is_case = false)
	{
		USES_CONVERSION;
		bool key_found = false;
		HRESULT hr  = E_FAIL;
		Excel::_ApplicationPtr spExcelApp; 
		Excel::WorkbooksPtr spWorkBooks;
		Excel::_WorkbookPtr spWorkbook;
		Excel::WorksheetsPtr spWorkSheets;

		CoInitialize(NULL);
		try
		{ 
			hr = spExcelApp.CreateInstance(TEXT("Excel.Application")); 
			if(SUCCEEDED(hr))
			{
				spExcelApp ->PutVisible (0, VARIANT_FALSE);

				spWorkBooks = spExcelApp->GetWorkbooks();
				spWorkbook = spWorkBooks->Open(file_path.c_str());  // open excel file 
				int nWorkSheetsCount = 0;
				nWorkSheetsCount = spWorkbook->Worksheets->GetCount();
				//printf("work sheets:%d\n", nWorkSheetsCount);

				for (int i = 1; i <= nWorkSheetsCount && !key_found; ++i)
				{
					//Excel::_WorksheetPtr spWorksheet = spWorkbook->ActiveSheet; 
					//pWorksheet->Name = L"Chart Data"; 
					//Excel::RangePtr pRange = spWorksheet->Cells; 

					Excel::_WorksheetPtr spWorksheet = spWorkbook->Worksheets->Item[i];
					Excel::RangePtr pUsedRange = spWorksheet->GetUsedRange();
					//rows
					Excel::RangePtr pRows = pUsedRange->GetRows();
					long lgUsedRowNum = 0;
					lgUsedRowNum= pRows->GetCount();
					//column
					Excel::RangePtr pColumns = pUsedRange->GetColumns();
					long lgUsedColumnNum = 0;
					lgUsedColumnNum= pColumns->GetCount();

					std::string strItemText;
					const char* pszItemText = NULL;

					for ( int i = 1; i <= lgUsedRowNum && !key_found; ++i)
					{
						 for ( int j = 1; j <= lgUsedColumnNum && !key_found; ++j )
						 {
							 _variant_t  vItem = pUsedRange->Item[i][j];
							 _bstr_t     bstrText( vItem );
							 pszItemText = (const char*)bstrText;

							 if (pszItemText&& strlen(pszItemText) >0)
							 {
								key_found = is_contains(pszItemText, key, is_case); 
							 }
						 }
					}
				}
			}
		
		} 
		catch (_com_error& error) 
		{ 
			wprintf(L"PowerPoint throws the error: %s\n", error.ErrorMessage());
			wprintf(L"Description: %s\n", (LPCWSTR) error.Description());
		}


		if (spWorkbook)
		{
			spWorkbook->PutSaved(0, VARIANT_TRUE);
			spWorkbook->Close(VARIANT_FALSE);  // save changes 
		}

		if (spWorkBooks)
		{
			spWorkBooks->Close();
		}
		if (spExcelApp)
		{
			spExcelApp->Quit(); 
		}

		CoUninitialize();

		return key_found;
	}


	static bool search_in_ppt(const std::string& file_path, const std::string& key, bool is_case  = false )
	{
		USES_CONVERSION;
		bool key_found = false;
		HRESULT hr  = E_FAIL;
		PowerPoint::_ApplicationPtr spPowerPointApp; 
		PowerPoint::PresentationsPtr spPres;
		PowerPoint::_PresentationPtr spPre ;

		CoInitialize(NULL);
		try
		{ 
			hr = spPowerPointApp.CreateInstance(L"PowerPoint.Application"); 
			if (SUCCEEDED(hr))
			{
				//must set to true
				spPowerPointApp->PutVisible(Office::msoTrue);
				spPres = spPowerPointApp->Presentations;
				spPre = spPres->Open(file_path.c_str(), Office::msoFalse, Office::msoFalse, Office::msoTrue);

				PowerPoint::SlidesPtr spSlides = spPre->Slides;
				int nCount = spSlides->GetCount();
				wprintf(L"The presentation currently has %ld slides\n", nCount);

				for (int i = 1; i <= nCount && !key_found; ++i)
				{
					PowerPoint::_SlidePtr spSlide = spSlides->Item(COleVariant((long)i));
					// Add some texts to the slide
					PowerPoint::ShapesPtr spShapes = spSlide->Shapes;

					int nShapeCount = spShapes->Count;

					for (int j = 1; j <= nShapeCount && !key_found; ++j)
					{
						PowerPoint::ShapePtr spShape = spShapes->Item((long)j);
						PowerPoint::TextFramePtr spTxtFrame = spShape->TextFrame;
						PowerPoint::TextRangePtr spTxtRange = spTxtFrame->TextRange;

						_bstr_t text = spTxtRange->GetText();
						const char* pszText = (const char*)text;
						if (pszText && strlen(pszText))
						{
							key_found = is_contains(pszText, key, is_case);
						}
					}
				}
			}
		} 
		catch (_com_error& error) 
		{ 
			wprintf(L"PowerPoint throws the error: %s\n", error.ErrorMessage());
			wprintf(L"Description: %s\n", (LPCWSTR) error.Description());
		}

		if (spPre)
		{
			spPre->Close();
			spPre = NULL;
		}

		if (spPowerPointApp)
		{
			spPowerPointApp->Quit(); 
			spPowerPointApp = NULL;
		}

		CoUninitialize();

		return key_found;
	}

	static bool search_in_pdf(const std::string& file_path, const std::string& key, bool is_case  = false )
	{
		USES_CONVERSION;
		bool key_found = false;
		BOOL bRet = FALSE;

		bRet =  AfxOleInit();
		try
		{
			COleException e;

			CAcroApp * pAcroApp = new CAcroApp();
			if (pAcroApp)
			{
				bRet = pAcroApp->CreateDispatch(TEXT("AcroExch.App"), &e);

				CAcroAVDoc * pAcroAvDoc = new CAcroAVDoc();
				if (pAcroAvDoc)
				{
					bRet = pAcroAvDoc->CreateDispatch(TEXT("AcroExch.AVDoc"), &e);

					if (bRet)
					{

						long lRet = pAcroAvDoc->Open(A2T(file_path.c_str()), NULL);

						if(lRet != 0) 
						{
							BOOL bCaseSensitive = false, bWholeWordsOnly=true, bReset= true;
							lRet = pAcroAvDoc->FindText(A2T(key.c_str()), bCaseSensitive, bWholeWordsOnly, bReset);
							if(lRet == 0)
							{
								AfxMessageBox(TEXT("Cannot find the text in the document."));
							}
							else
							{
								key_found = true;
								AfxMessageBox(TEXT("Succeed to find the text in the document."));
							}
						}
						else
						{
							AfxMessageBox(TEXT("Error in opening a PDF file."));
						}

						pAcroAvDoc->Close(1);
					}


					delete pAcroAvDoc;
					pAcroAvDoc = NULL; 
				}

				pAcroApp->CloseAllDocs();
				pAcroApp->Hide();
				pAcroApp->Exit();

				// clean memory
				delete pAcroApp;
				pAcroApp = NULL;  
			}

		}catch(...)
		{

		}
		return  key_found;

	}

};//class key_searcher_util


#endif
注意:使用以上COM组件API的前提条件是本机已安装Microsoft Office和Adobe Acrobat(及其SDK)。
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值