delphi做一个本地网页的搜索引擎。
思路:1、建立本地网页的索引;2、在索引里面搜索关键字;3输出查找的结果。
第一步、检索因。
procedure TWebthread.GetFileList(AStrings: TStrings; ASourFile,//查找本地网页
FileName: string);
var sour_path,sour_file: string;
FileRec, subFileRec:TSearchrec;
i,n: Integer;
web:string;
begin
try
n := 0;
if rightStr(trim(ASourFile), 1) <> '/' then
sour_path :=trim(ASourFile) + '/'
else
sour_path :=trim(ASourFile);
sour_file:= FileName;
if not DirectoryExists(sour_path) then
begin
AStrings.Clear;
exit;
end;
if FindFirst(sour_path+'*.*',faAnyfile,FileRec) = 0 then
repeat
if ((FileRec.Attr and faDirectory) <> 0) then
begin
if ((FileRec.Name<> '.') and (FileRec.Name <> '..')) then
GetFileList(AStrings, sour_path+ FileRec.Name + '/', sour_file);
end
else
if FindFirst(sour_path + FileName,faAnyfile,subFileRec) = 0 then
repeat
if ((subFileRec.Attr and faDirectory) = 0)and(pos(sour_path+subFileRec.Name,TmpList.Text)=0) then
begin
path:=sour_path+subFileRec.Name;
Gettitle(path);
Postdata(path, Gettitle(path),' *');
end;
until FindNext(subFileRec)<>0;
until FindNext(FileRec)<>0;
SysUtils.FindClose(FileRec);
except
end;
end;
function TWebthread.Gettitle(ml: string): string;
var
i,j ,n,p:Integer;
s,m,s1 :WideString;
begin
Fupdate.memo2.Lines.Clear;
Fupdate.memo2.Lines.LoadFromFile(ml);
s :=Memo2.Text;
s1 := '';
//=============查找网页标题===============================
i:=pos('<title>',s);
if i =0 then
i := pos('<TITLE>',s);
s:=copy(s,i+7,length(s)-i-7);//
j:=pos('</title>',s);
if j =0 then
j:=pos('</TITLE>',s);
m:=copy(s,1,j-1);//
Result := m;
//=============查找网页内容===============================
n := pos('id=zoom>',s)+8;
if n-8 >0 then
s1 := rightstr(s,length(s)-n);
if s1 <>'' then
p := pos('</TD>',s1);
Content := Trim(UpperCase(leftstr(s1,p)));
if length(Content)>300 then
Content := leftstr(Content,300);
//=============过滤关键字===============================
Content := AnsiReplaceStr(Content,'<','');
Content := AnsiReplaceStr(Content,'/','');
Content := AnsiReplaceStr(Content,'>','');
Content := AnsiReplaceStr(Content,'BR','');
Content := AnsiReplaceStr(Content,'P','');
Content := AnsiReplaceStr(Content,'TD','');
Content := AnsiReplaceStr(Content,'TR','');
Content := AnsiReplaceStr(Content,'NBS;','');
Content := AnsiReplaceStr(Content,'NBSP;','');
Content := AnsiReplaceStr(Content,'&','');
if trim(Content)='' then
Content := '*';
end;
procedure TWebthread.Postdata(temp, temp1, temp3: string);//索引保存数据库
begin
try
temp := UpperCase(temp);
if (temp<>'') then
if (temp1<>'')then
begin
temp := rightstr(temp,length(temp)-pos('KNOWLEDGEB',temp));
with fupdate.ADOQpub do
begin
close;
sql.Clear;
sql.Add('Insert into web(path,title,Content) values(:a1,:a2,:a3)');
parameters.ParamByName('a1').Value := temp;
parameters.ParamByName('a2').Value := temp1;
parameters.ParamByName('a3').Value := Content;
execsql;
end;
end;
except
end;
end;
第二步、查找多个关键字(多个关键字要空格隔开)
procedure TMainForm.SpeedButton1Click(Sender: TObject);
var
Key,path,title,Content:string;
sqltext:string;
total,pagesize,pagecount:integer;
n,m:integer;
begin
total:=0;
pagesize:=5;
n := 0;
m:= 0;
webhtml := '';
if trim(Edit2.Text)='' then
begin
Messagebox(handle,'关键字不能为空!','系统提示',mb_iconinformation+mb_ok);
Edit2.SetFocus;
exit;
end;
//=======================查找多个关键字========================
if pos(' ',Trim(edit2.Text))<=0 then
sqltext := 'select * from web where title like'+quotedstr('%'+Trim(Edit2.Text)+'%')
else
begin
key := Edit2.Text;
sqltext := 'select * from web where title like '+quotedstr('%'+leftstr(Trim(key),pos(' ',Trim(key))-1)+'%');
key := rightstr(trim(key),length(Trim(key))-pos(' ',Trim(key)));
while pos(' ',Trim(key))>0 do
begin
sqltext := sqltext+' and title like '+quotedstr('%'+leftstr(Trim(key),pos(' ',Trim(key))-1)+'%');
key := rightstr(trim(key),length(Trim(key))-pos(' ',Trim(key)));
next;
end;
sqltext := sqltext + ' and title like '+quotedstr('%'+Trim(key)+'%');
end;
//============查找建立的索引表============================================
try
screen.Cursor := crhourglass;
try
with ADOQPUB do
begin
close;
sql.Clear;
sql.Text := sqltext;
open;
total := Recordcount;//查找到多少条记录
if (total mod pagesize)=0 then
pagecount := (total div pagesize)
else pagecount := (total div pagesize)+1;//可以输出几个页面
first;
while not eof do
begin
inc(n);
path := fieldbyname('path').AsString;//相对路径
path :=ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/'+ path ;//绝对路径
title := Trim(fieldbyname('title').AsString);//网页标题
Content := Trim(fieldbyname('Content').AsString);//网页内容
if trim(Content)='*'then
Content :='';
if trim(title)<>'' then
begin
if trim(path)<>'' then
begin
webhtml := webhtml+outcontent(title, Content,path);//以下为生成的类似百度的查找结果
if ((n mod pagesize)=0) then
begin
memo1.Lines.Append(outtitle(total,pagecount));
memo1.Lines.Append(outbody(total, pagesize));
memo1.Lines.Append(webhtml);
memo1.Lines.Append(outend());
memo1.Lines.SaveToFile(ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/KnowledgeB/Base/temp/'+inttostr((n div pagesize))+'.htm');
webhtml := '';
end
else if total - n < pagesize then
begin
if total = n then
begin
memo1.Lines.Append(outtitle(total,pagecount));
memo1.Lines.Append(outbody(total, pagesize));
memo1.Lines.Append(webhtml);
memo1.Lines.Append(outend());
memo1.Lines.SaveToFile(ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/KnowledgeB/Base/temp/'+inttostr((n div pagesize)+1)+'.htm');
webhtml := '';
end;
end;
end;//if trim(path)<>'' then
end;//if trim(title)<>'' then
next;
end;
end;
shellexecute(handle,nil,pchar(ExtractFiledir(ExtractFIleDir(ExtractFileDir(application.ExeName)))+'/KnowledgeB/Base/base1.htm'),nil,nil,sw_show);
except
Messagebox(handle,'查找失败!','系统提示',mb_iconinformation+mb_ok);
end;
finally
screen.Cursor := crdefault;
end;
end;