asp.net 基于html的文件搜索引擎实现

最新推荐文章于 2023-04-19 12:13:56 发布

iteye_11486

最新推荐文章于 2023-04-19 12:13:56 发布

阅读量123

点赞数

文章标签：数据库

1.引擎的实现部分（其中涉及到数据库的表的操作，这里使用的是存储过程）：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Data;
using System.Data.SqlClient;
using System.Text;
using System.Text.RegularExpressions;
using Internship.Data;

namespace Internship.WebSite
{
    /// <summary>
    /// In-memory keyword search over job postings and company introductions.
    /// The catalog is loaded once from the database (via stored procedures) when
    /// the type is first used, and every query is ranked against that catalog.
    /// </summary>
    public static class Searching
    {
        /// <summary>Maximum number of content characters kept in a search hit.</summary>
        private const int MaxSnippetLength = 200;

        /// <summary>Weight of one title match relative to one content match.</summary>
        private const int TitleMatchWeight = 20;

        private const string HighlightOpen = "<font color=\"#FF0066\">";
        private const string HighlightClose = "</font>";

        private static readonly Regex StripHtmlRegex = new Regex("<[^>]*>", RegexOptions.Compiled);

        private static List<Entry> _Catalog = new List<Entry>();

        /// <summary>The searchable entries. Replaced wholesale by the static constructor.</summary>
        public static List<Entry> Catalog
        {
            get { return _Catalog; }
            set { _Catalog = value; }
        }

        /// <summary>
        /// Loads the catalog from the database on first use of the type.
        /// NOTE(review): an exception thrown here (e.g. the database is unreachable)
        /// surfaces as a TypeInitializationException on first access.
        /// </summary>
        static Searching()
        {
            Catalog = BuildCatalog();
        }

        /// <summary>Adds an entry to the searchable catalog.</summary>
        public static void AddToCatalog(Entry entry)
        {
            Catalog.Add(entry);
        }

        /// <summary>Removes an entry from the searchable catalog.</summary>
        public static void RemoveFromCatalog(Entry entry)
        {
            Catalog.Remove(entry);
        }

        /// <summary>
        /// Searches the catalog for the given text.
        /// </summary>
        /// <param name="searchItem">Free-text query; terms are separated by spaces.</param>
        /// <returns>
        /// Copies of the matching entries, in the order produced by sorting the
        /// results, with the content truncated and the matched terms wrapped in
        /// highlight markup. Empty when the query is null, empty, or has no usable terms.
        /// </returns>
        public static List<Entry> Hit(String searchItem)
        {
            List<Entry> hits = new List<Entry>();
            if (string.IsNullOrEmpty(searchItem))
            {
                return hits;
            }

            foreach (Result result in BuildResultSet(searchItem, false))
            {
                hits.Add(result.IEntry);
            }

            return hits;
        }

        /// <summary>
        /// Ranks every catalog entry against the search terms.
        /// A title match counts 20 points and a content match counts 1 point;
        /// entries with a rank of zero are left out.
        /// </summary>
        /// <param name="searchTerm">Raw query text.</param>
        /// <param name="includeComments">Not used by this implementation.</param>
        /// <returns>The sorted results (ordering is defined by <c>Result</c>'s comparison).</returns>
        private static List<Result> BuildResultSet(string searchTerm, bool includeComments)
        {
            List<Result> results = new List<Result>();
            if (string.IsNullOrEmpty(searchTerm))
            {
                return results;
            }

            string term = CleanContent(searchTerm.ToLowerInvariant().Trim(), false);
            string[] terms = term.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // With no terms the pattern would be "()", which matches at every
            // position and would report every catalog entry as a hit.
            if (terms.Length == 0)
            {
                return results;
            }

            Regex matcher = BuildTermRegex(terms);

            foreach (Entry entry in Catalog)
            {
                string title = entry.Title ?? string.Empty;
                string content = entry.Content ?? string.Empty;

                int rank = matcher.Matches(title).Count * TitleMatchWeight
                           + matcher.Matches(content).Count;
                if (rank <= 0)
                {
                    continue;
                }

                if (content.Length > MaxSnippetLength)
                {
                    content = content.Substring(0, MaxSnippetLength) + "....";
                }

                // Work on a copy: truncating and highlighting the catalog entry
                // itself would permanently corrupt the index for later searches.
                // NOTE(review): copies only the Entry properties this file uses;
                // extend if Entry has more.
                Entry hit = new Entry();
                hit.Kinds = entry.Kinds;
                hit.ID = entry.ID;
                hit.PostDate = entry.PostDate;
                hit.Title = Highlight(title, matcher);
                hit.Content = Highlight(content, matcher);

                Result result = new Result();
                result.Rank = rank;
                result.IEntry = hit;
                results.Add(result);
            }

            results.Sort();
            return results;
        }

        /// <summary>
        /// Builds one alternation regex out of the search terms. Terms are escaped
        /// so characters such as '.' or '$' match literally, and matching ignores
        /// case because terms are lower-cased while titles keep their original case.
        /// </summary>
        private static Regex BuildTermRegex(string[] terms)
        {
            string[] escaped = new string[terms.Length];
            for (int i = 0; i < terms.Length; i++)
            {
                escaped[i] = Regex.Escape(terms[i]);
            }

            string pattern = "(" + string.Join("|", escaped) + ")";
            return new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        }

        /// <summary>
        /// Wraps every match in highlight markup in a single pass, so markup
        /// inserted for one term is never rewritten by a later term.
        /// </summary>
        private static string Highlight(string src, Regex matcher)
        {
            return matcher.Replace(src, match => HighlightOpen + match.Value + HighlightClose);
        }

        /// <summary>
        /// Reads all job postings and company introductions from the database
        /// and converts them into searchable entries.
        /// </summary>
        /// <returns>A new list holding job postings first, then company introductions.</returns>
        public static List<Entry> BuildCatalog()
        {
            List<Entry> entries = new List<Entry>();

            LoadEntries("GetAllJobPostings", entries, delegate(IDataRecord row)
            {
                Entry en = new Entry();
                en.Kinds = 1; // 1 for job posting, 2 for company introduction
                en.ID = row["JobPostingID"].ToString();
                en.Title = row["JobPostingTitle"].ToString();
                en.PostDate = row["JobPostingTime"].ToString();
                en.Content = CleanContent(row["JobPostingText"].ToString(), true);
                return en;
            });

            LoadEntries("GetAllCompanyIntroductions", entries, delegate(IDataRecord row)
            {
                Entry en = new Entry();
                en.Kinds = 2; // 1 for job posting, 2 for company introduction
                en.ID = row["IntroductionID"].ToString();
                en.Title = row["CompanyName"].ToString();

                // Separate the fields with spaces so the last word of one field
                // is not fused with the first word of the next in the index.
                string raw = row["Introduction"].ToString() + " "
                             + row["Address"].ToString() + " "
                             + row["Phone"].ToString();
                en.Content = CleanContent(raw, true);
                return en;
            });

            return entries;
        }

        /// <summary>
        /// Executes a stored procedure and appends one mapped entry per returned row.
        /// </summary>
        private static void LoadEntries(string storedProcedure, List<Entry> target, Func<IDataRecord, Entry> map)
        {
            using (SqlConnection cn = new SqlConnection(InternshipSettings.WebSiteConnectionString))
            using (SqlCommand cmd = new SqlCommand(storedProcedure, cn))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cn.Open();
                using (SqlDataReader dr = cmd.ExecuteReader())
                {
                    while (dr.Read())
                    {
                        target.Add(map(dr));
                    }
                }
            }
        }

        /// <summary>
        /// Strips all HTML tags from the specified string.
        /// </summary>
        /// <param name="html">The string containing HTML</param>
        /// <returns>A string in which every tag is replaced by a single space</returns>
        public static string StripHtml(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            return StripHtmlRegex.Replace(html, " ");
        }

        /// <summary>
        /// Normalises text for indexing and querying: optionally strips HTML,
        /// removes a fixed set of punctuation characters, lower-cases the words
        /// and drops words of a single character.
        /// </summary>
        /// <param name="content">The text to clean; null is treated as empty.</param>
        /// <param name="removeHtml">Whether HTML tags are stripped first.</param>
        /// <returns>The kept words, each followed by one space.</returns>
        public static string CleanContent(string content, bool removeHtml)
        {
            if (removeHtml)
            {
                content = StripHtml(content);
            }

            if (string.IsNullOrEmpty(content))
            {
                return string.Empty;
            }

            // The published listing had its backslashes turned into forward
            // slashes ("//", '/n', '/r') and contained Replace("", ...), which
            // throws ArgumentException; both are corrected here.
            content = content
                .Replace("\\", string.Empty)
                .Replace("|", string.Empty)
                .Replace("(", string.Empty)
                .Replace(")", string.Empty)
                .Replace("[", string.Empty)
                .Replace("]", string.Empty)
                .Replace("*", string.Empty)
                .Replace("?", string.Empty)
                .Replace("}", string.Empty)
                .Replace("{", string.Empty)
                .Replace("^", string.Empty)
                .Replace("+", string.Empty);

            string[] words = content.Split(new char[] { ' ', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
            StringBuilder sb = new StringBuilder();
            foreach (string rawWord in words)
            {
                string word = rawWord.ToLowerInvariant().Trim();
                if (word.Length > 1)
                {
                    sb.Append(word).Append(' ');
                }
            }

            return sb.ToString();
        }
    }
}

2.如何使用？

// Run a search only when the search box contains text.
if (!string.IsNullOrEmpty(TB.Text))
{
    CurPage = 1;
    entry = Searching.Hit(TB.Text);

    // Total pages: the number of full pages, plus one more when a partial page remains.
    TolPage = entry.Count % PAGE_SIZE != 0
        ? entry.Count / PAGE_SIZE + 1
        : entry.Count / PAGE_SIZE;

    DataBind(CurPage, TolPage);
}