用户操作
[留言]  [发消息]  [加为好友] 
订阅我的博客
XML聚合    FeedSky
订阅到鲜果
订阅到Google
订阅到抓虾
faydu的公告
文章分类
    菲度搜索
    南京外包网
    垂直搜索引擎
    行业搜索引擎研究
    存档

    原创  菲度垂直搜索引擎 代码注释 1 收藏

    Imports System.Collections
    Imports System.Data
    Imports System.Data.SqlClient
    Imports System.IO
    Imports System.Threading
    Imports Microsoft.VisualBasic
    '胡光伟写于2005-08-12
    '垂直搜索引擎

    ' Entry module of the vertical search engine crawler/indexer.
    ' Main loads the seed URLs from the initurl table, queues one thread-pool
    ' work item per seed, waits for the crawl to finish, then stores the
    ' collected items and builds the index.
    Module Main
        ' Event Main blocks on after all work items have been queued.
        ' NOTE(review): presumably signalled by the VisitObject workers when the
        ' last of them finishes - the code that sets it is not in this module.
        Public DoneEvent As New ManualResetEvent(False)
        ' One flag per seed row; every entry is initialised to False in Main.
        Public DWork() As Boolean
        ' Queue of UNode entries that is drained and printed after the crawl.
        Public Qerror As New Queue(1000, 1.5)
        ' One queue per seed row, created in Main.
        Public Qseach() As Queue
        ' Sn is incremented once for every queued work item; Psn is not used in this module.
        Public Sn, Psn As Integer
        ' Upper index (row count - 1) of the seed rows loaded from initurl.
        Public GcountinitWorkEvernt As Integer

        ' Program entry point. Any exception is written to the console and
        ' ends the run; on success the program waits for the user to type
        ' "exit" before closing the database.
        Sub Main()
            Try
                Dim mywait As String
                Dim sl As UNode()
                Console.WriteLine("Begin......")
                initDb()

                ' initurl holds the seed addresses; stype "R" means the address is
                ' analysed as an RSS feed, "W" means it is analysed as a web page.
                Dim initds As New DataSet
                Dim cmd As SqlCommand = New SqlCommand("select * from initurl where id between 1 and 5 order by id desc", Groblecon)
                Dim initdapter As New SqlDataAdapter(cmd)
                Try
                    initdapter.Fill(initds, "initdata")
                Finally
                    ' The rows are now copied into the DataSet, so the command and
                    ' adapter are no longer needed.
                    initdapter.Dispose()
                    cmd.Dispose()
                End Try

                Dim rows As DataRowCollection = initds.Tables!initdata.Rows
                GcountinitWorkEvernt = CInt(rows.Count - 1)
                ReDim sl(GcountinitWorkEvernt)
                ReDim DWork(GcountinitWorkEvernt)
                ReDim Qseach(GcountinitWorkEvernt)

                Dim ij As Integer
                For ij = 0 To GcountinitWorkEvernt
                    DWork(ij) = False
                    Qseach(ij) = New Queue
                Next

                ' Build one UNode per seed row.
                For ij = 0 To GcountinitWorkEvernt
                    With rows.Item(ij)
                        Dim un As New UNode(-1, Sn, 0, .Item("weburl"), False, .Item("rank"), .Item("utype"), Trim(.Item("encode")))
                        un.Tiltle = .Item("title")
                        un.Ext = .Item("Ext")
                        un.Rank = .Item("Rank")
                        un.Content = .Item("content")
                        un.InitStep = .Item("step")
                        un.MetaValue = .Item("meta")
                        un.SearchType = .Item("stype")
                        un.TemplateId = CInt(.Item("temid"))
                        un.MaxGet = CInt(.Item("maxcount"))
                        un.MaxDeepLevel = CInt(.Item("deeplevel"))
                        sl(ij) = un
                    End With
                Next

                ' Start a new thread for each id's seed address; please do not
                ' use more than 25 of them.
                Dim c As Integer
                For c = 0 To UBound(sl)
                    Interlocked.Increment(Sn)
                    Dim vobj As New VisitObject(sl(c).MaxGet, c, sl(c))
                    ThreadPool.QueueUserWorkItem(New WaitCallback(AddressOf vobj.Vall), sl(c))
                Next

                ' With no seed rows nothing was queued, so no worker exists to
                ' signal DoneEvent and waiting would block forever.
                ' NOTE(review): assumes DoneEvent is set only by the queued workers - confirm.
                If sl.Length > 0 Then
                    DoneEvent.WaitOne()
                End If

                Console.WriteLine("ALL FINISHED.")
                Console.WriteLine("FtG......")
                ' Print the address of every node left in the error queue.
                While Qerror.Count <> 0
                    Dim ua As UNode = CType(Qerror.Dequeue, UNode)
                    Console.WriteLine(ua.Address)
                End While

                OptizeDataBase()

                ' The information found is held in the queues; store it in the
                ' database now, keeping RSS items and web items separate.
                AddInfoItem()
                Console.WriteLine("Index DataBase.")

                ' Process with Lucene to generate the inverted index files.
                IndexDoc()

                Console.WriteLine("Finish Index DataBase.")
                ' Keep the console open until the user types "exit".
                mywait = Console.ReadLine
                While mywait <> "exit"
                    mywait = Console.ReadLine
                End While
                CloseDb()
            Catch ex As Exception
                Console.WriteLine(ex.ToString)
            End Try
        End Sub

    End Module

    发表于 @ 2006年04月18日 17:17:00 | 评论( loading... ) | 编辑| 举报| 收藏

    旧一篇:菲度垂直搜索引擎代码  | 新一篇:菲度垂直搜索引擎 代码注释 2

    • 发表评论
    • 评论内容:
    •  
    Copyright © faydu
    Powered by CSDN Blog