<!-- regular -->

<!DOCTYPE regular-resource PUBLIC "Spider-Core workflow dtd"
        "http://rc.91yong.com/spider.dtd"
        >
<regular-resource prefix="ganji">

    <!-- validateLogin: one presence rule (exists="yes") whose regex looks for the
         login-success marker "/user/login_success@username=<name>@".
         NOTE(review): presumably the check passes only when the pattern is found;
         confirm the meaning of the exists attribute against spider.dtd. -->
    <node id="validateLogin">

        <node exists="yes">
            <filter><![CDATA[/user/login_success@username=[^@]+@]]></filter>
        </node>
    </node>

    <node id="searchResumePage">

        <node>
            <filter><![CDATA[赶集网为您找到<em>\s*(\d+)\s*</em>条信息]]></filter>
            <node name="totalCount" order="1">
                <value-filter name="DealText"></value-filter>
            </node>
        </node>

        <node>
            <filter><![CDATA[(?i)<ul[^>]*class=['"]pageLink[^>]+>(?:[^<]*<li.*?</li>)*[^<]*<li[^>]*><a[^>]+>(\d+).*?</li>]]></filter>
            <node name="pageTotal" order="1">
                <value-filter name="DealText"></value-filter>
            </node>
        </node>

        <node>
            <filter><![CDATA[<input type="hidden" value="([^"]+)"\s*id="to-select-city-default"]]></filter>
            <node name="city" order="1">
                <value-filter name="DealText"></value-filter>
            </node>
        </node>

    <node name="resumes" times="multi">

            <filter><![CDATA[<dl class="list_noimg job-list">\s*[^>]+>[^f]+f="/([^"]+)"[^>]+>([^<]+)<[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>[^>]+>\s*([^>])\s*[^>]+>[^>]+>\s*(\d+)岁[^>]+>[^>]+>[^>]+>[^>]+>\s*([^<]+)[^>]+>[^>]+>\s*([^<]+)<[^>]+>\s*[^>]+>([^<]+)<]]></filter>
            <node name="outterId" order="1">
                <value-filter name="DealText"></value-filter>
            </node>

            <node name="name" order="2">
                <value-filter name="DealText"></value-filter>
            </node>
            <node name="gender" order="3">
                <value-filter name="DealText"></value-filter>
                <dic-filter>
                    <dicname>sex</dicname>
                    <from>value</from>
                    <to>stdkey</to>
                </dic-filter>
            </node>
            <node name="dateOfBirth" order="4">
                <value-filter name="DealText"></value-filter>
            </node>
            <node name="degree" order="5">
                <value-filter name="DealText"></value-filter>
                <dic-filter>
                    <dicname>educational</dicname>
                    <from>value</from>
                    <to>stdkey</to>
                </dic-filter>
            </node>
            <node name="serviceYear" order="6">
                <value-filter name="DealText"></value-filter>
            </node>

            <node name="updateTime" order="7">
                <value-filter name="DealText"></value-filter>
            </node>

 

    </node>

    </node>

    <node id="detailResume">

<!--        <node>
            <filter><![CDATA[<meta http-equiv="mobile-agent" content="format=xhtml; url=http://wap.ganji.cn/([^/]+)/]]></filter>
            <node name="outterId" order="1">
                <value-filter name="DealText"/>
            </node>
        </node>-->

        <node>
            <filter><![CDATA[<dt class="fc4b".*?[^<]class="fc70"]]></filter>
            <filter><![CDATA[>([^<\s]+)<.*?>([^<\s]+)<.*?>([^<\s]+)岁<]]></filter>
            <node name="name" order="1">
                <value-filter name="DealText"/>
            </node>
            <node name="gender" order="2">
                <dic-filter>
                    <dicname>sex</dicname>
                    <from>value</from>
                    <to>stdkey</to>
                </dic-filter>
            </node>
            <node name="dateOfBirth" order="3">
                <value-filter name="DealText"/>
            </node>
        </node>

        <node>
            <filter><![CDATA[发布时间:.*?([^<]+)]]></filter>
            <node name="updateTime" order="1">
                <value-filter name="DealText"/>
            </node>
        </node>

<!--        <node>
            <filter><![CDATA[<span>([男|女])]]></filter>
            <node name="gender" order="1">
                <value-filter name="DealText"/>
            </node>
        </node>-->

        <node>
            <filter><![CDATA[教育经历</dt.*?</dd>]]></filter>
            <filter><![CDATA[\|([^|]+)\|([^|]+)\|([^|<]+)]]></filter>
            <node name="major" order="3">
                <value-filter name="DealText"/>
            </node>
            <node name="degree" order="2">
                <dic-filter>
                    <dicname>educational</dicname>
                    <from>value</from>
                    <to>stdkey</to>
                </dic-filter>
            </node>
            <node name="graduatedFrom" order="1">
                <value-filter name="DealText"/>
            </node>


        </node>

 

        <node>
            <filter><![CDATA[工作年限:([^/li]+)[^</]]]></filter>
            <node name="workYear" order="1">
                <value-filter name="DealText"/>
            </node>
        </node>


        <node>
            <filter><![CDATA[电话\s+:.*?<img.*?src=".*?([^"]+)"]]></filter>
            <node name="mobile" order="1">
                <value-filter name="DealText"/>
            </node>
        </node>

        <node>
            <filter><![CDATA[邮箱\s+:([^<]*)]]></filter>
            <node name="email" order="1">
                <value-filter name="DealText"/>
            </node>
        </node>

 

        <node>
            <filter><![CDATA[工作经验</d.*?[^<+]</p>]]></filter>
            <filter><![CDATA[\|([^|]+)\|([^|]+)<]]></filter>
            <node name="lastCompany" order="1">
                <value-filter name="DealText"/>
            </node>
            <node name="lastPosition" order="2">                                 <!--这里的属性如果跟resumespiderresume同名则会自带将值扶进resumeEntity  -->
                <value-filter name="DealText"/>
            </node>
        </node>


        <node>
            <filter><![CDATA[工作地点.*?</i.*?([^>]+)-]]></filter>

            <node name="currentAddress" order="1">
                <value-filter name="DealText"/>
                <dic-filter>
                    <dicname>workareas</dicname>
                    <from>value</from>
                    <to>stdkey</to>
                </dic-filter>
            </node>

        </node>


    </node>

 


    <node id="mobile_pic">
        <node name="urls" times="multi">
            <filter><![CDATA[([^'"]+tel_img/[^'"]+)]]></filter>
            <node name="url" order="1"></node>
        </node>
    </node>

    <!-- checkCodeValidate: one absence rule (exists="no") on the page title of the
         robot-confirmation (captcha) page.
         NOTE(review): presumably the check passes only when that title is NOT
         present; confirm the meaning of exists="no" against spider.dtd. -->
    <node id="checkCodeValidate">
        <node exists="no">
            <filter><![CDATA[<title>机器人确认</title>]]></filter>
        </node>
    </node>


    <node id="validate1">
        <node>
            <filter><![CDATA[(?i)data-url-template=['"](http://www.ganji.com/findjob/download_resume\.php\?[^'"]+)]]></filter>
            <node name="group1" order="1"/>
        </node>
    </node>


    <node id="checkFreeCount">
        <node>
            <filter><![CDATA[今天还可以免费查看[^>]+>[1-5]</span>份]]></filter>
             <node  name="freecheck"/>
        </node>
        <node>
            <filter><![CDATA[查看联系方式需要扣除<b style="font-weight:bold">1</b>份下载简历数,您目前有]]></filter>
            <node  name="freecheck"/>
        </node>
    </node>


    <!-- getCityCode: captures the value of the "city_code" query parameter
         (everything up to the next "&") as "getCityCode". -->
    <node id="getCityCode">
        <node>
            <filter><![CDATA[city_code=([^&]+)]]></filter>
            <node name="getCityCode" order="1"/>
        </node>
    </node>

    <!-- getPostId: captures the value of the "post_id" query parameter
         (everything up to the next "&") as "getPostId". -->
    <node id="getPostId">
        <node>
            <filter><![CDATA[post_id=([^&]+)]]></filter>
            <node name="getPostId" order="1"/>
        </node>
    </node>

    <!-- getJob_postion: captures the value of the "job_postion" query parameter
         (everything up to the next "&") as "getJob_postion".
         NOTE(review): "postion" is the spelling the regex matches in the input,
         so it must not be corrected here unless the site's parameter changes. -->
    <node id="getJob_postion">
        <node>
            <filter><![CDATA[job_postion=([^&]+)]]></filter>
            <node name="getJob_postion" order="1"/>
        </node>
    </node>

    <!-- replaceHtml: captures the value of the "job_postion" query parameter as
         "getJob_postion".
         NOTE(review): the filter and the result name are identical to those of
         the getJob_postion rule, which looks like a copy-paste placeholder;
         confirm what replaceHtml is meant to extract. -->
    <node id="replaceHtml">
        <node>
            <filter><![CDATA[job_postion=([^&]+)]]></filter>
            <node name="getJob_postion" order="1"/>
        </node>
    </node>

<!--    <node id="imgUrl">
        <node times="multi" name="urls">
            <filter><![CDATA[<img[^>]+src=['"]([^'"]+)['"]]]></filter>
            <node name="url" order="1">
            </node>
        </node>
    </node>-->

    <!-- getPhone: captures the shortest span that starts at "phone", contains
         "email" and ends at the following comma, as "phoneAndEmail".
         The two commented-out filters below are earlier variants; the second
         one is identical to the active filter. -->
    <node id="getPhone">
        <node>
<!--            <filter><![CDATA[</p></div>(<script type="text/javascript".*?function.*?(tip_id).*</script>)]]></filter>-->
            <!--<filter><![CDATA[(phone.*?email.*?,)]]></filter>-->
            <filter><![CDATA[(phone.*?email.*?,)]]></filter>

            <node name="phoneAndEmail" order="1"/>
        </node>
    </node>

    <!-- getResumePoint: captures the text of the first <em> after the
         "remaining resume count" label.
         NOTE(review): the result name is spelled "reumePoint" (without the "s");
         it is left as is because consumers may look the value up by this exact
         name. Confirm before renaming. -->
    <node id="getResumePoint">
        <node>
            <filter><![CDATA[剩余简历份数.*?<em>([^<]+)</em>]]></filter>

            <node name="reumePoint" order="1"/>
        </node>
    </node>

</regular-resource>

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值