pachong2

<!--
    xpath   教程:https://blog.csdn.net/li6727975/article/details/46126079
    解析json教程:https://blog.csdn.net/luxideyao/article/details/77802389
-->
<!--
    Crawl definition for the 51job recruitment site.

    Flow, as declared below:
      1. operator "category" requests page 1 of the search result list for
         ${keyword} and extracts the total page count into ${total_pages}.
      2. operator "pagination" requests every list page from 1 to
         ${total_pages}, collecting each posting's detail URL (listUrl) and
         publish time, then its criteria section requests each ${listUrl}
         and extracts the detail fields named in the result element.

    Note: "&" inside URLs is written as &amp; and "<" inside attribute
    values as &lt; so that the document is well-formed XML; a parser
    decodes both back to the literal characters.
-->
<module name="招聘" type="51job">
      <!-- Set the value of the "keyword" input below to the search term you need. -->
      <select>
            <input name="keyword"   type="text" value="java" label="相关关键词,可以职位或公司名称等,和51job官网一样"/>
      </select>

      <webSite>https://www.51job.com/</webSite>
      <result>职位,地点,薪资,公司名称,地址,公司性质,规模,分类,招聘要求,发布时间,公司福利,职位信息,公司信息</result>
      <!-- The site has anti-crawler protection, so proxy IPs must be rotated;
           this only takes effect with a plan that includes proxy IP usage. -->
      <proxyInfo />

      <!-- Variable substitution throughout this engine uses the form ${variableName}. -->

      <!-- Step 1: fetch the first result page and read the total page count.
           The XPath result is post-processed by the NumberUtil;getNumber script
           (presumably strips non-digit text; confirm against the engine). -->
      <operator  name="category" desc="获取总页数">
            <request charset="gbk">
                <url>http://search.51job.com/list/000000,000000,0000,00,9,99,${keyword},2,1.html?lang=c&amp;stype=1&amp;postchannel=0000&amp;workyear=99&amp;cotype=99&amp;degreefrom=99&amp;jobterm=99&amp;companysize=99&amp;lonlat=0%2C0&amp;radius=-1&amp;ord_field=0&amp;confirmdate=9&amp;fromType=&amp;dibiaoid=0&amp;address=&amp;line=&amp;specialarea=00&amp;from=&amp;welfare=</url>
                <header>
                    Connection: keep-alive
                    Upgrade-Insecure-Requests: 1
                    User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36
                    Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
                    Referer: https://search.51job.com
                    Accept-Encoding: gzip, deflate, br
                    Accept-Language: zh-CN,zh;q=0.9
                </header>
                <output>
                    <field name="total_pages" desc="总页数">
                        <parser>//*[@class="p_in"]/span[1]</parser>
                        <script>NumberUtil;getNumber;${total_pages}</script>
                    </field>
                </output>
            </request>
      </operator>

      <!-- Step 2: walk every list page (pageNo from 1 to ${total_pages}), take the
           rows of the result list starting at div index 4, then open each row's
           detail URL and extract the posting fields. The field named "value" is an
           intermediate string that the following three fields split on "|". -->
      <operator  name="pagination" desc="分页, pagination为系统命名 ">
            <page  for="1 &lt;= pageNo &lt;= ${total_pages}">
                <request charset="gbk">
                    <url>http://search.51job.com/list/000000,000000,0000,00,9,99,${keyword},2,${pageNo}.html?lang=c&amp;stype=1&amp;postchannel=0000&amp;workyear=99&amp;cotype=99&amp;degreefrom=99&amp;jobterm=99&amp;companysize=99&amp;lonlat=0%2C0&amp;radius=-1&amp;ord_field=0&amp;confirmdate=9&amp;fromType=&amp;dibiaoid=0&amp;address=&amp;line=&amp;specialarea=00&amp;from=&amp;welfare=</url>
                    <header>
                        Connection: keep-alive
                        Upgrade-Insecure-Requests: 1
                        User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36
                        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
                        Referer: https://search.51job.com
                        Accept-Encoding: gzip, deflate, br
                        Accept-Language: zh-CN,zh;q=0.9
                    </header>
                    <output>
                        <table for="4 &lt;= i">
                            <field name="listUrl">
                                <parser>//*[@id="resultList"]/div[${i}]/p/span/a/@href</parser>
                            </field>
                            <field name="发布时间">
                                <parser>//*[@id="resultList"]/div[${i}]/span[4]</parser>
                            </field>
                        </table>
                    </output>
                </request>
            </page>
            <criteria>
                <request charset="gbk" desc="从列表进入爬取详情信息 ">
                    <url>${listUrl}</url>
                    <header>
                        Host: jobs.51job.com
                        Connection: keep-alive
                        Upgrade-Insecure-Requests: 1
                        User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36
                        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
                        Referer: https://search.51job.com/list/000000,000000,0000,00,9,99,${keyword},2,${pageNo}.html?lang=c&amp;stype=&amp;postchannel=0000&amp;workyear=99&amp;cotype=99&amp;degreefrom=99&amp;jobterm=99&amp;companysize=99&amp;providesalary=99&amp;lonlat=0%2C0&amp;radius=-1&amp;ord_field=0&amp;confirmdate=9&amp;fromType=&amp;dibiaoid=0&amp;address=&amp;line=&amp;specialarea=00&amp;from=&amp;welfare=
                        Accept-Encoding: gzip, deflate, br
                        Accept-Language: zh-CN,zh;q=0.9
                    </header>
                    <output>
                        <field name="职位">
                            <parser>//*[@class="tHeader tHjob"]/div/div[1]/h1</parser>
                        </field>
                        <field name="地点">
                            <parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/span</parser>
                        </field>
                        <field name="薪资">
                            <parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/strong</parser>
                        </field>
                        <field name="公司名称">
                            <parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/p[1]/a</parser>
                        </field>
                        <field name="value">
                            <parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/p[2]</parser>
                        </field>
                        <field name="公司性质">
                            <script>"${value}".split("|")[0];</script>
                        </field>
                        <field name="规模">
                            <script>"${value}".split("|")[1];</script>
                        </field>
                        <field name="分类">
                            <script>"${value}".split("|")[2];</script>
                        </field>
                        <field name="招聘要求">
                            <parser>/html/body/div[3]/div[2]/div[3]/div[1]/div/div</parser>
                        </field>
                        <field name="公司福利">
                            <parser>/html/body/div[3]/div[2]/div[3]/div[1]/div/p</parser>
                        </field>
                        <field name="职位信息">
                            <parser>/html/body/div[3]/div[2]/div[3]/div[2]/div/p[1]</parser>
                        </field>
                        <field name="地址">
                            <parser>/html/body/div[3]/div[2]/div[3]/div[3]/div/p/text()</parser>
                        </field>
                        <field name="公司信息">
                            <parser>/html/body/div[3]/div[2]/div[3]/div[4]/div/text()[1]</parser>
                        </field>
                    </output>
                </request>
            </criteria>

      </operator>
</module>

 

转载于:https://www.cnblogs.com/sky-ai/p/9839095.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值