<!--
xpath 教程:https://blog.csdn.net/li6727975/article/details/46126079
解析json教程:https://blog.csdn.net/luxideyao/article/details/77802389
-->
<module name="招聘" type="51job">
<!--
  User-supplied search parameters.
  "keyword" is the job-search term (a job title, a company name, etc.);
  change its value attribute as required. It is referenced elsewhere in
  this file as ${keyword}.
-->
<select>
<input
    name="keyword"
    type="text"
    value="java"
    label="相关关键词,可以职位或公司名称等,和51job官网一样"/>
</select>
<!-- Home page of the site this module crawls. -->
<webSite>https://www.51job.com/</webSite>
<!--
  Comma-separated list of result names. Every name here matches the name
  attribute of a field element defined in the operators below; presumably
  these are the columns the engine exports (TODO confirm against the engine).
-->
<result>职位,地点,薪资,公司名称,地址,公司性质,规模,分类,招聘要求,发布时间,公司福利,职位信息,公司信息</result>
<!--
  The site has anti-crawler protection, so proxy IP switching is needed.
  For the proxy to take effect, a plan that includes proxy IP usage is required.
-->
<proxyInfo />
<!-- Variable substitution throughout this engine uses the form ${variableName}. -->
<!--
  Operator "category": requests page 1 of the search results for ${keyword}
  and extracts the total number of result pages into the field total_pages.

  Fixes applied to the request URL:
    * every ampersand in the query string is now written as an XML entity
      reference, because a bare ampersand in element text is not well-formed;
    * the "degreefrom" parameter, which had been corrupted into a degree sign
      followed by "reefrom", is restored;
    * the stray trailing space before the closing url tag is removed.

  The text inside header is intentionally left unindented: whitespace in
  text content is delivered to the engine as data.
-->
<operator name="category" desc="获取总页数">
<!-- charset gbk: presumably the text encoding the engine uses for this request and its response (TODO confirm). -->
<request charset="gbk">
<url>http://search.51job.com/list/000000,000000,0000,00,9,99,${keyword},2,1.html?lang=c&amp;stype=1&amp;postchannel=0000&amp;workyear=99&amp;cotype=99&amp;degreefrom=99&amp;jobterm=99&amp;companysize=99&amp;lonlat=0%2C0&amp;radius=-1&amp;ord_field=0&amp;confirmdate=9&amp;fromType=&amp;dibiaoid=0&amp;address=&amp;line=&amp;specialarea=00&amp;from=&amp;welfare=</url>
<header>
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Referer: https://search.51job.com
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
</header>
<output>
<!--
  total_pages: the XPath selects the first span under any element whose
  class is "p_in". The script line presumably hands that text to
  NumberUtil.getNumber so that only the numeric part is kept
  (TODO confirm against the engine).
-->
<field name="total_pages" desc="总页数">
<parser>//*[@class="p_in"]/span[1]</parser>
<script>NumberUtil;getNumber;${total_pages}</script>
</field>
</output>
</request>
</operator>
<!--
  Operator "pagination" (per its desc attribute, "pagination" is a name
  reserved by the engine). It has two parts:

    1. page: for each pageNo from 1 to ${total_pages}, request the search
       result list and collect, per result row, the detail-page link
       (listUrl) and the publish time.
    2. criteria: request each ${listUrl} and extract the job detail fields.

  Fixes applied:
    * the less-than signs in both "for" attributes are written as XML entity
      references, because a literal less-than sign is not allowed inside an
      attribute value; a parser yields the same attribute text as before;
    * every ampersand in the list URL and in the Referer header line is
      written as an XML entity reference, because a bare ampersand in
      element text is not well-formed;
    * the "degreefrom" parameter, which had been corrupted into a degree sign
      followed by "reefrom", is restored in both places;
    * the stray trailing space before the closing url tag is removed.

  The text inside each header element is intentionally left unindented:
  whitespace in text content is delivered to the engine as data.
-->
<operator name="pagination" desc="分页, pagination为系统命名 ">
<!-- Loop over result pages; ${total_pages} is presumably the value produced by the "category" operator (TODO confirm). -->
<page for="1 &lt;= pageNo &lt;= ${total_pages}">
<request charset="gbk">
<url>http://search.51job.com/list/000000,000000,0000,00,9,99,${keyword},2,${pageNo}.html?lang=c&amp;stype=1&amp;postchannel=0000&amp;workyear=99&amp;cotype=99&amp;degreefrom=99&amp;jobterm=99&amp;companysize=99&amp;lonlat=0%2C0&amp;radius=-1&amp;ord_field=0&amp;confirmdate=9&amp;fromType=&amp;dibiaoid=0&amp;address=&amp;line=&amp;specialarea=00&amp;from=&amp;welfare=</url>
<header>
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Referer: https://search.51job.com
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
</header>
<output>
<!--
  Row loop: i starts at 4 and has no upper bound here; presumably the
  engine stops when the XPath for row i matches nothing, and rows before
  the fourth div are not job entries (TODO confirm both).
-->
<table for="4 &lt;= i">
<!-- Link to the job detail page for row i; consumed by the criteria request below. -->
<field name="listUrl">
<parser>//*[@id="resultList"]/div[${i}]/p/span/a/@href</parser>
</field>
<!-- Publish time: the fourth span of row i. -->
<field name="发布时间">
<parser>//*[@id="resultList"]/div[${i}]/span[4]</parser>
</field>
</table>
</output>
</request>
</page>
<criteria>
<!-- Detail request: follows each list link and scrapes the job detail page. -->
<request charset="gbk" desc="从列表进入爬取详情信息 ">
<url>${listUrl}</url>
<header>
Host: jobs.51job.com
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Referer: https://search.51job.com/list/000000,000000,0000,00,9,99,${keyword},2,${pageNo}.html?lang=c&amp;stype=&amp;postchannel=0000&amp;workyear=99&amp;cotype=99&amp;degreefrom=99&amp;jobterm=99&amp;companysize=99&amp;providesalary=99&amp;lonlat=0%2C0&amp;radius=-1&amp;ord_field=0&amp;confirmdate=9&amp;fromType=&amp;dibiaoid=0&amp;address=&amp;line=&amp;specialarea=00&amp;from=&amp;welfare=
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
</header>
<output>
<!-- Job title. -->
<field name="职位">
<parser>//*[@class="tHeader tHjob"]/div/div[1]/h1</parser>
</field>
<!--
  The fields below use absolute XPaths from the document root, so they
  depend on the exact layout of the detail page.
-->
<!-- Work location. -->
<field name="地点">
<parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/span</parser>
</field>
<!-- Salary. -->
<field name="薪资">
<parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/strong</parser>
</field>
<!-- Company name. -->
<field name="公司名称">
<parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/p[1]/a</parser>
</field>
<!--
  Intermediate field: one paragraph of text that the next three fields
  split on the "|" character. It is not listed in the module result element.
-->
<field name="value">
<parser>/html/body/div[3]/div[2]/div[2]/div/div[1]/p[2]</parser>
</field>
<!-- Company type: first "|"-separated part of ${value}. -->
<field name="公司性质">
<script>"${value}".split("|")[0];</script>
</field>
<!-- Company size: second "|"-separated part of ${value}. -->
<field name="规模">
<script>"${value}".split("|")[1];</script>
</field>
<!-- Industry category: third "|"-separated part of ${value}. -->
<field name="分类">
<script>"${value}".split("|")[2];</script>
</field>
<!-- Job requirements. -->
<field name="招聘要求">
<parser>/html/body/div[3]/div[2]/div[3]/div[1]/div/div</parser>
</field>
<!-- Company benefits. -->
<field name="公司福利">
<parser>/html/body/div[3]/div[2]/div[3]/div[1]/div/p</parser>
</field>
<!-- Job description. -->
<field name="职位信息">
<parser>/html/body/div[3]/div[2]/div[3]/div[2]/div/p[1]</parser>
</field>
<!-- Work address (text nodes only). -->
<field name="地址">
<parser>/html/body/div[3]/div[2]/div[3]/div[3]/div/p/text()</parser>
</field>
<!-- Company information (first text node only). -->
<field name="公司信息">
<parser>/html/body/div[3]/div[2]/div[3]/div[4]/div/text()[1]</parser>
</field>
</output>
</request>
</criteria>
</operator>
</module>