这次把采集的数据存入CSV文件,之前写过把数据存入MySQL数据库的文章,请点击查看。一些准备工作,比如职位接口、伪装浏览器及传递参数等信息也请在之前的文章里查看,此处不再赘述。
完整代码如下:
# -*- coding: utf-8 -*-
import pandas as pd
#from bs4 import BeautifulSoup
import urllib.request as req
import urllib.parse
import re
import json
import sys
import time
import random
# Log the interpreter's default string encoding — a quick sanity check
# before handling the Chinese-language request/response payloads below.
default_encoding = sys.getdefaultencoding()
print(default_encoding)
class LagouCrawler:
def __init__(self,location_word,position_word,pages):
self.location_word = location_word
self.position_word = position_word
self.pages = pages
self.location_url = 'https://www.lagou.com/jobs/positionAjax.json?city=%s&needAddtionalResult=false'#positionAjax.json?city=北京&needAddtionalResult=false
def request_method(self,params):
#伪装浏览器&#