第3关:模拟登陆拉勾网爬取招聘信息
原文链接: https://blog.csdn.net/qq_44111805/article/details/116331326
原文章基础pipelines.py文件修改为:
# -*- coding: utf-8 -*-
import pymysql
class AjaxprojectPipeline(object):
    """Scrapy item pipeline that persists scraped Lagou job items into MySQL.

    Lifecycle: ``open_spider`` opens the connection and creates the
    ``lgjobs`` table if it does not exist, ``process_item`` inserts one row
    per item, and ``close_spider`` closes the connection.
    """

    def open_spider(self, spider):
        """Open the MySQL connection and create the target table if absent.

        :param spider: the spider being opened (unused here, Scrapy API).
        """
        self.connection = pymysql.connect(
            host='localhost',
            port=3306,
            user='root',
            passwd='123123',
            db='mydb',
            charset='utf8'
        )
        with self.connection.cursor() as cursor:
            # jobName is the primary key, so a re-crawled posting with the
            # same title is rejected by MySQL; process_item handles that.
            sql = '''
                CREATE TABLE IF NOT EXISTS lgjobs (
                    jobName varchar(20) CHARACTER SET utf8 NOT NULL,
                    jobMoney varchar(10),
                    jobNeed varchar(20) CHARACTER SET utf8,
                    jobCompany varchar(20) CHARACTER SET utf8,
                    jobType varchar(20) CHARACTER SET utf8,
                    jobSpesk varchar(20) CHARACTER SET utf8,
                    PRIMARY KEY(jobName)
                )
            '''
            cursor.execute(sql)
        self.connection.commit()

    def process_item(self, item, spider):
        """Insert one job item into ``lgjobs`` and return it.

        Duplicate ``jobName`` values (the primary key) are skipped instead
        of crashing the spider; any other database error is rolled back and
        re-raised so Scrapy can report it.

        :param item: mapping with jobName/jobMoney/jobNeed/jobCompany/
            jobType/jobSpesk keys.
        :param spider: the running spider (unused here, Scrapy API).
        :returns: the unmodified item, for downstream pipelines.
        """
        row = (
            item['jobName'],
            item['jobMoney'],
            item['jobNeed'],
            item['jobCompany'],
            item['jobType'],
            item['jobSpesk'],
        )
        sql = '''
            INSERT INTO lgjobs (jobName, jobMoney, jobNeed, jobCompany, jobType, jobSpesk)
            VALUES (%s, %s, %s, %s, %s, %s)
        '''
        try:
            with self.connection.cursor() as cursor:
                # Parameterized query: the driver escapes the scraped text,
                # preventing SQL injection.
                cursor.execute(sql, row)
            self.connection.commit()
        except pymysql.err.IntegrityError:
            # Primary-key collision: this jobName was stored on an earlier
            # crawl. Roll back and keep the spider running.
            self.connection.rollback()
        except Exception:
            # Leave the connection usable for later items, then re-raise.
            self.connection.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Close the database connection when the spider finishes."""
        self.connection.close()