经过好几天的摸索,再加上参考了大佬的分页方法,这次不需要用 selenium 就可以把商品爬下来。不过期间发现爬到的数据有很多是重复的,又花了点时间处理。下面讲讲思路,欢迎大佬指点。
数据库连接
import pymysql
from scrapy import signals
from twisted.enterprise import adbapi
from pymysql import cursors
class MysqlTwistedPipline(object):
def __init__(self, dbpool):
# Keep the pool object handed in by the caller on the instance.
# NOTE(review): presumably the adbapi.ConnectionPool built in from_settings — confirm,
# since the call that constructs this class is not visible in this excerpt.
self.dbpool = dbpool
# Start with an empty SQL string; where it is filled in is not shown in this excerpt.
self._sql = ''
@classmethod
def from_settings(cls, settings):
dbparms = dict(
host=settings[ "MYSQL_HOST" ],
db=settings[ "MYSQL_DBNAME" ],
user=settings[ "MYSQL_USER" ],
passwd=settings[ "MYSQL_PASSWORD" ],
port=settings["MYSQL_PORT"],
charset='utf8' ,
cursorclass=cursors.DictCursor,
use_unicode=True,
)
dbpool = adbapi.ConnectionPool(&