python爬虫封装函数_Python3爬虫(面向对象)

昨天说的多线程+数据库存储没有实现出来,只是做了数据库和爬虫的封装,明天继续完成。

数据库封装:

'''

Created on 2017年12月7日

@filename: SqlClass.py

@author: geng

'''

import pymysql

import traceback

class SqlOpt:
    """Thin convenience wrapper around a single PyMySQL connection.

    The connection is opened when the object is created. If connecting
    fails, the traceback is printed and the object stays unconnected: every
    later call then reports failure (``0`` / ``None``) instead of raising.
    """

    # Live connection, or None when connecting failed or after link_close().
    __link = None

    # Connection settings in the order: host, user, password, database.
    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration or the environment.
    __linkTuple = ("localhost", "root", "123456", "test")

    __charset = "utf8"

    def __init__(self):
        """Open the database connection (best effort, never raises)."""
        host, user, password, database = self.__linkTuple
        try:
            # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
            # positional arguments to connect().
            self.__link = pymysql.connect(
                host=host,
                user=user,
                password=password,
                database=database,
                charset=self.__charset,
            )
        except Exception:
            # Report the failure and leave the object unconnected.
            traceback.print_exc()

    def exec_dml(self, sql, params=None):
        """Execute a data-modifying statement and commit it.

        Args:
            sql: The SQL statement, optionally containing ``%s`` placeholders.
            params: Optional values bound to the placeholders by the driver.
                Prefer this over building SQL with string formatting.

        Returns:
            1 if the statement was executed and committed, 0 otherwise.
        """
        if self.__link is None:
            print("SqlOpt: no database connection, statement not executed")
            return 0
        try:
            # The context manager closes the cursor even when execute() fails.
            with self.__link.cursor() as cursor:
                cursor.execute(sql, params)
            self.__link.commit()
            return 1
        except Exception:
            traceback.print_exc()
            try:
                self.__link.rollback()
            except Exception:
                # The connection itself may be broken; the original error
                # has already been reported above.
                traceback.print_exc()
            return 0

    def exec_dql(self, sql, params=None):
        """Execute a query and return all of its rows.

        Args:
            sql: The SQL query, optionally containing ``%s`` placeholders.
            params: Optional values bound to the placeholders by the driver.

        Returns:
            The result of ``cursor.fetchall()``, or None if the query failed
            or there is no connection.
        """
        if self.__link is None:
            print("SqlOpt: no database connection, query not executed")
            return None
        try:
            with self.__link.cursor() as cursor:
                cursor.execute(sql, params)
                return cursor.fetchall()
        except Exception:
            traceback.print_exc()
            return None

    def link_close(self):
        """Close the connection if one is open; safe to call more than once."""
        if self.__link is not None:
            try:
                self.__link.close()
            finally:
                # Forget the handle so a repeated call does not try to close
                # an already-closed connection.
                self.__link = None

'''

link = SqlOpt()

sql = "select * from tb_record limit 0, 10"

res = link.exec_dql(sql)

print(res)

'''

爬虫封装:

'''

Created on 2017年12月7日

@filename: SpiderClass.py

@author: geng

'''

import requests

import re

class SpiderOpt:
    """Perform one HTTP request at construction time and help parse its body."""

    # Seconds to wait for the server when the caller does not pass its own
    # ``timeout``; without any timeout a stalled server blocks forever.
    DEFAULT_TIMEOUT = 10

    def __init__(self, method, url, **kwargs):
        """Send the request immediately and keep the response.

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            url: Target URL.
            **kwargs: Passed through to ``requests.request`` (``params``,
                ``headers``, ...). A ``timeout`` given here takes precedence
                over ``DEFAULT_TIMEOUT``.

        Raises:
            Whatever ``requests.request`` raises (connection errors,
            timeouts, ...); nothing is caught here.
        """
        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)
        self.__response = requests.request(method, url, **kwargs)

    def get_response(self):
        """Return the response body decoded as text."""
        return self.__response.text

    def regular_opt(self, pat, data):
        """Return every match of the regular expression ``pat`` in ``data``.

        Args:
            pat: Regular expression (pattern string or compiled pattern).
            data: Text to search.

        Returns:
            The list produced by ``findall``: matched strings, or tuples of
            groups when the pattern contains more than one group.
        """
        return re.compile(pat).findall(data)

工具类封装:

'''

Created on 2017年12月7日

@filename: HelperClass.py

@author: geng

'''

import time

import random

class Helper:
    """Miscellaneous helpers used by the crawler script."""

    def get_time(self):
        """Return the current epoch time in milliseconds, split at the dot.

        The float ``time.time() * 1000`` is converted to its string form and
        split on ``'.'``, so the result is a list of strings: normally the
        whole milliseconds followed by the fractional digits.
        """
        millis_text = str(1000 * time.time())
        return millis_text.split('.')

Main函数:

'''

Created on 2017年12月7日

@filename: main.py

@author: geng

'''

from com.geng.SpiderClass import SpiderOpt

from com.geng.HelperClass import Helper

# Shared helper instance; the page loop below calls helper.get_time().
helper = Helper()

# Alternative product (underwear) -- kept for reference, currently disabled.

# url = "https://rate.tmall.com/list_detail_rate.htm?itemId=547773818796&spuId=842179060&sellerId=907782288&order=3&append=0&content=1&tagId=&posi=&picture=&ua=098%23E1hvB9vnvPOvUvCkvvvvvjiPPLqWzjY8RLs9sj3mPmPWljl8RLzvljtWRFqWAjlW9phvHnQGNVinzYswzv5b7MJgzRjw9HuCdphvmpvUG9U4V9v1agwCvvpvCvvv2QhvCvvvMMGCvpvVvmvvvhCvmphvLvA4dQvjEGLIAXZTKFEw9Exrs8TJEcqUAj7Q%2Bul1occ63Wv7rjlEgnLv%2B2Kz8Z0vQRAn%2BbyDCwFIAXZTKFEw9Exr08TJnDeDyO2vHd8tvpvIvvvvvhCvvvvvvUEpphvvs9vv9DCvpvQovvmmZhCv2jhvvUEpphvWw4yCvv9vvUvQORQH1UyCvvOUvvVvayptvpvhvvvvv8wCvvpvvUmmdphvmpvWrUpGPvC1nLyCvvpvvvvv&isg=AurqQavURICRWchqI2pb1fXnO1CGWGXUUQpYDnSi0z2Kp4lhXeg-xXOVQeVA&needFold=0"

# Product whose reviews are currently crawled (overcoat).

url = "https://rate.tmall.com/list_detail_rate.htm?itemId=538581707711&spuId=700193432&sellerId=761456278&order=3&append=0&content=1&tagId=&posi=&picture=&ua=098%23E1hvwvvWvRyvUvCkvvvvvjiPPLqUtjtnRsMvgjEUPmP9AjECR2sO6jrPPFsW1jnm3QhvCvmvphm5vpvhvvCCBvhCvvOvChCvvvvEvpCW9a8ByBzhV4g7%2B3%2BuAj7JVXu4X9nr1CuKHdUf8rCl5F%2FAdcH2afmAdX9XjomxfBeKhqUf8rClHd8rejpiYPeAdX9XjLVxfXeKHs9lBdyCvm9vvhCvvvvvvvvvBJZvvUChvvCHtpvv9ZUvvhcDvvmCb9vvBJZvvUhKuphvmvvvpoH%2BZgApkphvC9hvpyPOAvGCvvpvvPMMRphvCvvvphmrvpvEvvV%2Busyv9X6c9phvHHiaTHk9zHi4c4uOts1N7rH4NYGBRphvCvvvphv%3D&isg=AlJSCV-sXM5zGqAyy2IzjS1voxg-YF2cubLwdhyrjoXnL_YpBPDWDbkN6b3o&needFold=0"

# Non-greedy patterns that capture the raw "key":"value" fragments for
# auctionSku, rateContent and rateDate, in that order.
regular = [r'\"auctionSku\":\".*?\"', r'\"rateContent\":\".*?\"', r'\"rateDate\":\".*?\"']

# Receives the match lists appended by the page loop below.
info = []

# The request headers are the same for every page, so build them once.
kv = {'User-Agent': 'Googlebot'}

# Fetch review pages 1 through 4.
for page in range(1, 5):
    # get_time() yields the current epoch milliseconds as two strings:
    # the digits before and after the decimal point.
    t = helper.get_time()

    params = {
        'currentPage': page,
        '_ksTS': '{}_{}'.format(t[0], t[1]),
        # The callback name uses the second timestamp part plus one.
        'callback': 'jsonp{}'.format(int(t[1]) + 1),
    }

    spider = SpiderOpt("GET", url, params=params, headers=kv)
    data = spider.get_response()

    # Pattern order: 0 = colour/size, 1 = review text, 2 = review date.
    for pattern in regular:
        info.append(spider.regular_opt(pattern, data))

    # NOTE(review): the text this script was recovered from had lost its
    # indentation, so it cannot be determined whether this print belongs to
    # the page loop or to the pattern loop above. It is placed at page level
    # here, which prints (and removes from info) only the list appended
    # last, i.e. the review-date matches. Confirm the intended nesting.
    print("page : ", page, info.pop())

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值