# python+selenium抓取h5端京东商品信息 (Scraping JD.com H5/mobile product data with Python + Selenium)

 

from flask import Flask,request,render_template
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import sqlite3
import json
import time


class jd_mobile_project:
    def __init__(self, chromedriver_path='./chromedriver.exe',
                 device_name='iPhone 6', headless=True, implicit_wait=1):
        """Start a Chrome session that emulates a mobile device.

        The defaults reproduce the previous hard-coded configuration, so
        ``jd_mobile_project()`` behaves as before.

        Args:
            chromedriver_path: Path to the chromedriver executable.
            device_name: Device name passed to Chrome's ``mobileEmulation``
                experimental option.
            headless: When true, Chrome is started with ``--headless``.
            implicit_wait: Seconds passed to ``driver.implicitly_wait``.
        """
        chrome_options = Options()
        if headless:
            chrome_options.add_argument('--headless')
        chrome_options.add_experimental_option(
            'mobileEmulation', {'deviceName': device_name})
        # Launch Chrome through the given chromedriver binary.
        self.driver = webdriver.Chrome(
            options=chrome_options, executable_path=chromedriver_path)
        self.driver.implicitly_wait(implicit_wait)

    def login_jd(self):
        # with open('mobile.txt', 'r') as f:
        #     # 使用json读取cookies 注意读取的是文件 所以用load而不是loads
        #     cookies_list = json.load(f)
        #     for cookie in cookies_list:
        #         # print(cookie)
        #         self.driver.add_cookie(cookie)
        #     self.driver.refresh()
        #使用ck登录
        f =open('mobile.txt','r',encoding='utf-8')
        # print(f.readline().split(';'))
        for i in f.readline().split(';'):
            # print(i.strip())
            if '=' in i.strip():
                cookie=i.strip().split('=')
                dict={}
                dict['domain']='.jd.com'
                dict['expiry']=2481587844
                dict['httpOnly']=False
                dict['httpOnly']=False
                dict['name']=cookie[0]
                dict['path']='/'
                dict['secure']=False
                dict['value'] = cookie[1]
                print(dict)
                self.driver.add_cookie(dict)
            else:
                pass
        self.driver.refresh()
        try:
            self.driver.find_element_by_xpath('/html/body/div[2]/div/div[1]/div[3]/div[1]/span').text
            return '成功'
        except:
            return '失败'

    #获取优惠券列表功能
    def get_coupons(self):
        """Scrape the account's coupon list into the ``coupons`` table.

        Opens the JD mobile home page, logs in through ``login_jd`` when the
        header shows the logged-out text, opens the coupon list and, for each
        coupon element found, stores amount, condition, date, category, the
        URL reached through the coupon's "use" button and the coupon batch id
        taken from that URL. Existing rows in ``coupons`` are deleted first,
        but only when at least one coupon element was found.

        The coupon entry is clicked exactly once. Previously it was clicked a
        second time after a successful re-login, at which point the page had
        already changed.

        Returns:
            'success' on completion, otherwise a message describing which
            step failed.
        """
        # Function-scope import keeps this method self-contained.
        from urllib.parse import parse_qs, urlsplit

        coupon_entry_xpath = '/html/body/div[2]/div/div[1]/div[3]/div[1]/span'
        coupon_item_xpath = '//div[@class="xcoupon type_dong"]'
        insert_sql = ("insert into coupons(money,limits,`time`,category,href,coupon_batch ) "
                      "values (?,?,?,?,?,?)")

        self.driver.get('https://home.m.jd.com/myJd/home.action')
        try:
            logintext = self.driver.find_element_by_xpath('//*[@id="header"]/span[2]').text
        except Exception:
            # Same message get_prodects uses when the header is missing.
            return '请关闭服务重新打开'
        if logintext == '京东登录注册':
            # Not logged in: try the saved cookies before going on.
            if self.login_jd() != '成功':
                return '登录失败'

        try:
            self.driver.find_element_by_xpath(coupon_entry_xpath).click()
        except Exception:
            return '进入优惠券列表按钮未找到'
        try:
            coupon_items = self.driver.find_elements_by_xpath(coupon_item_xpath)
        except Exception:
            return '优惠券列表中的数据未找到'

        num = len(coupon_items)
        if num == 0:
            return 'success'

        conn = sqlite3.connect("customers.db")
        try:
            cursor = conn.cursor()
            cursor.execute('DELETE FROM coupons')
            conn.commit()
            for i in range(num):
                # Re-locate the elements on every pass: the loop navigates
                # away and back, so earlier element handles cannot be reused.
                items = self.driver.find_elements_by_xpath(coupon_item_xpath)
                if i >= len(items):
                    break
                item = items[i]
                price_box = item.find_element_by_class_name('single_range')
                # Coupon amount
                coupon_price = price_box.find_element_by_xpath('div/span[2]').text
                # Coupon condition
                limit = price_box.find_element_by_xpath('div[2]').text
                # Validity date
                valid_time = item.find_element_by_class_name('xcoupon_info_date').text
                # Products the coupon is limited to
                category = item.find_element_by_class_name('vam').text

                # Click via JavaScript so the click works even when the
                # button is covered by another element.
                use_btn = item.find_element_by_class_name('xcoupon_use_btn')
                self.driver.execute_script('arguments[0].click()', use_btn)
                href = self.driver.current_url

                # Prefer the named query parameter; fall back to the old
                # positional rule (value after the second '=').
                batch_values = parse_qs(urlsplit(href).query).get('couponbatch')
                if batch_values:
                    coupon_batch = batch_values[0]
                else:
                    parts = href.split('=')
                    coupon_batch = parts[2].split('&')[0] if len(parts) > 2 else ''

                row = (int(coupon_price), limit, valid_time, category, href, coupon_batch)
                try:
                    cursor.execute(insert_sql, row)
                    conn.commit()
                except sqlite3.Error as e:
                    print('插入数据报错,报错信息:' + str(e))
                self.driver.back()
        finally:
            conn.close()
        return 'success'

    #下拉刷新功能
    def Drop_down_refresh(self):
        px = 0
        while True:
            px += 1
            try:
                self.driver.find_element_by_id('selSelect').click()
                isok = 1
            except:
                isok = 2
            if isok == 2:
                js1 = ("window.scrollTo(0," + str(px * 1000) + ")")
                self.driver.execute_script(str(js1))
            else:
                return 'success'

    #获取商品列表信息
    def get_prodects(self, couponid):
        """Scrape the products a coupon applies to into ``product_list``.

        Loads the coupon search page for ``couponid``, deletes the rows
        previously stored for that coupon, then walks the result pages (one
        per ``<option>`` of the ``selSelect`` element) and inserts name,
        price, sku id and link of every product entry that can be parsed.

        ``couponid`` comes from the request URL, so it is bound as an SQL
        parameter rather than formatted into the statement.

        Args:
            couponid: Coupon batch id used in the search URL and stored in
                the ``coupon_id`` column.

        Returns:
            A success message naming the coupon id, or a message describing
            the step that failed.
        """
        url = ('https://so.m.jd.com/list/couponSearch.action?ptag=&couponbatch=' + str(couponid)
               + '&coupon_shopid=210857&sceneval=2&jxsid=16178647291543884298')
        insert_sql = ("insert into product_list(product_name,product_price,product_sku,"
                      "product_url,coupon_id) values (?,?,?,?,?)")

        conn = sqlite3.connect("customers.db")
        try:
            cursor = conn.cursor()
            self.driver.get(url)
            try:
                logintext = self.driver.find_element_by_xpath('//*[@id="header"]/span[2]').text
            except Exception:
                return '请关闭服务重新打开'
            if logintext == '京东登录注册':
                # Not logged in: inject the saved cookies. The result is
                # deliberately ignored: login_jd probes for an element by an
                # absolute home-page XPath, which may not exist on this page.
                self.login_jd()

            try:
                pages = self.driver.find_element_by_id('selSelect').find_elements_by_xpath('./option')
            except Exception:
                return '商品列表没有找到'

            cursor.execute('DELETE FROM product_list where coupon_id=?', (str(couponid),))
            conn.commit()

            page_count = len(pages)
            for page in range(page_count):
                # Scroll until the page selector is reachable.
                self.Drop_down_refresh()
                try:
                    prodects = self.driver.find_elements_by_xpath('//div[@class="search_prolist_item"]')
                except Exception:
                    return '商品列表没有找到'
                for prodect in prodects:
                    try:
                        # Product sku id
                        skuid = prodect.get_attribute('skuid')
                        # Product name
                        name = prodect.find_element_by_class_name('search_prolist_title').text
                        # Product link
                        tourl = prodect.find_element_by_id('link_' + str(skuid)).get_attribute('tourl')
                        # Price is carried in the link's query string.
                        price = tourl.split('price=')[1].split('&')[0]
                    except Exception:
                        # Best effort: skip entries that cannot be parsed.
                        continue
                    try:
                        cursor.execute(insert_sql, (name, price, skuid, tourl, str(couponid)))
                        conn.commit()
                    except sqlite3.Error as e:
                        return '插入数据报错,报错信息:' + str(e)
                # Go to the next page; nothing to click after the last one.
                if page + 1 < page_count:
                    try:
                        next_page = self.driver.find_element_by_id('pageNext')
                    except Exception:
                        break
                    self.driver.execute_script('arguments[0].click()', next_page)
        finally:
            # Runs on every exit path, including the early returns above.
            conn.close()
        return 'id是' + str(couponid) + '的优惠券对应的商品列表抓取成功'

    #商品优惠信息
    def product_detail(self, url, main_id):
        """Scrape a product page's promotions into ``product_discount``.

        Loads ``url`` and inspects the ``promoteList`` element. When its
        ``style`` attribute is ``display: none;`` a single placeholder row is
        stored. Otherwise three groups of promotion rows are read in turn;
        each group is best-effort, so a lookup failure in one group does not
        stop the others.

        Args:
            url: Product page URL to load.
            main_id: Value stored in the ``main_id`` column of every row.

        Returns:
            None on success, or an error message string if an insert failed.
        """
        conn = sqlite3.connect("customers.db")
        try:
            self.driver.get(url)
            promote_list = self.driver.find_element_by_id('promoteList')
            if promote_list.get_attribute('style') == 'display: none;':
                return self._store_discount(conn, (main_id, '此商品没有促销政策'))

            # Group 1: titled promotions, each followed by its gift entries.
            # NOTE(review): an XPath starting with '//' is evaluated against
            # the whole document even when called on an element, so the gift
            # lookup below is not limited to the current row. Left unchanged
            # because the page structure cannot be confirmed from here.
            try:
                rows = promote_list.find_elements_by_xpath(
                    '//*[@id="promoteList"]/div[@class="de_row prom_item"]')
                for row in rows:
                    title = row.find_element_by_class_name('txt').text
                    error = self._store_discount(conn, (main_id, title))
                    if error:
                        return error
                    for gift in row.find_elements_by_xpath('//div[@class="gift_list_item item "]'):
                        gift_url = gift.get_attribute('tourl')
                        gift_name = gift.find_element_by_class_name('name').text
                        error = self._store_discount(conn, (main_id, gift_name, gift_url))
                        if error:
                            return error
            except Exception:
                # Best effort: this group is absent or could not be parsed.
                pass

            # Groups 2 and 3 differ only in the class attribute they match.
            linked_row_xpaths = (
                '//div[@class="de_row prom_item higher_de_tag "]',
                '//div[@class="de_row prom_item  "]',
            )
            for xpath in linked_row_xpaths:
                try:
                    for row in promote_list.find_elements_by_xpath(xpath):
                        text = row.find_element_by_class_name('de_span').find_element_by_xpath('span').text
                        link = row.get_attribute('tourl')
                        error = self._store_discount(conn, (main_id, text, link))
                        if error:
                            return error
                except Exception:
                    # Best effort, as above.
                    pass
            return None
        finally:
            # Runs on every exit path, including the early error returns.
            conn.close()

    @staticmethod
    def _store_discount(conn, row):
        """Insert one ``product_discount`` row; return an error message or None.

        ``row`` is ``(main_id, title)`` or ``(main_id, title, url)``.
        """
        if len(row) == 3:
            sql = "insert into product_discount(main_id,title,url) values (?,?,?)"
        else:
            sql = "insert into product_discount(main_id,title) values (?,?)"
        try:
            conn.execute(sql, row)
            conn.commit()
        except Exception as e:
            return '插入数据报错,报错信息:' + str(e)
        return None


# Scraper instance shared by the route handlers below. Creating it runs
# jd_mobile_project.__init__, which starts the Chrome driver at import time.
comm=jd_mobile_project()

# Flask application object that the @app.route decorators below register on.
app=Flask(__name__)

@app.route('/')
def index():
    """Render the ``jd_mobile/index.html`` template for the site root."""
    template_name = 'jd_mobile/index.html'
    return render_template(template_name)

#保存优惠券列表
@app.route('/getcoupons/')
def getcoupons():
    """Run the coupon scrape and report its outcome as plain text.

    Returns the success message when ``comm.get_coupons()`` returns
    'success'; otherwise the failure prefix followed by the returned message.
    """
    outcome = comm.get_coupons()
    if outcome != 'success':
        return '获取优惠券失败' + outcome
    return '获取优惠券成功'
#读取优惠券列表
@app.route('/read_coupons_list/')
def read_coupons_list():
    """Render every row of the ``coupons`` table with the coupon list template.

    Rows are fetched into a list before rendering, so the template does not
    depend on an open cursor. On failure the error text is returned with
    status 500; returning the exception object itself is not a valid Flask
    response.
    """
    conn = None
    try:
        conn = sqlite3.connect("customers.db")
        rows = conn.execute('select * from coupons').fetchall()
        return render_template('jd_mobile/coupons_list.html', result=rows)
    except Exception as e:
        return str(e), 500
    finally:
        # conn stays None when connect() itself failed.
        if conn is not None:
            conn.close()
@app.route('/product_list/<couponid>')
def product_list(couponid):
    """Scrape the products for ``couponid`` and return the scraper's message."""
    return comm.get_prodects(couponid)

# Read the stored product list from the database (POST filters by product name)
@app.route('/read_product_list/',methods=['GET','POST'])
def read_product_list():
    """Render the stored product list, optionally filtered by name.

    GET renders every row of ``product_list``. POST reads the ``search`` form
    field and renders the rows whose ``product_name`` contains it. The search
    text is bound as an SQL parameter, never concatenated into the statement.

    The former ``isdisplay`` branch was removed: it was guarded by a variable
    fixed to '2' and could never run.

    On a database or rendering failure the error text is returned with
    status 500.
    """
    sql = 'select * from product_list'
    params = ()
    if request.method == 'POST':
        search = request.form['search']
        sql += ' where product_name like ?'
        params = ('%' + search + '%',)

    conn = None
    try:
        conn = sqlite3.connect("customers.db")
        rows = conn.execute(sql, params).fetchall()
        return render_template('jd_mobile/products_list.html', result=rows)
    except Exception as e:
        return str(e), 500
    finally:
        # conn stays None when connect() itself failed.
        if conn is not None:
            conn.close()

@app.route('/read_product_detail/')
def read_product_detail():
    """Rebuild ``product_discount`` by scraping every stored product page.

    Clears the ``product_discount`` table, reads ``id`` and ``product_url``
    of every row in ``product_list``, closes the connection, then calls
    ``comm.product_detail`` for each product with a 2-second pause after
    each call.
    """
    db = sqlite3.connect("customers.db")
    cur = db.cursor()
    # Clear the previously stored discount rows.
    cur.execute('DELETE FROM product_discount ')
    db.commit()
    # Collect the scrape targets before the connection is closed.
    targets = [
        {'main_id': row[0], 'url': row[1]}
        for row in cur.execute('select id,product_url from product_list')
    ]
    db.close()
    for target in targets:
        comm.product_detail(target['url'], target['main_id'])
        time.sleep(2)
    return '成功'
@app.route('/read_product_discount/<int:id>')
def read_product_discount(id):
    """Render the ``product_discount`` rows whose ``main_id`` equals ``id``.

    The rows are fetched into a list and the connection is closed before
    rendering; previously the connection was never closed.
    """
    conn = sqlite3.connect("customers.db")
    try:
        rows = conn.execute(
            'select * from product_discount where main_id=?', (id,)).fetchall()
    finally:
        conn.close()
    return render_template('jd_mobile/product_discount.html', result=rows)

@app.route('/Delete_item/<id>')
def Delete_item(id):
    """Delete a product and its discount rows.

    ``id`` arrives as free text from the URL. It is converted to an integer
    and bound as an SQL parameter, so it cannot alter the statements. A
    non-integer id is rejected with status 400.
    """
    try:
        item_id = int(id)
    except ValueError:
        return '商品id无效', 400

    conn = sqlite3.connect("customers.db")
    try:
        conn.execute('DELETE FROM product_list where id=?', (item_id,))
        conn.execute('DELETE FROM product_discount where main_id=?', (item_id,))
        # One commit, so both deletes are applied together.
        conn.commit()
    finally:
        conn.close()
    return "删除商品成功"


@app.route('/islogin',methods=['GET','POST'])
def islogin():
    """Save the cookie string posted in the ``ck`` form field to ``mobile.txt``.

    GET requests previously fell through and returned None, which is not a
    valid Flask response; they now get a short hint instead.
    """
    if request.method != 'POST':
        return '请使用POST方式提交ck'
    ck = request.form['ck']
    with open('mobile.txt', 'w', encoding='utf-8') as f:
        f.write(ck)
    return 'ck保存成功,可以尝试是否可以抓取'


# Start the Flask server only when this file is run as a script.
if __name__ == '__main__':

    app.run()

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值