货比三家 tb1.3

import csv
import json
import os
import re
import subprocess
import time
import tkinter as tk

import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

def get_browser():
    """Search Taobao for the keyword typed in the GUI and export one result page.

    Reads the keyword, user name and password from the ``t3bkname``,
    ``t1name`` and ``t2pass`` text widgets, drives a headless Chrome through
    the search box and the login form, scrolls the result page, and then:

    * appends one CSV row per item to ``myget/淘宝--<keyword>.csv`` under the
      current working directory (the header row is written only when the
      file is new or empty);
    * downloads each item's thumbnail to ``myget/<title>.jpg``;
    * writes the browser cookies to ``mycookie.txt``.

    Any error is printed rather than raised, and "over" is printed when the
    function finishes, whether or not it succeeded.
    """
    # '1.0' is the first character of a Tk text widget.
    key_words = t3bkname.get('1.0', 'end').replace('\n', '')
    tb_user = t1name.get('1.0', 'end').replace('\n', '')
    tb_passwd = t2pass.get('1.0', 'end').replace('\n', '')

    # Characters that are not allowed in Windows file names.
    unsafe_chars = re.compile(r'[\\/:*?"<>|\r\n\t]')
    out_dir = os.path.join(os.getcwd(), 'myget')

    driver = None  # stays None if Chrome fails to start, so cleanup can tell
    try:
        os.makedirs(out_dir, exist_ok=True)

        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        options.add_argument("--disable-blink-features=AutomationControlled")
        driver = webdriver.Chrome(executable_path=r'chromedriver.exe', options=options)

        # Open Taobao and submit the keyword through the search box.
        driver.get('https://www.taobao.com/')
        driver.implicitly_wait(10)
        search_box = driver.find_element_by_id('q')
        search_box.send_keys(key_words)
        search_box.send_keys(Keys.ENTER)

        # Fill in and submit the login form.
        driver.find_element_by_id('fm-login-id').send_keys(tb_user)
        driver.find_element_by_id('fm-login-password').send_keys(tb_passwd)
        driver.find_element_by_class_name('fm-btn').click()

        # Scroll down in 500px steps (starting at 400px) so that the items
        # of the current page get loaded.
        for step in range(20):
            driver.execute_script('window.scrollTo(0,%s)' % (400 + 500 * step))
            time.sleep(0.1)

        goods_div = driver.find_element_by_id('mainsrp-itemlist')
        goods_list = goods_div.find_elements_by_class_name('item')

        # Build the cookie header once; it does not change per item.
        header_cookie = ';'.join(
            '%s=%s' % (cookie['name'], cookie['value'])
            for cookie in driver.get_cookies()
        )
        # NOTE(review): this stores session cookies in plain text on disk.
        with open('mycookie.txt', 'w', encoding="UTF-8") as cookie_file:
            cookie_file.write(header_cookie)
        headers = {
            'cookie': header_cookie,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

        csv_path = os.path.join(
            out_dir, '淘宝--%s.csv' % unsafe_chars.sub('_', key_words))
        need_header = (not os.path.exists(csv_path)
                       or os.path.getsize(csv_path) == 0)
        # newline='' lets the csv module control line endings itself.
        with open(csv_path, 'a', encoding='utf-8', newline='') as csv_file:
            writer = csv.writer(csv_file)
            if need_header:
                writer.writerow(['标题', '地址', '价格', '链接', '图片', '店铺', '付款'])

            for good in goods_list:
                # Title, location, price, link, image, shop, payment count.
                good_title = good.find_element_by_css_selector('.row a').text.replace("\n", "")
                good_address = good.find_element_by_css_selector('.location').text.replace("\n", "")
                good_price = good.find_element_by_css_selector('.price').text.replace("\n", "")
                good_link = good.find_element_by_css_selector('.pic a').get_attribute('href')
                good_img = good.find_element_by_css_selector('.pic img').get_attribute('src')
                good_shop = good.find_element_by_css_selector('.shop a').text.replace("\n", "")
                good_pay_nummber = good.find_element_by_css_selector('.deal-cnt').text.replace("\n", "")

                # csv.writer quotes fields, so commas inside a title no
                # longer shift the columns.
                writer.writerow([good_title, good_address, good_price, good_link,
                                 good_img, good_shop, good_pay_nummber])

                # Download the thumbnail; one failed image must not abort
                # the remaining items.
                print(good_img)
                try:
                    resimg = requests.get(good_img, headers=headers, timeout=30)
                    resimg.raise_for_status()
                except requests.RequestException as exc:
                    print('image download failed for %s: %s' % (good_img, exc))
                    continue
                img_path = os.path.join(
                    out_dir, '%s.jpg' % unsafe_chars.sub('_', good_title))
                with open(img_path, 'wb') as img_file:
                    img_file.write(resimg.content)

        # TODO(review): only the first result page is exported. The former
        # "next page" step looked up the compound class name 'item next',
        # which can never match, and nothing ran after it anyway.
    except Exception as exc:
        # GUI callback boundary: report the failure instead of hiding it.
        print('download failed: %r' % (exc,))
    finally:
        if driver is not None:
            # quit() also ends the chromedriver process; close() only closes
            # the current window.
            driver.quit()
        print("over")

    #--------------------------------------part 2
def get_browser2detail():
    """Save a full-page screenshot of every item's detail page.

    Reads the keyword, user name and password from the ``t3bkname``,
    ``t1name`` and ``t2pass`` text widgets, drives a headless Chrome through
    the Taobao search box and login form, scrolls the result page, and then
    opens each item's link and writes ``myget/detail<title>.png`` under the
    current working directory.

    Each detail page costs about six seconds of fixed waiting. Exceptions
    propagate to the caller; the browser is shut down either way.
    """
    # '1.0' is the first character of a Tk text widget.
    key_words = t3bkname.get('1.0', 'end').replace('\n', '')
    tb_user = t1name.get('1.0', 'end').replace('\n', '')
    tb_passwd = t2pass.get('1.0', 'end').replace('\n', '')

    # Characters that are not allowed in Windows file names.
    unsafe_chars = re.compile(r'[\\/:*?"<>|\r\n\t]')
    out_dir = os.path.join(os.getcwd(), 'myget')
    os.makedirs(out_dir, exist_ok=True)

    options = webdriver.ChromeOptions()
    # Headless mode is what allows the window to be resized to the full
    # page height for a long screenshot.
    options.add_argument('--headless')
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_argument("--disable-blink-features=AutomationControlled")
    driver = webdriver.Chrome(executable_path=r'chromedriver.exe', options=options)
    try:
        # Open Taobao and submit the keyword through the search box.
        driver.get('https://www.taobao.com/')
        driver.implicitly_wait(10)
        search_box = driver.find_element_by_id('q')
        search_box.send_keys(key_words)
        search_box.send_keys(Keys.ENTER)

        # Fill in and submit the login form.
        driver.find_element_by_id('fm-login-id').send_keys(tb_user)
        driver.find_element_by_id('fm-login-password').send_keys(tb_passwd)
        driver.find_element_by_class_name('fm-btn').click()

        # Scroll down in 500px steps (starting at 400px) so that the items
        # of the current page get loaded.
        for step in range(20):
            driver.execute_script('window.scrollTo(0,%s)' % (400 + 500 * step))
            time.sleep(0.1)

        goods_div = driver.find_element_by_id('mainsrp-itemlist')
        goods_list = goods_div.find_elements_by_class_name('item')

        # Read every title and link BEFORE leaving the result page: once
        # driver.get() loads a detail page, the item elements found above
        # no longer belong to the current document.
        details = [
            (
                good.find_element_by_css_selector('.row a').text.replace("\n", ""),
                good.find_element_by_css_selector('.pic a').get_attribute('href'),
            )
            for good in goods_list
        ]

        for good_title, detailslink in details:
            time.sleep(3)
            driver.get(detailslink)
            print(detailslink)
            # Resize the window to the full document so the screenshot
            # covers the whole page.
            width = driver.execute_script("return document.documentElement.scrollWidth")
            height = driver.execute_script("return document.documentElement.scrollHeight")
            print(width, height, '0')
            driver.set_window_size(width, height)
            time.sleep(3)
            shot_name = 'detail%s.png' % unsafe_chars.sub('_', good_title)
            driver.save_screenshot(os.path.join(out_dir, shot_name))
    finally:
        # quit() also ends the chromedriver process; close() only closes
        # the current window.
        driver.quit()

def saveit():
    """Write the three GUI fields to text files so ``loadold`` can restore them.

    The product name, user name and password are read from ``t3bkname``,
    ``t1name`` and ``t2pass`` (newlines removed) and written to
    ``firstinput\\bookname.txt``, ``firstinput\\username.txt`` and
    ``firstinput\\password.txt``. The target directory is created when it
    does not exist yet.
    """
    # NOTE(review): the password is stored in plain text on disk.
    # '1.0' is the first character of a Tk text widget.
    targets = (
        (r'firstinput\bookname.txt', t3bkname.get('1.0', 'end').replace('\n', '')),
        (r'firstinput\username.txt', t1name.get('1.0', 'end').replace('\n', '')),
        (r'firstinput\password.txt', t2pass.get('1.0', 'end').replace('\n', '')),
    )
    for path, value in targets:
        directory = os.path.dirname(path)
        if directory:
            # Without this the first save fails with FileNotFoundError.
            os.makedirs(directory, exist_ok=True)
        # The platform default encoding is kept on purpose so that files
        # written by earlier runs stay readable.
        with open(path, 'w') as handle:
            handle.write(value)
def loadold():
    """Restore the three GUI fields from the files written by ``saveit``.

    Reads ``firstinput\\username.txt``, ``firstinput\\password.txt`` and
    ``firstinput\\bookname.txt`` and puts their contents into ``t1name``,
    ``t2pass`` and ``t3bkname``. If any of the files is missing, a message
    is printed and the widgets are left untouched.
    """
    sources = (
        (r'firstinput\username.txt', t1name),
        (r'firstinput\password.txt', t2pass),
        (r'firstinput\bookname.txt', t3bkname),
    )
    # Read everything first so the fields are replaced all-or-nothing.
    restored = []
    try:
        for path, widget in sources:
            with open(path, 'r') as handle:
                restored.append((widget, handle.read()))
    except FileNotFoundError as exc:
        # Nothing has been saved yet; keep whatever the user typed.
        print('no saved input to restore: %s' % exc)
        return

    # '1.0' is the first character of a Tk text widget.
    for widget, text in restored:
        widget.delete('1.0', 'end')
        widget.insert('1.0', text)
# --- GUI: main window, hint label and the three input fields ---
root = tk.Tk()
root.title('淘宝商品信息下载器')
root.geometry()

# Row 0: usage hint spanning three columns.
l1 = tk.Label(
    root,
    text='填商品名称如螺丝刀,要退出则先关闭谷歌浏览器,再点退出.',
)
l1.grid(row=0, column=0, columnspan=3)

# Row 1: Taobao user name and password, each a label plus a one-line box.
l1name = tk.Label(root, text='淘宝用户名:')
l1name.grid(row=1, column=0)
t1name = tk.Text(root, width=16, height=1)
t1name.grid(row=1, column=1)

l2pass = tk.Label(root, text='淘宝密码:')
l2pass.grid(row=1, column=2)
t2pass = tk.Text(root, width=16, height=1)
t2pass.grid(row=1, column=3)

# Row 3: product name; the text box spans the three remaining columns.
l3nkname = tk.Label(root, text='商品名称:')
l3nkname.grid(row=3, column=0)
t3bkname = tk.Text(root, width=36, height=1)
t3bkname.grid(row=3, column=1, columnspan=3)
def qc():
    """Empty the product-name text box ``t3bkname``."""
    t3bkname.delete(0.0, 'end')
# Row 4, column 2: button that clears the product-name field through qc().
b3 = tk.Button(
    root,
    text="清除商品名称",
    width=14,
    command=qc,
)
b3.grid(row=4, column=2)

def will():
    """Open the ``myget`` download folder in Windows Explorer.

    The folder lives under the current working directory and is created
    first when it does not exist yet.
    """
    start_directoryp = os.path.join(os.getcwd(), 'myget')
    os.makedirs(start_directoryp, exist_ok=True)
    try:
        # An argument list avoids shell parsing of the path (spaces and
        # special characters). The exit status is deliberately not checked.
        subprocess.run(['explorer.exe', start_directoryp], check=False)
    except OSError as exc:
        # explorer.exe could not be started.
        print('cannot open %s: %s' % (start_directoryp, exc))
# Row 4: restore the previously saved input / save the current input.
b3 = tk.Button(root, text="取用上次填写", width=14, command=loadold)
b3.grid(row=4, column=3)

b1 = tk.Button(root, text="提交下载申请", width=14, command=saveit)
b1.grid(row=4, column=1)

# Row 5: start the listing export, take detail-page screenshots, and open
# the download folder.
b2 = tk.Button(root, text="开始下载信息", width=14, command=get_browser)
b2.grid(row=5, column=1)

b2 = tk.Button(root, text="随机详情页图", width=14, command=get_browser2detail)
b2.grid(row=5, column=3)

b210 = tk.Button(root, text="查看下载内容", width=14, command=will)
b210.grid(row=5, column=2)

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()

Rogabet-note高级版2022726链:https://pan.baidu.com/s/19Lyg8vmeZtce1sDEvrNAQQ?pwd=0726
功能: 存md(+word),txt,xlsx,html,pdf,note加密,批量取图中文字,批量录音转字,定时存,云保存,插入机器硬件信息,提取docx,pdf,pptx,xlsx,ofd里文字插入,朗读,加密,收发微信\邮件可查计时录音,切割wav,不定长语音识别,批量word\xls\ppt转pdf(可逆),并割txt,批量发微信,下网页或zip,找文件(词\像素),网页长图,批量ppt2jpg,日历行程,各种解方程,网址收藏,md->html->pdf,字转二维码,股票k爬虫,去水印,资源管理选多文件复制可贴路径文件名表,资产管理,表排序,比对列表差异,表转置,表并拆,表格数据对比,csv/xlsx/参数转图表,批量表求和,筛选抽取表指定行,批量改多表格单元格,一列成n列表,表多行运算,透视表,安排表的列顺序,翻译word-xlsx-ppt-pdf保留版式等.

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

rogabet-note

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值