Selenium过某音滑块,获取s_v_web_id

        滑块只能用Selenium过,js逆向那套行不通,没办法,行情就是这样~~(来自某某经典语录)

        采集抖音过程中,需要携带cookie才能获取数据,cookie失效就会出现滑块验证码,如图:

        如果你那边没有出现验证码,可以先去掉cookie,再滚动翻页,验证码就会出现;

        用Selenium过滑块,原理都差不多:获取背景图和缺口(滑块)图片,识别出缺口距离,再模拟拖动滑块。拖动速度不要恒定,要有加速和减速,这样更像人为操作。按照代码操作几遍就可以;注意需要把浏览器驱动路径driver_path修改为自己的路径;

代码如下:

import cv2
import numpy as np
import time
import requests
import os
from urllib.parse import urlparse
from selenium import webdriver
from selenium.webdriver import ActionChains



class Douyin_Slider(object):
    """Locate the gap of a slider captcha by edge-based template matching.

    The background image and the puzzle-piece image are downloaded, resized
    and stored as JPEG files in an ``image`` directory below the current
    working directory. :meth:`discern` then returns the x coordinate at
    which the puzzle piece best matches the background.
    """

    def __init__(self, bg, gap):
        """Download both captcha images.

        :param bg: URL of the captcha background image
        :param gap: URL of the puzzle-piece image
        """
        # (width, height) both images are resized to before matching.
        # NOTE(review): presumably the sizes at which the page renders the
        # images, so that pixel offsets equal drag distances - confirm.
        bg_size = (340, 212)
        gap_size = (68, 68)
        self.img_dir = os.path.join(os.getcwd(), 'image')
        self.bg = self.get_img_path(bg, 'bg', bg_size)
        self.gap = self.get_img_path(gap, 'gap', gap_size)
        # Debug image written by template_match() with the match outlined.
        self.out = os.path.join(self.img_dir, 'out.jpg')

    def get_img_path(self, img_path, img_name, resize):
        """Download an image, optionally resize it and save it as a JPEG.

        :param img_path: URL of the image to download
        :param img_name: file name (without extension) used for the saved copy
        :param resize: ``(width, height)`` to resize to, or a falsy value to
            keep the original size
        :return: path of the saved file inside ``self.img_dir``
        :raises RuntimeError: if the download does not return HTTP 200, the
            payload cannot be decoded as an image, or the file cannot be
            written
        """
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;"
                      "q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9,en-GB;q=0.8,en;q=0.7,ja;q=0.6",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Host": urlparse(img_path).hostname,
            "Referer": "https://www.douyin.com/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0",
        }
        # A timeout keeps a stalled connection from hanging the whole run.
        resp = requests.get(url=img_path, headers=headers, timeout=10)
        if resp.status_code != 200:
            raise RuntimeError('下载失败,状态码为:{}'.format(resp.status_code))

        raw = np.frombuffer(resp.content, dtype=np.uint8)
        image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if image is None:
            raise RuntimeError('cannot decode image downloaded from {}'.format(img_path))
        if resize:
            image = cv2.resize(image, dsize=resize)

        # cv2.imwrite does not create missing directories; it just returns False.
        os.makedirs(self.img_dir, exist_ok=True)
        saved_path = os.path.join(self.img_dir, '{}.jpg'.format(img_name))
        if not cv2.imwrite(saved_path, image):
            raise RuntimeError('cannot write image to {}'.format(saved_path))
        return saved_path

    @staticmethod
    def clear_white(img):
        """Crop an image to the bounding box of its coloured pixels.

        A pixel counts as content when its channel values are not all equal;
        pure white, grey and black pixels are treated as background.

        :param img: path of an image file, or an already loaded image array
            with a channel axis (height x width x channels)
        :return: the cropped image array; the image is returned uncropped
            when it contains no coloured pixel
        :raises ValueError: if ``img`` is a path that cannot be read
        """
        if not isinstance(img, np.ndarray):
            path = img
            img = cv2.imread(path)
            if img is None:
                raise ValueError('cannot read image: {}'.format(path))

        # True where at least one channel differs from the first channel.
        content = (img != img[:, :, :1]).any(axis=2)
        content_rows = np.flatnonzero(content.any(axis=1))
        content_cols = np.flatnonzero(content.any(axis=0))
        if content_rows.size == 0:
            return img
        # Upper bounds are inclusive, hence the +1 in the slice.
        return img[content_rows[0]:content_rows[-1] + 1,
                   content_cols[0]:content_cols[-1] + 1]

    @staticmethod
    def image_edge_detection(img):
        """Return the Canny edge map of ``img`` (thresholds 100 and 200)."""
        return cv2.Canny(img, 100, 200)

    def template_match(self, tpl, target):
        """Find the best match of ``tpl`` inside ``target``.

        The matched area is outlined on ``target`` (which is modified in
        place) and the annotated image is written to ``self.out``.

        :param tpl: template image (the puzzle piece)
        :param target: image to search in (the background)
        :return: x coordinate of the top-left corner of the best match
        """
        th, tw = tpl.shape[:2]
        result = cv2.matchTemplate(target, tpl, cv2.TM_CCOEFF_NORMED)
        # With TM_CCOEFF_NORMED the best match is the maximum of the result.
        _, _, _, max_loc = cv2.minMaxLoc(result)
        top_left = max_loc
        bottom_right = (top_left[0] + tw, top_left[1] + th)
        # Outline the matched area: colour (0, 0, 255), thickness 2.
        cv2.rectangle(target, top_left, bottom_right, (0, 0, 255), 2)
        cv2.imwrite(self.out, target)
        return top_left[0]

    def discern(self):
        """Return the x position of the gap in the background image.

        Both images are reduced to Canny edge maps before matching.

        :return: x coordinate (in pixels of the resized background) of the
            best match of the puzzle piece
        """
        piece = self.clear_white(self.gap)
        # cv2.imread yields BGR channel order, so convert from BGR.
        piece = cv2.cvtColor(piece, cv2.COLOR_BGR2GRAY)
        piece_edges = self.image_edge_detection(piece)

        # Flag 0 loads the background directly as a grayscale image.
        back = cv2.imread(self.bg, 0)
        back_edges = self.image_edge_detection(back)

        # Back to three channels so the match can be outlined in colour.
        piece_pic = cv2.cvtColor(piece_edges, cv2.COLOR_GRAY2BGR)
        back_pic = cv2.cvtColor(back_edges, cv2.COLOR_GRAY2BGR)
        return self.template_match(piece_pic, back_pic)


def get_track(distance):
    """
    Build a list of integer mouse steps that together cover ``distance``.

    The movement accelerates over the first 4/5 of the distance and
    decelerates over the rest. Steps are derived from the rounded cumulative
    position, clamped at ``distance``, so the returned steps always add up
    to ``round(distance)`` with no overshoot and no rounding drift.

    :param distance: total offset to cover
    :return: list of per-step integer offsets; empty when ``distance <= 0``
    """
    track = []
    # Exact (unrounded) simulated displacement
    current = 0.0
    # Sum of the integer steps already placed in the track
    emitted = 0
    # Position at which the movement switches from speeding up to slowing down
    mid = distance * 4 / 5
    # Time interval simulated per step
    t = 0.35
    # Current velocity
    v = 0

    while current < distance:
        # Accelerate at 5 before the threshold, decelerate at 2.5 after it
        a = 5 if current < mid else -2.5
        v0 = v
        # v = v0 + a*t ; s = v0*t + a*t^2/2
        v = v0 + a * t
        current += v0 * t + 0.5 * a * t * t
        # Rounding the clamped cumulative position keeps the total exact
        target = round(min(current, distance))
        track.append(target - emitted)
        emitted = target
    return track


def get_cookies(driver_path=r'E:\Python36\Scripts\geckodriver', max_attempts=10):
    """
    Open the search page in Firefox, solve the slider captcha while one is
    shown, and return the value of the ``s_v_web_id`` cookie.

    :param driver_path: path of the geckodriver executable
    :param max_attempts: maximum number of captcha attempts; ``None`` retries
        until the captcha is gone
    :return: value of the ``s_v_web_id`` cookie, or ``None`` if it is not set
    """
    # Imported here so the function brings its own extra dependencies.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    url = 'https://www.douyin.com/search/python?source=switch_tab&type=user'
    option = webdriver.FirefoxOptions()
    # option.add_argument('--headless') would hide the browser window.
    option.add_argument('--disable-gpu')
    option.add_argument('--no-sandbox')
    # NOTE(review): the two "disable-blink-features" switches look
    # Chromium-specific; confirm they have any effect on Firefox.
    option.add_argument("disable-blink-features")
    option.add_argument("disable-blink-features=AutomationControlled")
    driver = webdriver.Firefox(options=option, executable_path=driver_path)
    try:
        driver.get(url)
        time.sleep(2)
        attempts = 0
        while max_attempts is None or attempts < max_attempts:
            attempts += 1
            try:
                bg = driver.find_element(By.ID, 'captcha-verify-image').get_attribute('src')
                gap = driver.find_element(
                    By.XPATH, '//*[@id="captcha_container"]/div/div[2]/img[2]'
                ).get_attribute('src')
                distance = Douyin_Slider(bg=bg, gap=gap).discern()
                print('移动距离:', distance)
                handle = driver.find_element(
                    By.XPATH, '//div[@id="secsdk-captcha-drag-wrapper"]/div[2]'
                )
                ActionChains(driver).click_and_hold(handle).perform()
                tracks = get_track(distance)
                print('开始验证....')
                if tracks:
                    # Fold any rounding error or overshoot into the last step
                    # so the drag ends exactly at the computed distance.
                    tracks[-1] -= sum(tracks) - distance
                for step in tracks:
                    ActionChains(driver).move_by_offset(step, 0).perform()
                ActionChains(driver).release().perform()
                # Give the page a moment to accept or re-issue the captcha.
                time.sleep(0.5)
            except NoSuchElementException:
                # No captcha on the page: never shown, or already solved.
                break
            except Exception as exc:
                # Best effort: report the problem and fall through to
                # reading whatever cookies exist.
                print('滑块验证出错:', exc)
                break

        cookie_dict = {item['name']: item['value'] for item in driver.get_cookies()}
        return cookie_dict.get('s_v_web_id')
    finally:
        # Always shut the browser down, even when something above raised.
        driver.quit()


if __name__ == '__main__':
    # Script entry point: fetch the s_v_web_id cookie value and print it.
    print(get_cookies())

运行结果如图:

评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

龙猫不打伞

感谢打赏,天道酬勤!!!

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值