from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver .common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from PIL import Image
from io import BytesIO
from lxml import etree
import time
import pymssql
class TianYanLogin(object):
    """Log in to tianyancha.com through its slider captcha, then scrape logos.

    The flow driven by :meth:`chick` is: open the search page, fill in the
    account form, screenshot the captcha before and after the slider is
    pressed, estimate the horizontal drag distance from the pixel difference,
    drag the slider along a generated track and, once the page no longer
    reports a failure, run :meth:`begin` to look companies up and store their
    logo URLs in the database.
    """

    # Pixel amounts subtracted from the raw column returned by get_gap() to
    # obtain the drag distance.  Each tuple is
    # (200 < gap < 330, gap > 330, 100 < gap < 200, everything else).
    # The values are empirical and were tuned by hand against the fixed crop
    # box returned by get_position().
    _DEFAULT_CORRECTION = (150, 170, 110, 70)
    # Alternative corrections tried on the 1st, 2nd and 3rd failed attempt.
    _RETRY_CORRECTIONS = (
        (140, 160, 100, 40),
        (130, 150, 90, 50),
        (160, 180, 120, 65),
    )

    def __init__(self):
        """Start Chrome and open the SQL Server connection used by the scraper."""
        self.url = 'https://www.tianyancha.com/search?key=阿里巴巴'
        self.browser = webdriver.Chrome()
        # NOTE(review): database credentials are hard-coded here; they should
        # be moved to configuration / environment variables.
        self.conn = pymssql.connect(host='39.97.106.44', user='yishan1', password='yishan2018.', database='CFCMangeSoft')
        self.cur = self.conn.cursor()
        self.wait = WebDriverWait(self.browser, 20)

    # def __del__(self):
    #     self.browser.close()

    def login(self):
        """Open the start page, switch to the account tab and type the credentials."""
        self.browser.get(self.url)
        self.browser.maximize_window()
        time.sleep(1)
        # First tab title inside the login module; raises IndexError when the
        # login module is not present on the page.
        zhanghao = self.browser.find_elements(
            By.XPATH,
            "//div[@class='module module1 module2 loginmodule collapse in']/div[@class='title-tab text-center']/div[@class='title']")[0]
        zhanghao.click()
        time.sleep(1)
        my_phone = self.browser.find_element(
            By.XPATH,
            "//*[@id='web-content']/div/div[2]/div/div[2]/div/div[3]/div[2]/div[2]/input")
        my_phone.click()
        time.sleep(1)
        # NOTE(review): account name and password are hard-coded.
        my_phone.send_keys('154366')
        my_password = self.browser.find_element(
            By.XPATH,
            "//*[@id='web-content']/div/div[2]/div/div[2]/div/div[3]/div[2]/div[3]/input")
        my_password.click()
        time.sleep(1)
        my_password.send_keys('2355wazh')

    def button(self):
        """Wait three seconds, then click the login button.

        :return: None
        """
        time.sleep(3)
        login_button = self.browser.find_element(
            By.XPATH,
            "//*[@id='web-content']/div/div[2]/div/div[2]/div/div[3]/div[2]/div[5]")
        login_button.click()

    def get_position(self):
        """Return the crop box of the captcha inside a full-page screenshot.

        The box is hard-coded.  An earlier, now disabled, variant derived it
        from the location and size of the ``gt_popup_wrap`` element:

            # time.sleep(3)
            # img = self.browser.find_element_by_xpath("//*[@class='gt_popup_wrap']/div[2]")
            # lcoation = img.location
            # size = img.size
            # top, bottom, left, right = lcoation['y'], lcoation['y'] + size['height'], lcoation['x'], lcoation['x']-400 + size['width'] + 400

        :return: tuple ``(top, bottom, left, right)`` in screenshot pixels
        """
        top = 340
        bottom = 510
        left = 750
        right = 1140
        return (top, bottom, left, right)

    def get_spider(self):
        """Wait three seconds, then locate the puzzle-piece element.

        :return: the located WebElement
        """
        time.sleep(3)
        silder = self.browser.find_element(
            By.XPATH,
            "//div[10]/div[2]/div[2]/div[1]/div[2]/div[1]/a[1]/div[2]")
        return silder

    def get_geetest_image(self, name='captcha.png'):
        """Screenshot the page, crop the captcha area and save it to disk.

        :param name: file the cropped image is written to (overwritten on
            every call)
        :return: the cropped image object
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # PIL crop boxes are ordered (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def get_screenshot(self):
        """Wait three seconds, then take a screenshot of the browser window.

        :return: the screenshot as a PIL image
        """
        time.sleep(3)
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_slide(self):
        """Wait three seconds, then locate the slider handle.

        :return: the slider WebElement
        """
        time.sleep(3)
        slide = self.browser.find_element(
            By.XPATH,
            "//div[10]/div[2]/div[2]/div[2]/div[2]")
        return slide

    def is_pixel_equal(self, image1, image2, x, y):
        """Tell whether two images have (nearly) the same colour at one pixel.

        The first three channels of the pixel are compared; each may differ
        by less than 60 for the pixels to count as equal.

        :param image1: first image
        :param image2: second image
        :param x: pixel column
        :param y: pixel row
        :return: True when all three channel differences are below the threshold
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        threshold = 60
        return (abs(pixel1[0] - pixel2[0]) < threshold
                and abs(pixel1[1] - pixel2[1]) < threshold
                and abs(pixel1[2] - pixel2[2]) < threshold)

    def get_gap(self, image1, image2):
        """Return the right-most column (>= 80) where the two images differ.

        Columns left of 80 are ignored.  When no column from 80 onwards
        differs, 80 is returned.

        The scan runs from right to left and stops at the first differing
        column; this yields the same value as scanning every column from the
        left and remembering the last difference, without the redundant work.

        :param image1: captcha captured before the slider was pressed
        :param image2: captcha captured after the slider was pressed
        :return: column index used as the raw gap offset
        """
        left = 80
        width, height = image1.size[0], image1.size[1]
        print(width)
        print(height)
        for i in range(width - 1, left - 1, -1):
            for j in range(height):
                if not self.is_pixel_equal(image1, image2, i, j):
                    return i
        return left

    def get_track(self, distance):
        """Build a list of per-step x offsets that covers ``distance``.

        The motion is simulated in 0.2 s steps: constant acceleration of 2
        until 4/5 of the distance has been covered, then a deceleration of 3.
        Each step's displacement is rounded to an integer before being stored,
        so the sum of the track can differ slightly from ``distance``.

        :param distance: total offset to cover; values <= 0 give an empty track
        :return: list of integer step offsets
        """
        track = []
        # Distance covered so far.
        current = 0
        # Point at which the motion switches from speeding up to slowing down.
        mid = distance * 4 / 5
        # Duration of one simulated step.
        t = 0.2
        # Current velocity.
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a * t
            v = v0 + a * t
            # x = v0 * t + 1/2 * a * t^2
            move = v0 * t + 0.5 * a * t * t
            current += move
            track.append(round(move))
        return track

    def move_to_gap(self, slider, track):
        """Drag the slider along ``track`` and release it.

        :param slider: slider WebElement to press and hold
        :param track: iterable of x offsets, one per mouse movement
        :return: None
        """
        time.sleep(1)
        ActionChains(self.browser).click_and_hold(slider).perform()
        # The counter starts at 2 for the first step, so the short pause is
        # inserted just before the ninth movement.
        for num, x in enumerate(track, 2):
            if num == 10:
                time.sleep(0.2)
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(1)
        ActionChains(self.browser).release().perform()
        time.sleep(1)

    @staticmethod
    def _corrected_offset(lefts, correction):
        """Subtract the band-specific correction from a raw gap column.

        Values exactly equal to 100, 200 or 330 fall into the last band.
        """
        mid_high, high, mid_low, other = correction
        if 330 > lefts > 200:
            return lefts - mid_high
        if lefts > 330:
            return lefts - high
        if 200 > lefts > 100:
            return lefts - mid_low
        return lefts - other

    def _capture_captcha_pair(self):
        """Capture the captcha before and after clicking the slider; return (img1, img2, slide)."""
        img1 = self.get_geetest_image()
        slide = self.get_slide()
        slide.click()
        img2 = self.get_geetest_image()
        return img1, img2, slide

    def chick(self):
        """Run the full login flow and start scraping once the captcha passes.

        After the first drag the page source is inspected up to four times,
        one second apart:

        * if it contains '失败', the drag is repeated on the same images with
          a different correction (none on the fourth inspection);
        * if it contains '重试', the captcha is captured again and solved with
          the default correction;
        * otherwise the attempt is considered passed.

        If no inspection ever looked like a pass the whole flow is restarted.

        NOTE(review): the restart is a recursive call with no attempt limit.
        """
        self.login()
        self.button()
        img1, img2, slide = self._capture_captcha_pair()
        lefts = self._corrected_offset(self.get_gap(img1, img2), self._DEFAULT_CORRECTION)
        self.move_to_gap(slide, self.get_track(lefts))
        passed = False
        for i in range(4):
            time.sleep(1)
            if '失败' in self.browser.page_source:
                lefts = self.get_gap(img1, img2)
                if i < len(self._RETRY_CORRECTIONS):
                    lefts = self._corrected_offset(lefts, self._RETRY_CORRECTIONS[i])
                self.move_to_gap(slide, self.get_track(lefts))
                time.sleep(1)
            elif '重试' in self.browser.page_source:
                img1, img2, slide = self._capture_captcha_pair()
                lefts = self._corrected_offset(self.get_gap(img1, img2), self._DEFAULT_CORRECTION)
                print("xixixiixix")
                print(lefts)
                self.move_to_gap(slide, self.get_track(lefts))
                time.sleep(1)
            else:
                passed = True
        if not passed:
            self.chick()
        else:
            print("这里面进来了码")
            self.begin()

    def begin(self):
        """Search each selected organisation and store the logo URL found.

        Up to 200 organisation names are read from
        ``OrganizationInfo_similarityZhu``.  For each one the name is typed
        into the header search box, the search is submitted, and the
        ``data-src`` of the first result's image (tried at two alternative
        positions in the page) is written back to the ``img`` column.
        Each update is committed immediately.
        """
        sql = """select top 200 oname from OrganizationInfo_similarityZhu where oname like '%公司%' and img is not null"""
        self.cur.execute(sql)
        datas = self.cur.fetchall()
        for data in datas:
            print('这里就没有动码')
            time.sleep(1)
            name = data[0]
            search_box = self.browser.find_element(By.XPATH, "//input[@id='header-company-search']")
            search_box.clear()
            search_box.send_keys(name)
            self.browser.find_element(By.XPATH, "//div[@class='input-group-btn btn -sm btn-primary']").click()
            # NOTE(review): the page source is read right after the click,
            # with no explicit wait for the result page to load.
            response = etree.HTML(self.browser.page_source)
            img = response.xpath(
                "//*[@id='web-content']/div/div[1]/div[3]/div[2]/div[1]/div/div[2]/div/div[2]/img/@data-src")
            if not img:
                img = response.xpath(
                    "//*[@id='web-content']/div/div[1]/div[4]/div[2]/div[1]/div/div[2]/div/div[2]/img/@data-src")
            if img:
                img = img[0]
                # Human-readable preview only; the statement that is executed
                # below is parameterized so quotes in the scraped URL or in
                # the organisation name cannot break or alter the query.
                preview = """update OrganizationInfo_similarityZhu set img='{}' where oname='{}'""".format(img, name)
                print(preview)
                self.cur.execute(
                    "update OrganizationInfo_similarityZhu set img=%s where oname=%s",
                    (img, name))
                self.conn.commit()
if __name__ == '__main__':
    # Script entry point: build the scraper and run the login-and-scrape flow.
    TianYanLogin().chick()
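# NOTE: working out the drag offset for the slider button took a great deal of effort, and the result is still not fully reliable.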