seriously。。。为了要搞点工商信息~~me也是拼了~~~~
没办法,只能破解了,还好网上很早就有人把这个破了~~开心~~~
所以本人也就本着一边抄袭一边学习的方法,把这个破玩意给破了~~~哈哈哈~~~
不过由于本人想看电视,不想train一个behaviour model出来~~所以就偷懒了~~~~
直接用random int 的方法来搞定这个模拟人类的拖拽过程,
网上有人用了linear regression来模拟,也有用deep learning来做的,还有透过一个tanh函数搞定的~~~
因为是randint的关系,所以需要尝试多次才能破解~~这个是不完美的地方,但本着搞数据的方向,这样我也觉得可以接受,重点是数据可以搞下来就好了~~~~
废话少说,直接上代码
# coding=utf-8
import sys;
reload(sys);
sys.setdefaultencoding('utf8');
import requests
import re
import StringIO
from PIL import Image
import random
import math
import time
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from bs4 import BeautifulSoup
class crack_picture(object):
    """Download the two geetest captcha images, restore their scrambled
    tiles, and compute a drag track to the puzzle gap.

    img_url1: URL of the full (intact) background image, tile-scrambled.
    img_url2: URL of the background image with the gap cut out, tile-scrambled.
    """

    def __init__(self, img_url1, img_url2):
        self.img1, self.img2 = self.picture_get(img_url1, img_url2)

    def picture_get(self, img_url1, img_url2):
        """Fetch both captcha images and return them as in-memory file objects."""
        hd = {"Host": "static.geetest.com",
              "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}
        img1 = StringIO.StringIO(self.repeat(img_url1, hd).content)
        img2 = StringIO.StringIO(self.repeat(img_url2, hd).content)
        return img1, img2

    def repeat(self, url, hd):
        """GET `url`, retrying up to 10 times on network errors.

        Raises the last network error if every attempt fails.  (The
        original silently returned None after exhausting retries, which
        made the caller crash later with an opaque AttributeError.)
        """
        last_err = None
        for _ in range(10):
            try:
                return requests.get(url, headers=hd)
            except requests.RequestException as e:  # narrow: network errors only
                last_err = e
        raise last_err

    def pictures_recover(self):
        """Restore both images, locate the gap, and return the drag track."""
        full_bg = self.picture_recover(self.img1, 'img1.jpg')
        gap_bg = self.picture_recover(self.img2, 'img2.jpg')
        # -6 presumably compensates for the shadow at the left edge of the
        # gap so the slider lands inside it — TODO confirm empirically.
        xpos = self.judge(full_bg, gap_bg) - 6
        return self.darbra_track(xpos)

    def picture_recover(self, img, name):
        """Rebuild the real 260x116 image from geetest's scrambled source
        and save it to `name`; return the restored PIL image.

        The source image holds 52 tiles (2 rows x 26 columns), each 10px
        wide and 58px tall; `order` is geetest's hard-coded mapping from
        destination tile index to source tile index.
        """
        order = [39, 38, 48, 49, 41, 40, 46, 47, 35, 34, 50, 51, 33, 32, 28, 29, 27, 26, 36, 37, 31, 30, 44, 45, 43,
                 42, 12, 13, 23, 22, 14, 15, 21, 20, 8, 9, 25, 24, 6, 7, 3, 2, 0, 1, 11, 10, 4, 5, 19, 18, 16, 17]
        im = Image.open(img)
        im_new = Image.new("RGB", (260, 116))
        for row in range(2):
            for column in range(26):
                idx = order[row * 26 + column]
                # Source tiles sit on a 12px stride with a 1px margin;
                # tiles 26..51 come from the lower half (y offset 58).
                src_x = idx % 26 * 12 + 1
                src_y = 58 if idx > 25 else 0
                for w in range(10):
                    for h in range(58):
                        im_new.putpixel((10 * column + w, 58 * row + h),
                                        im.getpixel((w + src_x, h + src_y)))
        im_new.save(name)
        return im_new

    def darbra_track(self, distance):
        """Wrap the gap offset as a single (x, y, t) drag step.

        The original human-like trace generation was removed; see
        https://github.com/darbra/geetest for the full version.
        """
        return [[distance, 0.5, 1]]

    def diff(self, img1, img2, wd, ht):
        """True if the pixel at (wd, ht) differs noticeably between the
        two images (Manhattan distance over RGB channels >= 200)."""
        rgb1 = img1.getpixel((wd, ht))
        rgb2 = img2.getpixel((wd, ht))
        return sum(abs(c1 - c2) for c1, c2 in zip(rgb1, rgb2)) >= 200

    def col(self, img1, img2, cl):
        """True if any pixel in column `cl` differs between the images."""
        for i in range(img2.size[1]):
            if self.diff(img1, img2, cl, i):
                return True
        return False

    def judge(self, img1, img2):
        """Return the leftmost differing column (the gap's x position),
        or -1 if the images are identical."""
        for i in range(img2.size[0]):
            if self.col(img1, img2, i):
                return i
        return -1
class gsxt(object):
    """Drives a browser through gsxt.gov.cn's geetest slider captcha and
    scrapes the company search results once the captcha passes.

    NOTE(review): this is Python 2 code (print statements, xrange) and
    depends on live site markup plus calibrated sleeps — behavior here is
    deliberately left untouched.
    """
    def __init__(self, br_name="phantomjs"):
        self.br = self.get_webdriver(br_name)
        # Wait up to 10s, polling every 1.0s, for expected elements.
        self.wait = WebDriverWait(self.br, 10, 1.0)
        self.br.set_page_load_timeout(8)
        self.br.set_script_timeout(8)
    def input_params(self, name):
        # Open the search page, type the company name, and submit the query.
        self.br.get("http://www.gsxt.gov.cn/index")
        element = self.wait_for(By.ID, "keyword")
        element.send_keys(name)
        time.sleep(1.1)
        element = self.wait_for(By.ID, "btn_query")
        element.click()
        time.sleep(1.1)
    def drag_pic(self):
        # URLs of the two captcha images: the full background slice and
        # the background slice with the puzzle gap cut out.
        return (self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_fullbg_slice")),
                self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_bg_slice")))
    def wait_for(self, by1, by2):
        # Block until the element located by (by1, by2) is present.
        return self.wait.until(EC.presence_of_element_located((by1, by2)))
    def find_img_url(self, element):
        # Extract the background-image URL from the element's inline style.
        # Both quoted url("...") and unquoted url(...) forms occur, hence
        # the fallback; webp is swapped for jpg so PIL can open the image.
        try:
            return re.findall('url\("(.*?)"\)', element.get_attribute('style'))[0].replace("webp", "jpg")
        except:
            return re.findall('url\((.*?)\)', element.get_attribute('style'))[0].replace("webp", "jpg")
    def emulate_track(self, tracks):
        """Drag the slider knob along `tracks` and return geetest's
        result text (UTF-8 encoded)."""
        element = self.br.find_element_by_class_name("gt_slider_knob")
        # NOTE(review): click_and_hold is performed again inside the loop
        # below; this first call looks redundant — confirm before removing.
        ActionChains(self.br).click_and_hold(on_element=element).perform()
        for x, y, t in tracks:
            print x, y, t
            track_list = self.get_track(x);
            ActionChains(self.br).click_and_hold(on_element=element).perform()
            time.sleep(0.15)
            for track in track_list:
                track_string = "{%d,%d}," % (track, y +22 )
                # xoffset=track+22: move_to_element_with_offset positions the
                # pointer relative to the knob's top-left corner, while the
                # track values are relative to the knob's centre, so half the
                # knob width (22px) is added.
                # yoffset=y+22: same idea vertically. Different browsers render
                # differently; the final computed value must come out to 22,
                # i.e. half the knob height.
                ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=track + 22,
                                                                  yoffset=y + 22).perform()
                # The pause between micro-moves is randomized to look human.
                # NOTE(review): under Python 2 randint(10, 50) / 100 is
                # integer division and always yields 0, so this sleep is a
                # no-op — probably meant / 100.0 for a 0.10-0.50s pause.
                time.sleep(random.randint(10, 50) / 100)
                print track_string
                # ActionChains(self.br).move_to_element_with_offset(
                # to_element=element,
                # xoffset=x + 22,
                # yoffset=y + 22).perform()
                # ActionChains(self.br).click_and_hold().perform()
                # time.sleep(t)
            # Three small settle moves before releasing, mimicking a human
            # adjusting onto the target.
            ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y + 22).perform()
            time.sleep(0.1)
            ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y + 22).perform()
            time.sleep(0.1)
            ActionChains(self.br).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y + 22).perform()
            time.sleep(0.1)
        time.sleep(0.24)
        ActionChains(self.br).release(on_element=element).perform()
        time.sleep(0.8)
        # Read geetest's verdict banner text.
        element = self.wait_for(By.CLASS_NAME, "gt_info_text")
        ans = element.text.encode("utf-8")
        print ans
        return ans
    def run(self):
        # Demo: query a few well-known banks, then shut the browser down.
        for i in [u'招商银行', u'交通银行', u'中国银行']:
            self.hack_geetest(i)
            time.sleep(1)
        self.quit_webdriver()
    def hack_geetest(self, company=u"招商银行"):
        """Keep solving the captcha for `company` until geetest accepts,
        then print the cleaned text of each search-result entry."""
        flag = True
        self.input_params(company)
        while flag:
            img_url1, img_url2 = self.drag_pic()
            tracks = crack_picture(img_url1, img_url2).pictures_recover()
            tsb = self.emulate_track(tracks)
            if '通过' in tsb:  # "passed": captcha accepted, scrape results
                time.sleep(1)
                soup = BeautifulSoup(self.br.page_source, 'html.parser')
                for sp in soup.find_all("a", attrs={"class": "search_list_item"}):
                    print re.sub("\s+", "", sp.get_text().encode("utf-8"))
                    # print sp.get_text()
                break
            elif '吃' in tsb:  # geetest's "eaten" message: wait, then retry
                time.sleep(5)
            else:
                # Outright failure: reload the search page and start over.
                self.input_params(company)
    def quit_webdriver(self):
        # End the webdriver session and close the browser.
        self.br.quit()
    def get_webdriver(self, name):
        # Build a PhantomJS driver spoofing a desktop Chrome user agent,
        # or a plain Chrome driver. Any other name falls through to None.
        if name.lower() == "phantomjs":
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36")
            return webdriver.PhantomJS(desired_capabilities=dcap)
        elif name.lower() == "chrome":
            return webdriver.Chrome()
    def get_track(self, len):
        '''
        Simulate the x-axis drag trajectory toward a gap `len` pixels away:
        random 1-3px steps until within 5px of the target, then 1px steps
        for the remainder (the step sums equal the full distance).
        NOTE(review): `len` and `list` shadow builtins and the bare `pass`
        is dead code — worth cleaning up in a behavior change.
        '''
        pass
        list = []
        # Step sizes come from a random range to vary the trajectory.
        x = random.randint(1, 3)
        while len - x >= 5:
            list.append(x)
            len = len - x
            x = random.randint(1, 3)
        for i in xrange(len):
            list.append(1)
        return list;
if __name__ == "__main__":
    # Offline check of the image-recovery/track code against saved geetest URLs:
    # print crack_picture("http://static.geetest.com/pictures/gt/fc064fc73/fc064fc73.jpg", "http://static.geetest.com/pictures/gt/fc064fc73/bg/7ca363b09.jpg").pictures_recover()
    # Launch Chrome and run the demo queries end to end.
    gsxt("chrome").run()
有几篇文章一定要感谢的:
http://blog.csdn.net/mingzznet/article/details/54288288
https://www.zhihu.com/question/28833985
https://github.com/darbra/geetest