# inform_table.py
1 # -*-coding:utf8-*- 2 3 from selenium import webdriver 4 from selenium.webdriver.common.proxy import Proxy 5 from selenium.webdriver.common.proxy import ProxyType 6 import time 7 from bs4 import BeautifulSoup 8 9 10 class InformTable: 11 def __init__(self): 12 self.proxy = Proxy({ 'proxyType': ProxyType.MANUAL, 'httpProxy': '115.153.15.128:45491'}) 13 self.browser = webdriver.Firefox(proxy=self.proxy) 14 self.url = 'https://www.tianyancha.com/' 15 self.user = '17862977887' 16 self.pwd = 'oookkk09' 17 self.isFirstSearch = True 18 self.company = '' 19 20 self.url_dic = { 'New_Message': '/html/body/div[1]/div/span', 21 'New_submit': '/html/body/div[1]/div/i', 22 'Advertisement': '//*[@id="tyc_banner_close"]', 23 'get_login': '/html/body/div[1]/div/div[1]/div[1]/div/div/div[2]/div/div[4]/a', 24 'login_user_pwd': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[1]/div[2]', 25 'login_user': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[2]/div[2]/input', 26 'login_pwd': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[2]/div[3]/input', 27 'login_submit': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[2]/div[5]', 28 'login_state': '/html/body/div[1]/div/div[1]/div[1]/div/div/div[2]/div/div[4]/a', 29 'first_search_text': '//*[@id="home-main-search"]', 30 'first_search_submit': '/html/body/div[1]/div/div[1]/div[2]/div/div/div[2]/div[2]/div[1]/div', 31 'second_search_text': '//*[@id="header-company-search"]', 32 'second_search_submit': '/html/body/div[1]/div/div[2]/div/div[2]/div[1]/div', 33 'company_name': '/html/body/div[2]/div/div[1]/div/div[3]/div[1]/div/div[3]/div[1]/a/em', 34 'company_history_name': '/html/body/div[2]/div/div[1]/div/div[3]/div[1]/div/div[3]/div[4]/span[2]/em', 35 'company_url': '/html/body/div[2]/div/div[1]/div/div[3]/div[1]/div/div[3]/div[1]/a', 36 'holding_url': '/html/body/div[2]/div[1]/div/div[3]/div[1]/div/div[2]/div[1]/div[5]/div[2]', 37 'holding_name': 
'/html/body/div[2]/div[1]/div/div[3]/div[1]/div/div[2]/div[1]/div[5]/div[1]', 38 'financing_url': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[5]/div[2]/div', 39 'financing_name': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[5]/div[1]/span[1]', 40 'project_url': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[6]/div[2]/div', 41 'project_name': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[6]/div[1]/span[1]', 42 '2_to_3': '/html/body/div[2]/div[1]/div/div[2]/div[1]/div[2]/div[3]/div[4]/div[1]'} 43 self.js = { 'page_state': 'return document.readyState'} 44 45 # 关闭新消息提醒 46 def has_new(self): 47 try: 48 if u'新的动态' in self.browser.find_element_by_xpath(self.url_dic['New_Message']).text: 49 self.browser.find_element_by_xpath(self.url_dic['New_submit']).click() 50 self.browser.back() 51 except Exception: 52 pass 53 54 # 关闭底部风险提示 55 def advertisement(self): 56 try: 57 self.browser.find_element_by_xpath(self.url_dic['Advertisement']).click() 58 except Exception: 59 pass 60 61 # 等待页面加载完成 62 def complete(self): 63 while self.browser.execute_script(self.js['page_state']) != 'complete': 64 time.sleep(0.001) 65 66 # 验证登录状态 67 def login_success(self): 68 try: 69 while u'登录' in self.browser.find_element_by_xpath(self.url_dic['login_state']).text: 70 time.sleep(0.5) 71 except Exception: 72 pass 73 74 # 用户登录 75 def login(self): 76 # 访问网址 77 self.browser.get(self.url) 78 self.complete() 79 # 弹出登录界面 80 self.browser.find_elements_by_xpath(self.url_dic['get_login'])[0].click() 81 # 选择登录方式并登录 82 while True: 83 try: 84 self.browser.find_element_by_xpath(self.url_dic['login_user_pwd']).click() 85 break 86 except Exception: 87 time.sleep(1) 88 self.browser.find_element_by_xpath(self.url_dic['login_user']).send_keys(self.user) 89 self.browser.find_element_by_xpath(self.url_dic['login_pwd']).send_keys(self.pwd) 90 self.browser.find_element_by_xpath(self.url_dic['login_submit']).click() 91 # 验证登录状态 92 print '正在登录……' 93 self.login_success() 94 # 确保登录成功 95 time.sleep(30) 
96 # 除去新消息 97 self.has_new() 98 99 # 搜索企业 100 def search_company(self, company): 101