python应用:selenium之爬取天眼查信息

inform_table.py

  1 # -*-coding:utf8-*-
  2 
  3 from selenium import webdriver
  4 from selenium.webdriver.common.proxy import Proxy
  5 from selenium.webdriver.common.proxy import ProxyType
  6 import time
  7 from bs4 import BeautifulSoup
  8 
  9 
 10 class InformTable:
 11     def __init__(self):
 12         self.proxy = Proxy({
   'proxyType': ProxyType.MANUAL, 'httpProxy': '115.153.15.128:45491'})
 13         self.browser = webdriver.Firefox(proxy=self.proxy)
 14         self.url = 'https://www.tianyancha.com/'
 15         self.user = '17862977887'
 16         self.pwd = 'oookkk09'
 17         self.isFirstSearch = True
 18         self.company = ''
 19 
 20         self.url_dic = {
   'New_Message': '/html/body/div[1]/div/span',
 21                         'New_submit': '/html/body/div[1]/div/i',
 22                         'Advertisement': '//*[@id="tyc_banner_close"]',
 23                         'get_login': '/html/body/div[1]/div/div[1]/div[1]/div/div/div[2]/div/div[4]/a',
 24                         'login_user_pwd': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[1]/div[2]',
 25                         'login_user': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[2]/div[2]/input',
 26                         'login_pwd': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[2]/div[3]/input',
 27                         'login_submit': '/html/body/div[6]/div[2]/div/div[2]/div/div/div[3]/div[2]/div[5]',
 28                         'login_state': '/html/body/div[1]/div/div[1]/div[1]/div/div/div[2]/div/div[4]/a',
 29                         'first_search_text': '//*[@id="home-main-search"]',
 30                         'first_search_submit': '/html/body/div[1]/div/div[1]/div[2]/div/div/div[2]/div[2]/div[1]/div',
 31                         'second_search_text': '//*[@id="header-company-search"]',
 32                         'second_search_submit': '/html/body/div[1]/div/div[2]/div/div[2]/div[1]/div',
 33                         'company_name': '/html/body/div[2]/div/div[1]/div/div[3]/div[1]/div/div[3]/div[1]/a/em',
 34                         'company_history_name': '/html/body/div[2]/div/div[1]/div/div[3]/div[1]/div/div[3]/div[4]/span[2]/em',
 35                         'company_url': '/html/body/div[2]/div/div[1]/div/div[3]/div[1]/div/div[3]/div[1]/a',
 36                         'holding_url': '/html/body/div[2]/div[1]/div/div[3]/div[1]/div/div[2]/div[1]/div[5]/div[2]',
 37                         'holding_name': '/html/body/div[2]/div[1]/div/div[3]/div[1]/div/div[2]/div[1]/div[5]/div[1]',
 38                         'financing_url': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[5]/div[2]/div',
 39                         'financing_name': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[5]/div[1]/span[1]',
 40                         'project_url': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[6]/div[2]/div',
 41                         'project_name': '/html/body/div[2]/div/div[2]/div[1]/div/div[2]/div[6]/div[1]/span[1]',
 42                         '2_to_3': '/html/body/div[2]/div[1]/div/div[2]/div[1]/div[2]/div[3]/div[4]/div[1]'}
 43         self.js = {
   'page_state': 'return document.readyState'}
 44 
 45     # 关闭新消息提醒
 46     def has_new(self):
 47         try:
 48             if u'新的动态' in self.browser.find_element_by_xpath(self.url_dic['New_Message']).text:
 49                 self.browser.find_element_by_xpath(self.url_dic['New_submit']).click()
 50                 self.browser.back()
 51         except Exception:
 52             pass
 53 
 54     # 关闭底部风险提示
 55     def advertisement(self):
 56         try:
 57             self.browser.find_element_by_xpath(self.url_dic['Advertisement']).click()
 58         except Exception:
 59             pass
 60 
 61     # 等待页面加载完成
 62     def complete(self):
 63         while self.browser.execute_script(self.js['page_state']) != 'complete':
 64             time.sleep(0.001)
 65 
 66     # 验证登录状态
 67     def login_success(self):
 68         try:
 69             while u'登录' in self.browser.find_element_by_xpath(self.url_dic['login_state']).text:
 70                 time.sleep(0.5)
 71         except Exception:
 72             pass
 73 
 74     # 用户登录
 75     def login(self):
 76         # 访问网址
 77         self.browser.get(self.url)
 78         self.complete()
 79         # 弹出登录界面
 80         self.browser.find_elements_by_xpath(self.url_dic['get_login'])[0].click()
 81         # 选择登录方式并登录
 82         while True:
 83             try:
 84                 self.browser.find_element_by_xpath(self.url_dic['login_user_pwd']).click()
 85                 break
 86             except Exception:
 87                 time.sleep(1)
 88         self.browser.find_element_by_xpath(self.url_dic['login_user']).send_keys(self.user)
 89         self.browser.find_element_by_xpath(self.url_dic['login_pwd']).send_keys(self.pwd)
 90         self.browser.find_element_by_xpath(self.url_dic['login_submit']).click()
 91         # 验证登录状态
 92         print '正在登录……'
 93         self.login_success()
 94         # 确保登录成功
 95         time.sleep(30)
 96         # 除去新消息
 97         self.has_new()
 98 
 99     # 搜索企业
100     def search_company(self, company):
101 
  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值