使用 Selenium 自动化爬虫获取天眼查(tianyancha)数据

1. 使用 Selenium 进入 tianyancha

2.代码


import time

import xlwt
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Header titles for row 0 of the output sheet, one per column (0..21).
columns = ['公司名称','法定代表人','统一社会信用代码','营业期限','公司类型','参保人数','曾用名','注册地址','经营范围','经营状态','成立日期','注册资本','实缴资本','纳税人识别号','纳税人资质','行业','登记机关','英文名称','工商注册号',
           '组织机构代码','核准日期','人员规模']
workbook = xlwt.Workbook(encoding="utf-8")
worksheet = workbook.add_sheet('My Worksheet')
for k, title in enumerate(columns):
    worksheet.write(0, k, title)
# Save once after the whole header row is written; saving inside the loop
# rewrote the complete file for every single column.
workbook.save('Tian_yan_cha.xls')


# XPath of the element on the search-result page whose text is written to
# column 1 of the sheet.
_RESULT_NAME_XPATH = '//div[@class="content"]/div[3]/div[1]/a'

# XPath of the link of the first search result; pressing ENTER on it opens the
# company detail page.
_FIRST_RESULT_LINK_XPATH = (
    '//div[@class="result-list sv-search-container"]/div[1]'
    '//div[@class="content"]/div[@class="header"]/a'
)

# Common prefix of the locators used on the company detail page.
_INFO_TABLE_XPATH = '//table[@class="table -striped-col -breakall"]'

# Locator for sheet column 2.  The original script used a different XPath for
# the first company than for all later ones; both are kept unchanged because
# their equivalence cannot be verified from this file.
_COL2_XPATH_FIRST = '//div[@id="_container_baseInfo"]/table/tbody/tr[5]/td[2]'
_COL2_XPATH_OTHER = _INFO_TABLE_XPATH + '//tr[5]/td[2]'

# (sheet column, XPath relative to the info table) for columns 3..21, in the
# order the cells are read from the detail page.
_DETAIL_FIELDS = (
    (3, '//tr[6]/td[2]/span'),
    (4, '//tr[7]/td[2]'),
    (5, '//tr[8]/td[2]'),
    (6, '//tr[9]/td[2]'),
    (7, '//tr[10]/td[2]'),
    (8, '//tr[11]/td[2]/span'),
    (9, '//tr[1]/td[4]'),
    (10, '//tr[2]/td[2]'),
    (11, '//tr[3]/td[2]/div'),
    (12, '//tr[4]/td[2]'),
    (13, '//tr[5]/td[4]'),
    (14, '//tr[6]/td[4]'),
    (15, '//tr[7]/td[4]'),
    (16, '//tr[8]/td[4]'),
    (17, '//tr[9]/td[4]'),
    (18, '//tr[4]/td[4]'),
    (19, '//tr[5]/td[6]'),
    (20, '//tr[6]/td[6]'),
    (21, '//tr[7]/td[6]'),
)


def _scrape_company(driver, row, company, is_first):
    """Search for one company and write its data into sheet row ``row``.

    Args:
        driver: The Selenium WebDriver, already logged in.
        row: Row index in the module-level ``worksheet`` to fill.
        company: Search term typed into the site's search box.
        is_first: True for the first search of the session.  The first search
            uses the home-page search box; later searches clear and reuse the
            header search box of the result page.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if an expected
            element is not present on the page.
    """
    if is_first:
        search_box_id = 'home-main-search'
    else:
        # Clear the text left over from the previous search.
        driver.find_element(By.CLASS_NAME, "clear-input").click()
        search_box_id = 'header-company-search'
    driver.find_element(By.ID, search_box_id).send_keys(company)
    driver.find_element(By.CLASS_NAME, "input-group-btn").click()  # search button
    worksheet.write(row, 0, company)

    name = driver.find_element(By.XPATH, _RESULT_NAME_XPATH).text
    if is_first:
        print(name)
    worksheet.write(row, 1, name)

    # Open the first result and continue in the most recently opened window.
    driver.find_element(By.XPATH, _FIRST_RESULT_LINK_XPATH).send_keys(Keys.ENTER)
    windows = driver.window_handles
    driver.switch_to.window(windows[-1])
    # Give the detail page time to render before reading it.  The original
    # only waited for the first company; later lookups read immediately.
    time.sleep(3)

    col2_xpath = _COL2_XPATH_FIRST if is_first else _COL2_XPATH_OTHER
    worksheet.write(row, 2, driver.find_element(By.XPATH, col2_xpath).text)
    for col, suffix in _DETAIL_FIELDS:
        cell = driver.find_element(By.XPATH, _INFO_TABLE_XPATH + suffix)
        worksheet.write(row, col, cell.text)

    # Close the detail window and return to the search-result window.
    driver.close()
    driver.switch_to.window(windows[0])


def search_product():
    """Scrape every company listed in ``a.txt`` into ``Tian_yan_cha.xls``.

    Opens Chrome on the login page and waits 30 seconds for a manual login.
    It then reads one search term per line from ``a.txt`` (blank lines are
    skipped) and fills one sheet row per term, starting at row 1.  The
    workbook is saved after every company, also when that company fails, so
    already scraped data is kept.  The browser is always shut down on exit.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("https://www.tianyancha.com/login")
        driver.maximize_window()
        print("等待扫码登录......")
        time.sleep(30)  # time for the user to log in by QR code or password

        with open("a.txt", 'r', encoding="utf-8") as f:
            # A trailing newline would otherwise produce an empty search term.
            companies = [line for line in f.read().split("\n") if line.strip()]
        print(companies)

        for row, company in enumerate(companies, start=1):
            is_first = row == 1
            try:
                _scrape_company(driver, row, company, is_first)
            finally:
                # Persist whatever was collected, even for a partial row.
                workbook.save('Tian_yan_cha.xls')
            print("1" if is_first else "2")
    finally:
        # Do not leave a browser process behind when scraping fails.
        driver.quit()


if __name__ == '__main__':
    search_product()

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

萧鼎

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值