企查查最新中标企业抓取
网站链接:https://www.biaozhaozhao.com/ (标找找 - 企查查旗下一站式标讯服务平台)
需要使用的 Python 包:
selenium
在未付费的情况下只能抓取最新50条数据
解析代码如下:
# -*- coding: utf-8 -*-
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# XPath matching one company card on the listing page.
_ROW_XPATH = "//div[@class='tc_list_item tc_list_hover']"
# XPath matching the "next page" arrow icon of the pager.
_NEXT_PAGE_XPATH = "//i[@class='anticon anticon-right']"
# Number of result pages main() walks through.
_MAX_PAGES = 10
# Seconds to wait for the rows of a page to be present before giving up.
_PAGE_TIMEOUT = 10


def _first(node, xpath):
    """Return the first element under *node* matching *xpath*, or None.

    Uses ``find_elements`` so that a missing element yields an empty list
    instead of raising ``NoSuchElementException``.
    """
    matches = node.find_elements(By.XPATH, xpath)
    return matches[0] if matches else None


def _text(node, xpath):
    """Return the text of the first match of *xpath* under *node*, or ''."""
    element = _first(node, xpath)
    return element.text if element is not None else ''


def _parse_row(row):
    """Extract the scraped fields of one company card into a dict.

    Args:
        row: WebElement for one card matched by ``_ROW_XPATH``.

    Returns:
        dict with the keys ``ent_name``, ``ent_state``, ``tag``, ``num``,
        ``name``, ``regcap``, ``date``, ``title`` and ``url``. Text fields
        that are not found in the card are ``''``; ``url`` is ``None`` when
        the announcement link is not found.
    """
    # The announcement anchor supplies both the title and the link, so it is
    # located once and reused.
    notice = _first(row, "div/div/div/div/div[@class='lZ9dQcZO']/a")
    tag_nodes = row.find_elements(
        By.XPATH, "div/div/span[@class='ant-tag awpCsC3X']")
    return {
        # Company name
        'ent_name': _text(row, "div/div/div[@class='FpwufnYn']/div/a/span"),
        # Company status
        'ent_state': _text(row, "div/div[@class='_0aDOKXj6']/span"),
        # Company tags
        'tag': [node.text for node in tag_nodes],
        # Number of bids / tenders
        'num': _text(row, "div/div/div[@class='WMSbIy8X']/span"),
        # Legal representative
        'name': _text(row, "div/div/div/ul[@class='ozeTtOeS']/li/a"),
        # Registered capital
        'regcap': _text(
            row, "div/div/div/ul[@class='ozeTtOeS']/li[2]/span[2]"),
        # Date of establishment
        'date': _text(
            row, "div/div/div/ul[@class='ozeTtOeS']/li[3]/span[2]"),
        # Announcement title
        'title': notice.text if notice is not None else '',
        # Announcement link
        'url': notice.get_attribute('href') if notice is not None else None,
    }


def main():
    """Scrape the winning-bidder company list and print one dict per row.

    Opens the listing page in Chrome, prints the parsed fields of every
    company card on the current page, then clicks the "next page" arrow, for
    at most ``_MAX_PAGES`` pages. Paging stops early when no rows show up
    within ``_PAGE_TIMEOUT`` seconds or when the "next page" arrow is not
    found. The browser session is always shut down, even if scraping fails.
    """
    driver = Chrome()
    try:
        driver.get('https://www.biaozhaozhao.com/?tab=wtb_company')
        for page in range(_MAX_PAGES):
            # Poll until at least one row is present; an empty list is falsy,
            # so WebDriverWait keeps retrying until the timeout.
            try:
                rows = WebDriverWait(driver, _PAGE_TIMEOUT).until(
                    lambda d: d.find_elements(By.XPATH, _ROW_XPATH))
            except TimeoutException:
                break
            # NOTE(review): right after a "next page" click the previous
            # page's rows may still be in the DOM; confirm whether the site
            # needs an additional wait for the list to refresh.
            for row in rows:
                print(_parse_row(row))
            if page == _MAX_PAGES - 1:
                # Last requested page: no need to advance the pager again.
                break
            next_buttons = driver.find_elements(By.XPATH, _NEXT_PAGE_XPATH)
            if not next_buttons:
                break
            # Go to the next page.
            next_buttons[0].click()
    finally:
        # quit() ends the whole WebDriver session, not just the current
        # window as close() does.
        driver.quit()
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
只解析了部分字段
结果: