爬取站长之家域名并保存到mysql

![爬取站长之家域名并保存到mysql*
使用环境:
Python 3.7.3、Selenium、MySQL、Chrome
代码如下:

import time
import pymysql
from selenium import webdriver
import io
import sys

from selenium.webdriver.chrome.options import Options

# Replace sys.stdout with a text wrapper that encodes output as GB18030.
# NOTE(review): presumably needed so the Chinese text printed by search()
# does not fail to encode on a Windows console - confirm on the target machine.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
def search():
    """Crawl ICP filing records from icp.chinaz.com and insert them into MySQL.

    Visits result pages 1 through 10 with a headless Chrome driver, reads
    rows 1 through 20 of the ``result_table`` body on each page, inserts
    each row into the ``yu`` table of the local ``yu`` database and prints
    the row to stdout.

    One database connection is shared by the whole crawl, and both the
    connection and the browser are released in ``finally`` blocks so that a
    failure part-way through does not leak a chromedriver process or a
    MySQL connection.

    Raises:
        Whatever Selenium raises when an expected cell is missing (for
        example when a page holds fewer than 20 rows) and whatever PyMySQL
        raises on connection or insert errors; nothing is swallowed.
    """
    options = Options()
    # Headless: do not show a browser window.
    options.add_argument("--headless")
    # Raw string so the backslashes of the Windows path are never
    # interpreted as escape sequences.
    path = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver"

    insert_sql = "INSERT INTO yu(domain_list,organizer_list,nature_list,license_list, web_name_list,web_domain_list,verify_time_list) VALUES (%s,%s,%s,%s,%s,%s,%s)"
    # XPath of each stored column relative to its table row, listed in the
    # same order as the columns of the INSERT statement.
    cell_paths = (
        "td[1]/a",       # domain_list
        "td[2]",         # organizer_list
        "td[3]",         # nature_list
        "td[5]",         # license_list
        "td[6]",         # web_name_list
        "td[7]/span/a",  # web_domain_list
        "td[8]",         # verify_time_list
    )

    # Connect to the database once per crawl instead of once per row.
    connection = pymysql.connect(host='127.0.0.1', user='root', password='', database='yu', charset="utf8")
    try:
        chrome = webdriver.Chrome(executable_path=path, options=options)
        try:
            # Pages to crawl.
            for i in range(1, 11):
                chrome.get(f"http://icp.chinaz.com/provinces?&companytype=&city=%u5168%u56FD&custype=0&companyName=&page={i}")
                # Rows to read on each page.
                for d in range(1, 21):
                    row_xpath = f"//tbody[@id='result_table']/tr[{d}]"
                    record = tuple(
                        chrome.find_element_by_xpath(f"{row_xpath}/{cell}").text
                        for cell in cell_paths
                    )
                    with connection.cursor() as cursor:
                        cursor.execute(insert_sql, record)
                    # Commit per row so rows already scraped survive a
                    # later failure.
                    connection.commit()
                    print(*record)
        finally:
            # quit() ends the whole driver session (browser and
            # chromedriver process); close() would only close the window.
            chrome.quit()
    finally:
        connection.close()
if __name__=="__main__":
    # Seconds to wait between two consecutive crawls; adjust as needed.
    crawl_interval_seconds = 3600
    # Run forever: crawl, then pause before the next crawl.
    while True:
        search()
        # time.sleep() requires a duration; calling it with no argument
        # raises TypeError right after the first crawl.
        time.sleep(crawl_interval_seconds)

在这里插入图片描述

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值