近几天想注册一个网站,奈何域名选择成了问题,于是用 selenium + chromedriver 写了一个自动筛选域名的脚本。

# -*- coding: utf-8 -*-
"""
Created on Tue Jul  7 11:15:08 2020

@author: anyiyu

"""

import re
import time
import random

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import pypinyin


def get_driver():
    """Start Chrome through the local chromedriver and open the whois page.

    Returns:
        The WebDriver instance, already navigated to the whois lookup page
        so that the search form is available to the caller.
    """
    # NOTE(review): the `executable_path` keyword is deprecated in
    # Selenium 4 and rejected by recent releases -- confirm the installed
    # Selenium version before upgrading.
    browser = webdriver.Chrome(executable_path="../chromedriver.exe")
    browser.get("http://whois.chinaz.com/ziwei.com")
    return browser

def find_uri(driver, s0=None, s1=None, s2=None):
    """Look up ``<name>.com`` for every name in *s0* on the open whois page.

    For each name the domain is typed into the ``DomainName`` input, the
    ``search-write-btn`` button is clicked, and the result page is searched
    for the ``whoisl-wrap fl`` element.

    Args:
        driver: Selenium WebDriver with the whois lookup page loaded.
        s0: Names (without the ``.com`` suffix) to look up.
        s1: Accumulator for names whose result element was found; it is
            extended in place.
        s2: Accumulator for names whose result element was not found or
            whose lookup raised an error.

    Returns:
        ``(s1, s2)``. *s2* is returned as a new list with duplicates removed
        in first-seen order.
    """
    # None sentinels instead of ``[]`` defaults: a mutable default is created
    # once and would be shared (and keep growing) across calls.
    names = [] if s0 is None else s0
    s1 = [] if s1 is None else s1
    s2 = [] if s2 is None else s2

    for li in names:
        print(li)

        # Reset per name so an early failure can neither leave this unbound
        # nor reuse the previous name's element.
        result = None
        try:
            box = driver.find_element(By.ID, 'DomainName')
            box.clear()
            box.send_keys("%s.com" % (li))

            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'search-write-btn')))
            driver.find_element(By.CLASS_NAME, 'search-write-btn').click()
            time.sleep(2)

            # Explicit wait until the element is present again after the
            # page reloads.
            # NOTE(review): this waits for the search button, not for the
            # result block -- confirm that is the intended readiness signal.
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'search-write-btn')))

            result = driver.find_element(By.XPATH, "//*[@class='whoisl-wrap fl']")
        except Exception as exc:
            # Best effort: a failed lookup (including "result element not
            # found") is reported and the name is filed under s2. Failed
            # lookups are not retried.
            print("lookup failed for %s: %r" % (li, exc))
        time.sleep(1)

        # NOTE(review): presumably the result block is only rendered for
        # registered domains -- verify against the site.
        if result:
            s1.append(li)
        else:
            s2.append(li)

    # Order-preserving de-duplication keeps the output deterministic.
    return s1, list(dict.fromkeys(s2))




# Without tone marks (style=pypinyin.NORMAL)
def pinyin(word):
    """Return the pinyin of *word* as one unbroken string.

    Every item produced by ``pypinyin.pinyin`` is joined with no separator,
    so the result contains no spaces between syllables.
    """
    syllables = pypinyin.pinyin(word, style=pypinyin.NORMAL)
    return ''.join(''.join(parts) for parts in syllables)


# With tone marks (pypinyin's default style)
def yinjie(word):
    """Return the pinyin of *word* with a space after each character's entry.

    The readings returned for one character are concatenated without a
    separator, and every entry -- including the last -- is followed by a
    single space.
    """
    # heteronym=True turns on multiple readings for polyphonic characters
    readings = pypinyin.pinyin(word, heteronym=True)
    return ''.join(''.join(options) + " " for options in readings)

def main(name):
    """Look up every name listed in ``<name>.txt`` and log the second result list.

    The first whitespace-separated token of each non-blank line is taken as
    a candidate name. The candidates are passed to ``find_uri`` and the
    second list it returns (names for which no result element was found) is
    printed and appended to ``result.txt``, followed by three blank lines
    that separate runs.

    Args:
        name: Base name (without ``.txt``) of the input file.
    """
    # Read the input before launching the browser so a missing file does
    # not leave a Chrome process behind.
    with open("%s.txt" % (name)) as f:
        lines = f.readlines()

    # str.split() ignores leading whitespace, so an indented line still
    # yields its first real token instead of an empty string.
    candidates = [line.split()[0] for line in lines if line.strip()]
    # De-duplicate while keeping file order, so lookups run predictably.
    candidates = list(dict.fromkeys(candidates))

    driver = get_driver()
    try:
        s1, s2 = find_uri(driver, candidates, [], [])
    finally:
        # Always end the browser session, even when a lookup raises.
        driver.quit()

    print(str(s2))
    with open("result.txt", "a+") as f:
        for item in s2:
            f.write(item)
            f.write("\n")
        f.write("\n\n\n")

def main2(name, source="孤本1.txt"):
    """Convert the words listed in *source* to pinyin and write ``<name>.txt``.

    The first whitespace-separated token of each non-blank line of *source*
    is converted with ``pinyin``. Each distinct word is written to
    ``<name>.txt`` as ``"<pinyin> <word>"``, one per line, in first-seen
    order. Every processed word is also echoed as ``word:pinyin``.

    Args:
        name: Base name (without ``.txt``) of the output file.
        source: Path of the word list to read.
    """
    with open(source) as f:
        lines = f.readlines()

    # str.split() ignores leading whitespace, so an indented line still
    # yields its first real token instead of an empty string.
    words = [line.split()[0] for line in lines if line.strip()]

    spelled_by_word = {}
    for word in words:
        spelled = pinyin(word)
        spelled_by_word[word] = spelled
        print(word + ":" + spelled)

    with open("%s.txt" % (name), "w+") as f:
        for word, spelled in spelled_by_word.items():
            f.write(spelled)
            f.write(" ")
            f.write(word)
            f.write("\n")
        

if __name__ == "__main__":
    # Base name shared by both steps: main2 writes "<name>.txt" and main
    # reads the same file back.
    name = "大杂烩1"

    # Step 1: build the "<pinyin> <word>" list.
    main2(name)
    print("-------------------------")
    # Step 2: run the lookups for every entry of that list.
    main(name)
    

脚本的大致流程:先用 main2(name) 从“孤本1.txt”中读取词语并转换成拼音,按每行“anyiyu 安逸鱼”(拼音 + 空格 + 原词)的格式写入“name.txt”;再用 main(name) 读取该文件,通过 selenium + chromedriver 逐个查询域名。所有的文件都存放在一个文件夹下,结果追加输出到 result.txt 文本中。

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值