# -*- coding: utf-8 -*-
"""
Created on Tue Jul 7 11:15:08 2020
@author: anyiyu
"""
import re
import time
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import pypinyin
def get_driver(executable_path="../chromedriver.exe",
               url="http://whois.chinaz.com/ziwei.com"):
    """Start Chrome through chromedriver and load the whois lookup page.

    Args:
        executable_path: Path to the chromedriver binary. Defaults to the
            location the script has always used.
        url: Page to open once the browser is up. Defaults to the
            whois.chinaz.com page the script has always used.

    Returns:
        The ``webdriver.Chrome`` instance, already navigated to ``url``.

    Raises:
        Any exception raised by ``webdriver.Chrome`` or ``driver.get`` is
        propagated unchanged.
    """
    driver = webdriver.Chrome(executable_path=executable_path)
    try:
        driver.get(url)
    except Exception:
        # Do not leave an orphaned browser/chromedriver process behind when
        # the very first navigation fails; the caller never gets a handle
        # it could clean up itself.
        driver.quit()
        raise
    return driver
def _has_whois_record(driver, name):
    """Search ``<name>.com`` on the open page; True if a result panel is present."""
    box = driver.find_element(By.ID, 'DomainName')
    box.clear()
    box.send_keys("%s.com" % (name))
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'search-write-btn')))
    driver.find_element(By.CLASS_NAME, 'search-write-btn').click()
    # Fixed pause kept from the original; presumably gives the result page
    # time to load before it is inspected.
    time.sleep(2)
    # Explicit wait until the element is present.
    # NOTE(review): this waits for the search button again, not for the
    # result panel -- kept as in the original; confirm whether intended.
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, 'search-write-btn')))
    # find_elements returns an empty list when nothing matches, so a missing
    # result panel is an ordinary "False" instead of an exception.
    return bool(driver.find_elements(By.XPATH, "//*[@class='whoisl-wrap fl']"))


def find_uri(driver, s0=None, s1=None, s2=None, max_retries=2):
    """Look up ``<name>.com`` for every name in ``s0`` and sort names by outcome.

    For each name the page's ``DomainName`` box is filled, the search button
    is clicked, and the page is checked for an element whose class is
    ``whoisl-wrap fl``.

    Args:
        driver: Selenium WebDriver already showing the lookup page.
        s0: Names to look up (without the ``.com`` suffix).
        s1: List that receives names for which the result element was found.
            Appended to in place.
        s2: List that receives names for which it was not found.
            Appended to in place.
        max_retries: How many extra passes are made over names whose lookup
            raised an exception. Retries are bounded so a persistently
            failing name cannot loop forever.

    Returns:
        ``(s1, s2)`` where ``s2`` is a new, de-duplicated list in first-seen
        order. Names that still fail after the last retry are placed in the
        second list (the original code also counted a failed lookup as
        "not found"); a message is printed for every failure.
    """
    # Fresh lists per call: the old mutable defaults were shared across calls.
    pending = list(s0) if s0 else []
    s1 = [] if s1 is None else s1
    s2 = [] if s2 is None else s2

    retries_left = max_retries
    while pending:
        failed = []
        for name in pending:
            print(name)
            try:
                found = _has_whois_record(driver, name)
            except Exception as exc:
                # Best-effort scraping: report the failure and retry later
                # rather than aborting the whole batch.
                print("lookup failed for %s: %r" % (name, exc))
                failed.append(name)
            else:
                if found:
                    s1.append(name)
                else:
                    s2.append(name)
            # Pause between queries, as in the original.
            time.sleep(1)

        if failed and retries_left <= 0:
            s2.extend(failed)
            break
        retries_left -= 1
        pending = failed

    # dict.fromkeys de-duplicates while keeping a stable order.
    return s1, list(dict.fromkeys(s2))
def pinyin(word):
    """Return ``word`` as pinyin without tone marks, syllables run together.

    Calls ``pypinyin.pinyin`` with ``style=pypinyin.NORMAL`` and concatenates
    every returned piece with no separator.
    """
    syllable_groups = pypinyin.pinyin(word, style=pypinyin.NORMAL)
    return ''.join(''.join(group) for group in syllable_groups)
def yinjie(word):
    """Return ``word`` as pinyin with tone marks (pypinyin's default style).

    ``heteronym=True`` enables multiple readings per character. The readings
    of one character are joined with no separator, and every character's
    group is followed by a single space (so the result ends with a space
    whenever ``word`` produced any output).
    """
    reading_groups = pypinyin.pinyin(word, heteronym=True)
    return ''.join(''.join(group) + " " for group in reading_groups)
def main(name):
    """Look up every entry of ``<name>.txt`` and append the misses to result.txt.

    The first whitespace-separated token of each non-blank line of
    ``<name>.txt`` is taken as a name, duplicates are dropped (first-seen
    order), and the names are passed to ``find_uri``. The second list that
    ``find_uri`` returns is printed and appended to ``result.txt``, one entry
    per line, followed by three blank lines.

    Args:
        name: Base name (without ``.txt``) of the input file.

    Raises:
        OSError: If the input file or ``result.txt`` cannot be opened.
    """
    # Read the input before starting a browser so a missing file does not
    # leave a Chrome instance running.
    # NOTE(review): files use the platform default encoding, the same way
    # main2 writes the input file; change both together if at all.
    with open("%s.txt" % (name)) as f:
        lines = f.readlines()

    # str.split() skips leading whitespace, so an indented line yields its
    # first real token instead of an empty string.
    tokens = [line.split()[0] for line in lines if line.strip()]
    s0 = list(dict.fromkeys(tokens))

    driver = get_driver()
    try:
        s1, s2 = find_uri(driver, s0, [], [])
    finally:
        # quit() ends the whole session (browser and chromedriver process),
        # and runs even when the lookup raises.
        driver.quit()

    print(str(s2))
    with open("result.txt", "a+") as f:
        for entry in s2:
            f.write(entry)
            f.write("\n")
        # The file is opened in append mode; three blank lines separate this
        # run's entries from whatever is appended next.
        f.write("\n" * 3)
def main2(name, source="孤本1.txt"):
    """Convert the words listed in ``source`` to pinyin and save them as ``<name>.txt``.

    The first whitespace-separated token of every non-blank line of
    ``source`` is treated as a word. Each word is printed as
    ``word:pinyin``, and ``<name>.txt`` is (over)written with one
    ``pinyin word`` line per distinct word.

    Args:
        name: Base name (without ``.txt``) of the output file.
        source: Input file to read. Defaults to the file the script has
            always read.

    Raises:
        OSError: If ``source`` or the output file cannot be opened.
    """
    with open(source) as f:
        lines = f.readlines()

    # str.split() skips leading whitespace, so an indented line yields its
    # first real token instead of an empty string.
    words = [line.split()[0] for line in lines if line.strip()]

    pinyin_by_word = {}
    for word in words:
        romanized = pinyin(word)
        pinyin_by_word[word] = romanized
        print(word + ":" + romanized)

    with open("%s.txt" % (name), "w") as f:
        for word, romanized in pinyin_by_word.items():
            f.write(romanized)
            f.write(" ")
            f.write(word)
            f.write("\n")
if __name__ == "__main__":
    # Two stages share one base name: main2 writes "<name>.txt" with the
    # pinyin of each word, then main reads that file and runs the lookups.
    # (The original also carried an unrelated shell note here:
    # ``copy/b *.ts x1.mp4``; it is not used by this script.)
    wordlist_name = "大杂烩1"
    main2(wordlist_name)
    print("-------------------------")
    main(wordlist_name)
内容大概是:先用main2(name)从“孤本1.txt”中读取汉字,将其转换成拼音,并按照每行"anyiyu 安逸鱼"这样的格式存储到“name.txt”中;再用main(name)读取该文本,利用selenium+chromedriver逐个查询对应的.com域名。所有的文件都存放在一个文件夹下,结果输出在result.txt文本中。