背景:家里有个亲戚生了对龙凤胎,取名字很头痛。他们要求名字的第三个字要是五行属土的字,而且两个孩子名字的第三个字能组成一个寓意不错的词。
于是我写了个小程序,突然觉得python真的很强大。哈哈哈。
1.先从网上把五行属土的字放到txt文件中,文件格式自己处理好了,内容都以逗号分隔。
2.使用文件中的字循环放进一个组词网进行组词,然后组词的字都要判断是否也是属土的字。生成一个词典文件。
3.使用生成的词典循环放进百度词典网站查找释义,生成词典释义文件。
4.手动选择释义较好的词放进测评网站测名字分数。
代码很粗糙,但觉得好好玩。
searchzuci.py
#-*- coding: utf-8 -*-
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
import time,os
from searchmodal import *
import threading
def catch_web(url, infile, outfile, items):
    """Search each character from *infile* on the word-building site and keep
    the two-character words whose other character is also in *items*.

    Args:
        url: address of the word-building site (e.g. http://zuci.51240.com/).
        infile: text file of characters to search, comma separated.
        outfile: destination file; matched words are written comma separated.
        items: mutable list of allowed characters (wu-xing "earth" set);
            processed characters are removed from it in place.

    Relies on helpers imported from searchmodal: conectweb(), readfile(),
    getHtml().
    """
    allitem = []
    driver = conectweb(url)
    nameitem = readfile(infile)
    nameitempop = items
    try:
        for G in nameitem:
            try:
                # Type the character into the search box and submit the form.
                driver.find_element_by_xpath(".//*[@id='ss_tj_value_1']").clear()
                driver.find_element_by_xpath(".//*[@id='ss_tj_value_1']").send_keys(G)
                driver.find_element_by_xpath(".//*[@id='main_content']/div[2]/form/input[2]").click()
                time.sleep(1)  # give the result page time to load
            except NoSuchElementException:
                print(G + "is faile")
                # FIX: previously we fell through and scraped the stale
                # current page, attributing its words to G. Skip instead.
                continue
            # Parse the result page for the <ul class="list_2"> word list.
            soup = BeautifulSoup(getHtml(driver.current_url), 'html.parser')
            item = []
            for tag in soup.find_all('ul', attrs={'class': 'list_2'}):
                for it in tag.find_all('li'):
                    item.append(it.get_text())
            newitem = []
            for new_str in item:
                if len(new_str) == 2:
                    # Strip the searched character; the remaining character
                    # must itself belong to the allowed set.
                    other = new_str.strip(G)
                    if other in nameitempop:
                        newitem.append(new_str)
            # Drop the processed character so later searches do not pair with
            # it again. FIX: guard the removal — pop(index(G)) raised
            # ValueError whenever G was not in the list.
            if G in nameitempop:
                nameitempop.remove(G)
            allitem.append(newitem)
    finally:
        # FIX: the browser process was never released before.
        driver.quit()
    # FIX: use a context manager so the file is closed even on error.
    with open(outfile, 'w') as file_out:
        for value in allitem:
            for y in value:
                file_out.write(str(y))
                file_out.write(',')
url = 'http://zuci.51240.com/'
in_filename = "name.txt"
in_filename1 = "name1.txt"
out_filename = "allname.txt"
out_filename1 = "allname1.txt"

# FIX: run the crawl only when executed as a script (the sibling script
# serachciyi.py already uses this guard; without it, importing this module
# would start web requests as a side effect).
if __name__ == '__main__':
    # Shared pool of allowed "earth" characters, consumed by catch_web.
    nameitempop = readfile("800tu.txt")
    starttime = time.strftime('%Y-%m-%d %X', time.localtime())
    print(starttime)
    threads = []
    t1 = threading.Thread(target=catch_web,
                          args=(url, in_filename, out_filename, nameitempop))
    threads.append(t1)
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    finaltime = time.strftime('%Y-%m-%d %X', time.localtime())
    print(finaltime)
serachciyi.py
#-*- coding: utf-8 -*-
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time
import threading
import multiprocessing
def readfile(filename):
    """Read a comma-separated text file and return its tokens as a list.

    Args:
        filename: path to a file whose lines contain comma-separated values.

    Returns:
        A flat list of all tokens from all lines, in file order.

    Fixes over the original:
    - tokens from every line are accumulated (previously ``str_list`` was
      reassigned each iteration, so only the last line survived);
    - trailing newlines are stripped, so the last token of a line no longer
      carries a ``'\\n'``;
    - the file handle is closed via a context manager.
    """
    tokens = []
    with open(filename, 'r') as fh:
        for line in fh:
            line = line.strip()
            if line:  # skip blank lines
                tokens.extend(line.split(','))
    return tokens
def contributpjs():
    """Create a PhantomJS webdriver with a spoofed userAgent and
    performance-oriented service arguments.

    Returns:
        A configured selenium ``webdriver.PhantomJS`` instance.
    """
    # Spoof the userAgent via DesiredCapabilities so the site serves the
    # normal desktop page instead of detecting the headless browser.
    drap = dict(DesiredCapabilities.PHANTOMJS)
    drap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) "
        "Gecko/20100101 Firefox/25.0 ")
    # Performance tweaks for the PhantomJS process itself.
    server_args = [
        '--load-images=no',          # do not download images
        '--disk-cache=yes',          # enable the on-disk cache
        '--ignore-ssl-errors=true',  # FIX: was 'ture' (typo), option had no effect
    ]
    # FIX: server_args was built but never handed to the driver, so none of
    # the tweaks took effect; pass it via service_args.
    browser = webdriver.PhantomJS(
        executable_path='C:/Users/Administrator/AppData/Local/Programs/Python/Python36/phantomjs.exe',
        desired_capabilities=drap,
        service_args=server_args)
    return browser
def catchweb(infile, url, outfile):
    """Look up each word from *infile* on the Baidu dictionary and write the
    first definition paragraph to *outfile* as ``word:definition`` lines.

    Args:
        infile: file of words to look up, comma separated.
        url: dictionary site address (e.g. http://dict.baidu.com/).
        outfile: destination file for the collected definitions.
    """
    driver = contributpjs()
    # FIX: renamed from 'list', which shadowed the builtin.
    words = readfile(infile)
    allitem = {}
    try:
        for word in words:
            try:
                driver.get(url)
                driver.implicitly_wait(5)
                # Type the word into the search box and submit.
                driver.find_element_by_xpath(".//*[@id='kw']").clear()
                driver.find_element_by_xpath(".//*[@id='kw']").send_keys(word)
                driver.find_element_by_xpath(".//*[@id='su']").click()
                time.sleep(1)  # give the result page time to render
                print(word)
                # First definition paragraph on the result page.
                stringtext = driver.find_element_by_xpath(
                    ".//*[@id='detailmean-wrapper']/div[1]/dl/dd/ol/li[1]/p[1]").text
                allitem[word] = stringtext
            except NoSuchElementException:
                # Word not found / page layout changed: report and move on.
                print(word + ' is faile')
    finally:
        # FIX: quit in finally so the PhantomJS process is released even if
        # an unexpected exception escapes the loop.
        driver.quit()
    # FIX: context manager guarantees the file is closed.
    with open(outfile, 'w') as file_out:
        for key, value in allitem.items():
            file_out.write(key)
            file_out.write(':')
            file_out.write(value)
            file_out.write('\n')
url = 'http://dict.baidu.com/'
# Mapping of input word list -> output definition file, one pool task each.
filename = {"aname.txt": "ziyi.txt", "aname1.txt": "ziyi1.txt",
            "aname2.txt": "ziyi2.txt", "aname3.txt": "ziyi3.txt",
            "aname4.txt": "ziyi4.txt"}

if __name__ == '__main__':
    # FIX: the timing prints now live inside the guard — on Windows,
    # multiprocessing re-imports this module in every worker process, so
    # module-level prints would fire once per worker. The unused 'threads'
    # list and dead commented-out code were removed.
    starttime = time.strftime('%Y-%m-%d %X', time.localtime())
    print(starttime)
    pool = multiprocessing.Pool(processes=5)
    for x, y in filename.items():
        pool.apply_async(catchweb, (x, url, y))
        print(x + " is start")
    pool.close()
    pool.join()
    finaltime = time.strftime('%Y-%m-%d %X', time.localtime())
    print(finaltime)