from selenium import webdriver
from idiom import DbHandle
# Scrape the characters listed under every pinyin syllable on xh.5156edu.com
# and insert one row per character into `chinese_characters`.
#
# For each row of `py_body` (id, body) the script clicks the link whose text
# is the syllable, walks the table rows marked bgcolor="#ffffff" (one row per
# tone), and records the first character of every `fontbox` element.
from selenium.webdriver.common.by import By

option = webdriver.ChromeOptions()
option.add_argument('headless')
web = webdriver.Chrome(options=option)
try:
    web.get("http://xh.5156edu.com/")
    # "按拼音检索" is the site's "search by pinyin" link.
    web.find_element(By.LINK_TEXT, "按拼音检索").click()
    db = DbHandle.DbHandle()
    sql = "SELECT id,body FROM py_body"
    print(sql)
    pinyin_rows = db.select_all(sql)
    print(pinyin_rows)
    for pinyin_row in pinyin_rows:
        print('-----111111----')
        print(pinyin_row['body'])  # pinyin syllable
        print(pinyin_row['id'])  # pinyin id
        web.find_element(By.LINK_TEXT, pinyin_row['body']).click()
        # One table row per tone of the syllable.
        tone_rows = web.find_elements(By.XPATH, '//tr[@bgcolor="#ffffff"]')
        # Tones are numbered by position on the page, starting at 1.
        for tone, tone_row in enumerate(tone_rows, start=1):
            # Read the label once; every `.text` access is a WebDriver call.
            tone_label = tone_row.find_element(By.CLASS_NAME, 'font_14').text
            print(tone_label + '----------000-----' + str(tone))  # tone
            # Elements holding the individual characters.
            char_links = tone_row.find_elements(By.CLASS_NAME, 'fontbox')
            for char_link in char_links:
                link_text = char_link.text
                if not link_text:
                    # Nothing to store; indexing [0] would raise IndexError.
                    continue
                character = link_text[0]
                print(character + '-------9---')  # Chinese character
                # NOTE(review): values are interpolated straight into the SQL
                # text; if DbHandle supports parameter binding, prefer it --
                # scraped text containing a quote would break this statement.
                sql = "INSERT INTO chinese_characters (pin_yin_id,word,pin_yin,tone) " \
                      "VALUES ('%s','%s','%s','%s')" % (
                          pinyin_row['id'], character, tone_label, tone)
                print(sql)
                db.update(sql)
        # Return to the pinyin index so the next syllable link can be found.
        web.back()
finally:
    # Always shut the headless browser down, even if scraping fails midway.
    web.quit()
from selenium import webdriver
from word import DbHandel
# Populate `py_body` with the pinyin syllables shown on the xh.5156edu.com
# pinyin index.
#
# For each row of `py_head` (id, head) the script selects every <a> element
# whose text starts with that head and inserts the link text together with
# the head's id.
from selenium.webdriver.common.by import By

option = webdriver.ChromeOptions()
option.add_argument('headless')
web = webdriver.Chrome(options=option)
try:
    web.get("http://xh.5156edu.com/")
    # "按拼音检索" is the site's "search by pinyin" link.
    web.find_element(By.LINK_TEXT, "按拼音检索").click()
    # Not read in this section; retained because it is a module-level name.
    data = "abcdefghijklmnopqrstuvwxyz"
    db = DbHandel.DbHandle()
    sql = "SELECT id,head FROM py_head"
    print(sql)
    head_rows = db.select_all(sql)
    print(head_rows)
    for head_row in head_rows:
        print('-----111111----')
        print(head_row['head'])
        print(head_row['id'])
        condition = '//a[starts-with(text(), "' + head_row['head'] + '")]'
        syllable_links = web.find_elements(By.XPATH, condition)
        for syllable_link in syllable_links:
            # Read the text once; every `.text` access is a WebDriver call.
            syllable = syllable_link.text
            # NOTE(review): values are interpolated straight into the SQL
            # text; if DbHandle supports parameter binding, prefer it.
            sql = "INSERT INTO py_body (body,head_id) VALUES ('%s','%s')" % (
                syllable, head_row['id'])
            print(sql)
            db.update(sql)
            print(syllable + '---')
finally:
    # Always shut the headless browser down, even if scraping fails midway.
    web.quit()
from selenium import webdriver
from word import DbHandel
# Print-only pass over the xh.5156edu.com pinyin index: for every <p> element
# whose lower-cased text occurs in the a-z string, print that text and the
# text of every <a> element starting with it.
#
# NOTE: the original carried a commented-out
# "INSERT INTO py_head (head) VALUES ('%s')" for each such text; nothing is
# written to the database by this section as it stands.
from selenium.webdriver.common.by import By

option = webdriver.ChromeOptions()
option.add_argument('headless')
web = webdriver.Chrome(options=option)
try:
    web.get("http://xh.5156edu.com/")
    # "按拼音检索" is the site's "search by pinyin" link.
    web.find_element(By.LINK_TEXT, "按拼音检索").click()
    paragraphs = web.find_elements(By.TAG_NAME, 'p')
    data = "abcdefghijklmnopqrstuvwxyz"
    db = DbHandel.DbHandle()
    for paragraph in paragraphs:
        text = paragraph.text.lower()
        # The empty string is a substring of anything, so skip it explicitly.
        if not text:
            continue
        # Substring test against the alphabet: keeps single-letter headings
        # (and any consecutive-letter run such as "abc").
        if text not in data:
            continue
        print('-----111111----')
        print(text)
        condition = '//a[starts-with(text(), "' + text + '")]'
        syllable_links = web.find_elements(By.XPATH, condition)
        for syllable_link in syllable_links:
            print(syllable_link.text + '---')
finally:
    # Always shut the headless browser down, even if scraping fails midway.
    web.quit()
from selenium import webdriver
from idiom import DbHandle
# Look up idioms on cy.5156edu.com for every character stored in
# `chinese_characters` and insert them into `chinese_idiom` together with a
# numeric code describing which characters of the idiom repeat.
from selenium.webdriver.common.by import By


def _idiom_form(text):
    """Return the repetition-pattern code stored as ``form_type``.

    Codes (letters stand for equal characters by position):
    9 = length is not 4, 0 = no match below, 1 = AAxx, 2 = AABB,
    3 = AxAx, 4 = ABAB, 5 = AxxA, 6 = xAAx, 7 = xAxA, 8 = xxAA.
    The checks run in that priority order; the first match wins.

    NOTE(review): codes 2 and 4 are treated as refinements nested under
    codes 1 and 3 respectively -- inferred from the code numbering; confirm.
    """
    if len(text) != 4:
        return 9
    first, second, third, fourth = text
    if first == second:
        return 2 if third == fourth else 1
    if first == third:
        return 4 if second == fourth else 3
    if first == fourth:
        return 5
    if second == third:
        return 6
    if second == fourth:
        return 7
    if third == fourth:
        return 8
    return 0


option = webdriver.ChromeOptions()
option.add_argument('headless')
# NOTE(review): the driver is never quit in the visible part of the script;
# add driver.quit() once it is confirmed nothing later still uses it.
driver = webdriver.Chrome(options=option)
driver.get("http://cy.5156edu.com/")
db = DbHandle.DbHandle()
sql = 'SELECT id,word FROM chinese_characters'
character_rows = db.select_all(sql)
for character_row in character_rows:
    print('----' + str(character_row['id']) + '-----' + character_row['word'] + '------')
    search = driver.find_element(By.ID, '_SearchString')
    search.clear()
    search.send_keys(character_row['word'])
    # Pick the second <option> of the `f_type2` selector before searching.
    select = driver.find_element(By.NAME, 'f_type2')
    select.find_elements(By.TAG_NAME, 'option')[1].click()
    # "查词典" is the label on the dictionary-search button.
    driver.find_element(By.XPATH, '//input[@value="查词典"]').click()
    # Result entries are read from the page's <u> elements.
    idiom_elements = driver.find_elements(By.TAG_NAME, 'u')
    if not idiom_elements:
        continue
    print('--------------')
    for idiom_element in idiom_elements:
        # Read the text once; every `.text` access is a WebDriver call.
        result = idiom_element.text
        if not result:
            # Nothing to classify; indexing [0] would raise IndexError.
            continue
        # Stop at the first entry that does not begin with the searched
        # character; later entries are not examined.
        if result[0] != character_row['word']:
            break
        form = _idiom_form(result)
        # NOTE(review): values are interpolated straight into the SQL text;
        # if DbHandle supports parameter binding, prefer it.
        sql = "INSERT INTO chinese_idiom (idiom,first_word_id,form_type) " \
              "VALUES ('%s','%s','%s')" % (result, character_row['id'], form)
        print(sql)
        db.update(sql)
        print(result + '------------' + str(form))