首先要导入的库当然是selenium;为了在页面加载时暂停等待,需要导入time;为了模拟按下回车键,还需要导入Keys
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
导入之后就可以打开网易云音乐的主页
# Start a Chrome session and load the music site's home page.
driver = webdriver.Chrome()
driver.get(u"http://music.163.com/")
# The message is built as one unicode string and printed with call syntax:
# this prints the same "label, space, title" text as the old two-argument
# print statement on Python 2, and is also valid on Python 3.
print(u"正在打开网页 %s" % driver.title)
等待页面加载,然后在搜索框中输入歌手名“陈粒”并按回车,准备爬取搜索结果第一页的歌词
# Artist to search for; the crawl loop later compares each result row against it.
name = u"陈粒"
search_box_css = "input[class=\"txt j-flag\"]"

# Fixed pause so the home page can finish loading before the search box is used.
time.sleep(2)

# The search box is looked up separately for each action: first type the
# artist name, then press Enter to submit the search.
driver.find_element_by_css_selector(search_box_css).send_keys(name)
driver.find_element_by_css_selector(search_box_css).send_keys(Keys.ENTER)
切换到名称为contentFrame的框架(iframe)上来
# Enter the frame named "contentFrame" so that later element lookups run
# inside it. Uses the driver.switch_to API (the same API this script uses for
# default_content()) instead of the deprecated switch_to_frame() method.
driver.switch_to.frame("contentFrame")
我们要爬取前三十首歌,先定义一个fun函数,用来保存单首歌曲的歌词
def fun(a):
    """Save the lyrics shown on the currently open song page, then go back.

    Steps: click the first ``a[href^='javascript:void(0)']`` link on the page
    (scrolling down and retrying once if that fails), read the text of the
    ``lyric-content`` element and of the ``tit`` element, write the lyrics as
    UTF-8 to ``C:/Users/user/Desktop/lyric/<first line of title>.txt``, and
    finally navigate back and re-enter the ``contentFrame`` frame.

    Args:
        a: 1-based position of the song in the result list. The body does not
            use it; it is kept so existing ``fun(a)`` calls keep working.

    Raises:
        Exception: whatever Selenium or the file system raises if the retry
            click, the element lookups, or the file write fail.
    """
    import io  # local import: io.open gives the same text-mode API on Python 2 and 3

    print("正在打开歌曲页面")
    time.sleep(2)

    # Presumably the "expand lyrics" toggle -- TODO confirm against the page.
    toggle_css = "a[href^='javascript:void(0)']"
    try:
        driver.find_element_by_css_selector(toggle_css).click()
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl-C still stops
        # the script. Scroll the document down and try the click once more;
        # a second failure propagates to the caller.
        driver.execute_script("var q=document.body.scrollTop=500")
        time.sleep(1)
        driver.find_element_by_css_selector(toggle_css).click()
    else:
        # NOTE(review): the original indentation was lost; the ``else`` branch
        # is taken to cover only these two calls -- confirm.
        driver.implicitly_wait(10)  # seconds
        driver.execute_script('window.stop()')  # stop any further page loading

    print("正在获取歌词")
    lyric_text = driver.find_element_by_id("lyric-content").text
    # Drop the last two characters (presumably the trailing collapse label --
    # TODO confirm).
    text_all = lyric_text[:-2]
    title = driver.find_element_by_class_name("tit").text

    # Only the first line of the title element is used for the file name.
    file_stem = title.split("\n", 1)[0]
    # These characters are not allowed in Windows file names; without this a
    # title such as "Why?" would make open() fail.
    for forbidden in '\\/:*?"<>|':
        file_stem = file_stem.replace(forbidden, "_")

    print("正在新建文件保存")
    lyric_path = 'C:/Users/user/Desktop/lyric/%s.txt' % file_stem
    # ``with`` guarantees the file is closed even if the write raises.
    with io.open(lyric_path, 'w', encoding='utf8') as f:
        f.write(text_all)
        print(u"正在保存歌词 %s" % title)

    # Return to the result list and re-enter its frame for the next song.
    driver.back()
    driver.switch_to.frame("contentFrame")
    time.sleep(2)
此函数为打开歌曲页面之后的操作,然而在打开之前会有很多其他的操作
首先确认该歌曲的歌手是不是我们搜索的名字,然后试图点击歌曲链接。点击是在界面刷新之后的页面上进行的,当歌曲链接不在当前界面上时,则需要先下拉页面再点击。另外还可能出现如下的网易登录界面,此时必须先回到主窗口把它关闭掉,再回到刚才的contentFrame框架继续爬取,具体代码如下。中间可能还会出现一些错误,可能是由于页面加载不及时等原因,此时重新运行即可
# CSS selectors for the n-th row of the search results (n is 1-based).
ARTIST_CELL_CSS = 'div#m-search>div:nth-child(3)>div>div>div:nth-child(%d)>div:nth-child(4)'
SONG_LINK_CSS = 'div#m-search>div:nth-child(3)>div>div>div:nth-child(%d)>div:nth-child(2)>div>div>a'


def _scroll_results_to(offset):
    """Set the document's scrollTop to ``offset`` and pause for one second."""
    driver.execute_script("var q=document.body.scrollTop=%d" % offset)
    time.sleep(1)


def _click_song_link(a):
    """Click the song link in result row ``a`` (1-based)."""
    driver.find_element_by_css_selector(SONG_LINK_CSS % a).click()


def _open_song_page(a):
    """Click the song link of result row ``a``, escalating when a click fails.

    Attempts, in order: click as-is; scroll to 600 and click; scroll to 1000
    and click; finally switch to the top-level document, click the element
    with class ``zcls`` (per the surrounding article, this closes the login
    dialog), re-enter ``contentFrame``, scroll to 1000 and click. An error in
    that last attempt propagates to the caller.
    """
    attempts = (
        ("试图点击", None),
        ("试图下拉点击", 600),
        ("试图二次下拉点击", 1000),
    )
    for message, offset in attempts:
        print(message)
        try:
            if offset is not None:
                _scroll_results_to(offset)
            _click_song_link(a)
            return
        except Exception:
            # Deliberate best-effort: fall through to the next, more
            # aggressive attempt. ``Exception`` rather than a bare ``except``
            # so that Ctrl-C still stops the crawl.
            continue

    print("试图回到主页面关闭窗口")
    driver.switch_to.default_content()
    driver.find_element_by_class_name('zcls').click()
    driver.switch_to.frame("contentFrame")
    _scroll_results_to(1000)
    _click_song_link(a)


# Walk the first 30 result rows and save the lyrics of every row whose artist
# cell contains the searched name.
for a in range(1, 31):
    print("第 %d 首" % a)
    if name in driver.find_element_by_css_selector(ARTIST_CELL_CSS % a).text:
        _open_song_page(a)
        # fun() runs only after a click has succeeded and outside the retry
        # logic: an error while saving lyrics must not be mistaken for a
        # failed click, because by then the browser has left the result list.
        fun(a)