Selenium 网页爬虫:从基金代码列表文件 flist.txt 中读取基金代码。
flist.txt 每行一个基金代码(6 位数字)。
对每只基金,先输入起止日期进行查询,再抓取天天基金网上的基金净值。
fund3.py
# -*- coding: utf-8 -*-
import os, sys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
from datetime import date
class Fund(unittest.TestCase):
    """Scrape the net-value history table of each fund listed in a text file.

    For every fund code read from ``fund_list_file`` the test opens the
    fund's ``/f10/jjjz_<code>.html`` page on ``fund.eastmoney.com``, fills in
    the start/end date boxes, runs the search and appends the first four
    cells of every result row to ``<YYYYMMDD>.txt`` (named after today's
    date) in the current directory.

    Class attributes (override in a subclass to change the query):
        start_date: text typed into the ``lsjzSDate`` input.
        end_date: text typed into the ``lsjzEDate`` input.
        fund_list_file: path of the input file, one fund code per line.
    """

    start_date = "2017-06-29"
    end_date = "2017-07-07"
    fund_list_file = "flist.txt"

    def setUp(self):
        """Load the fund codes, start Chrome and open today's output file.

        Resources are released through ``addCleanup`` so that they are
        freed even when a later step of ``setUp`` raises (``tearDown`` is
        not called in that case).
        """
        today = date.today().strftime("%Y%m%d")

        # Read the input first: a missing or unreadable list then fails
        # before any browser process has been launched.
        self.flist = []  # fund list
        with open(self.fund_list_file, 'r') as fp:
            for line in fp:
                code = line.strip()
                # Only lines that strip to exactly six characters are kept.
                if len(code) == 6:
                    self.flist.append(code)

        ch_driver = os.path.abspath(r"D:\selenium\chromedriver.exe")
        # NOTE(review): this only sets an environment variable; the path is
        # not passed to webdriver.Chrome() below -- confirm that chromedriver
        # is actually found this way on the target machine.
        os.environ["webdriver.chrome.driver"] = ch_driver
        self.driver = webdriver.Chrome()
        self.addCleanup(self.driver.quit)
        self.driver.implicitly_wait(30)
        self.base_url = "http://fund.eastmoney.com"
        self.verificationErrors = []
        self.accept_next_alert = True

        self.fp = open(today + '.txt', 'w')
        # Cleanups run in reverse order: the file is closed, then the
        # browser quits.
        self.addCleanup(self.fp.close)
        print(today + '.txt')

    def test_fund(self):
        """Query every fund in ``self.flist`` and write its result rows.

        Each output line is the fund code followed by the text of the first
        four table cells of a row, every field followed by a single space.
        (Presumably the cells are date / unit value / accumulated value /
        daily growth -- confirm against the live page.)
        """
        fp = self.fp
        driver = self.driver
        for f1 in self.flist:
            driver.get(self.base_url + "/f10/jjjz_%s.html" % (f1))

            start_box = driver.find_element(By.ID, "lsjzSDate")
            start_box.clear()
            start_box.send_keys(self.start_date)

            end_box = driver.find_element(By.ID, "lsjzEDate")
            end_box.clear()
            end_box.send_keys(self.end_date)

            driver.find_element(By.CSS_SELECTOR, "input.search").click()
            # Fixed pause after the search click, presumably to let the
            # result table refresh before it is read.
            time.sleep(1)

            try:
                div = driver.find_element(By.ID, "jztable")
                table = div.find_elements(By.TAG_NAME, "table")
                tbody = table[0].find_elements(By.TAG_NAME, "tbody")
                t_rows = tbody[0].find_elements(By.TAG_NAME, 'tr')
                for row in t_rows:
                    tds = row.find_elements(By.TAG_NAME, 'td')
                    cells = [td.text for td in tds[0:4]]
                    # Build the whole line before writing so a failure
                    # while reading a cell cannot leave a partial row.
                    fp.write(' '.join([f1] + cells) + ' \n')
            except Exception as msg:
                # Best effort per fund: report the problem and carry on
                # with the next code instead of aborting the whole run.
                print(msg)

    def is_element_present(self, how, what):
        """Return True if an element located by (*how*, *what*) exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if switching to an alert succeeds, else False."""
        try:
            self.driver.switch_to.alert
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to True afterwards in every
        case.
        """
        try:
            alert = self.driver.switch_to.alert
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Check that no verification errors were recorded.

        The output file and the browser are released by the cleanups
        registered in ``setUp``, which run after this method.
        """
        self.assertEqual([], self.verificationErrors)
# Run the test case only when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
flist.txt 一行一个基金代码:6位数字