#这个文件是分析获得各个基金公司的IP地址
from selenium import webdriver
import requests
import bs4
from bs4 import BeautifulSoup
from detail_right import spider_main
# Module-level Selenium Chrome driver (side effect on import: launches a
# browser window). Shared by getCompanyURLs below; never closed/quit here —
# NOTE(review): consider browser.quit() when the crawl finishes.
browser = webdriver.Chrome()  # create a Chrome WebDriver instance
def getHTMLText(url):
    """Fetch *url* and return the response body as decoded text.

    Parameters
    ----------
    url : str
        Page to download.

    Returns
    -------
    str
        The page text on success, or the literal sentinel string
        "产生异常" ("an exception occurred") on any request failure —
        the original contract is preserved so existing callers that
        compare against this sentinel keep working.

    Notes
    -----
    Fix: the original bare ``except:`` swallowed *every* exception,
    including ``KeyboardInterrupt`` and programming errors such as
    ``NameError``. It is narrowed to ``requests.RequestException``,
    which covers timeouts, connection errors and the ``HTTPError``
    raised by ``raise_for_status``.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx into HTTPError
        # apparent_encoding guesses from the body, coping with pages
        # whose declared charset header is wrong.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return "产生异常"
def getCompanyURLs(start_url, count=50):
    """Scrape the fund-company ranking page for company links and names.

    Parameters
    ----------
    start_url : str
        URL of the 10jqka fund-company ranking page.
    count : int, optional
        Number of companies to collect. Defaults to 50, matching the
        original hard-coded behaviour (backward compatible).

    Returns
    -------
    tuple[list[str], list[str]]
        Parallel lists: company detail-page URLs and company names.

    Notes
    -----
    Uses the module-level Selenium ``browser`` because the page is
    rendered by JavaScript — presumably plain ``requests`` would not
    see the table (TODO confirm).
    """
    browser.get(start_url)
    soup = BeautifulSoup(browser.page_source, "html.parser")
    cells = soup.find_all('td', {'class': 'links_td links_val'})
    cp_urls, cp_names = [], []
    # The first matching <td> is not a company link, so skip it
    # (original code indexed cells[i + 1] for the same reason).
    for cell in cells[1:count + 1]:
        anchor = cell.find('a')
        cp_urls.append(anchor.attrs['href'])
        cp_names.append(anchor.get_text())
    return cp_urls, cp_names
def main():
    """Entry point: crawl the ranking page, then scrape each company.

    Delegates per-company scraping to ``spider_main`` from
    ``detail_right``, passing a 1-based sequence number.
    """
    start_url = 'http://fund.10jqka.com.cn/datacenter/jjgspm/'
    cp_urls, cp_names = getCompanyURLs(start_url)
    # zip + enumerate instead of a second hard-coded range(50) with
    # double indexing: no duplicated magic number, and it stays correct
    # if getCompanyURLs ever returns fewer companies.
    for seq, (cp_url, cp_name) in enumerate(zip(cp_urls, cp_names), start=1):
        spider_main(cp_url, cp_name, seq)
# Guard the entry point so importing this module for its helpers does not
# immediately launch a browser crawl as a side effect.
if __name__ == "__main__":
    main()
'''这是主文件所调用的获取基金公司财报相关数据的细节页面'''
#获取细节信息并写入到excel多个sheet表中
import requests
import bs4
from bs4 import BeautifulSoup
from openpyxl import Workbook
import bs4
import time
def getHTMLText(url):
    """Fetch *url* and return the response body as decoded text.

    Parameters
    ----------
    url : str
        Page to download.

    Returns
    -------
    str
        The page text on success, or the literal sentinel string
        "产生异常" ("an exception occurred") on any request failure —
        the original contract is preserved so existing callers that
        compare against this sentinel keep working.

    Notes
    -----
    Fix: the original bare ``except:`` swallowed *every* exception,
    including ``KeyboardInterrupt`` and programming errors such as
    ``NameError``. It is narrowed to ``requests.RequestException``,
    which covers timeouts, connection errors and the ``HTTPError``
    raised by ``raise_for_status``.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx into HTTPError
        # apparent_encoding guesses from the body, coping with pages
        # whose declared charset header is wrong.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return "产生异常"
#获取基本信息函数
def get_data(book, cp_url,cp_name):
html = getHTMLText(cp_url)
soup = BeautifulSoup(html,"html.parser")
sheet1 = book.create_sheet("公司概况表",0) #按照编号位置来创建sheet
sheet2 = book.create_sheet("绩效评估表",1)
#获取公司基本情况
cp_info =soup.find('ul',{'class':'rank_list fl'}).find_all('li')
sheet1.cell(row=1,column=1).value=cp_name
sheet1.cell(row=2,column&#