#不会正则表达式,爬虫效率比较低,需要仔细学习一下
import requests
from bs4 import BeautifulSoup
import pandas as pd
result_tongfang=pd.DataFrame()
for i in range(9,27):
url='http://www.aegonthtf.com/rate/ratedetail.do?id='+str(i)
page=requests.get(url,timeout=15)
soup=BeautifulSoup(page.text,'lxml')
soup
name=soup.find_all(class_='txt2')[0].string
content=soup.find_all(class_='tablestyle')[0].text
content=content.replace('\n','').replace('\r','').replace('/','').replace(' ','')
end_index=int(len(content[25:])/22+1)
result=[]
for i in range(1,end_index):
first=3+i*22
end=25+i*22
data=content[first:end]
result_data={}
result_data['结算月份']=data[:6]
result_data['结算日利率']=data[6:17]
result_data['结算年利率']=data[17:]
result.append(result_data)
result=pd.DataFrame(result)
result['产品名称']=len(result)*[name]
result_tongfang=result_tongfang.append(result)
result_tongfang.to_csv('C:/Users/Administrator/Desktop/同方全球人寿.csv')
#效果