爬取网站
网易财经
创建的文件
List_url.py
from finance.code_list import CodeList
def Shanghai_Stock_Index():
    """Prompt for a Shanghai index code, year and quarter; return the quotes URL.

    Every value is requested again until it is valid, which is what the
    "please re-enter" messages promise. Previously an invalid answer fell
    through to ``return url`` with ``url`` never assigned and the function
    died with UnboundLocalError; non-numeric text crashed ``int()``.

    Returns:
        The history-page URL for the chosen code, year and quarter.
    """
    code = _prompt_int_in_range("证券代号:", 201000, 900957,
                                "您输入的证券代码有误,请重新输入!")
    year = _prompt_int_in_range("年份:", 1991, 2020,
                                "您输入的年份有误,请重新输入!")
    season = _prompt_int_in_range("季度(1 2 3 4):", 1, 4,
                                  "您输入的季度错误,请重新输入!")
    return "http://quotes.money.163.com/trade/lsjysj_zhishu_{}.html?year={}&season={}".format(code, year, season)


def _prompt_int_in_range(prompt, low, high, error_message):
    """Keep asking with *prompt* until an integer within [low, high] is typed."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            # Non-numeric text is just another invalid answer, not a crash.
            value = None
        if value is not None and low <= value <= high:
            return value
        print(error_message)
def Shenzhen_Stock_Index():
    """Prompt for a Shenzhen index code, year and quarter; return the quotes URL.

    A code is accepted when it is numerically within 131800..300790 or when
    it appears verbatim in the list returned by CodeList(). Every value is
    requested again until it is valid; previously an invalid answer reached
    ``return url`` with ``url`` unassigned (UnboundLocalError) and a
    non-numeric code crashed ``int(code)``.

    Returns:
        The history-page URL for the chosen code, year and quarter. The code
        is inserted exactly as typed, so leading zeros are preserved.
    """
    while True:
        code = input("证券代号:")
        if _is_shenzhen_code(code):
            break
        print("您输入的证券代码有误,请重新输入!")
    year = _ask_int_between("年份:", 1991, 2020,
                            "您输入的年份有误,请重新输入!")
    # The whitelist branch used to print a garbled quarter message; both
    # code paths now share the one correct wording.
    season = _ask_int_between("季度(1 2 3 4):", 1, 4,
                              "您输入的季度错误,请重新输入!")
    return "http://quotes.money.163.com/trade/lsjysj_zhishu_{}.html?year={}&season={}".format(code, year, season)


def _is_shenzhen_code(code):
    """Return True when *code* is in the numeric range or the CodeList() whitelist."""
    try:
        if 131800 <= int(code) <= 300790:
            return True
    except ValueError:
        # Not a number at all: it can still only be valid via the whitelist.
        pass
    return code in CodeList()


def _ask_int_between(prompt, low, high, error_message):
    """Keep asking with *prompt* until an integer within [low, high] is typed."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            # Non-numeric text is just another invalid answer, not a crash.
            value = None
        if value is not None and low <= value <= high:
            return value
        print(error_message)
def exponent():
    """Ask which index family to query and return the URL its prompt flow builds.

    The prompt tells the user to enter 1 for the Shenzhen index and 2 for the
    Shanghai index. The dispatch used to be the other way round, so it is
    aligned with the prompt here. Any other answer (including non-numeric
    text) is rejected and the question is asked again; previously that path
    hit ``return item`` with ``item`` unassigned.

    Returns:
        The URL string returned by Shenzhen_Stock_Index() or
        Shanghai_Stock_Index().
    """
    while True:
        try:
            choice = int(input("请问您要执行深证指数操作还是上证指数操作(深证指数请输入:1 上证指数请输入:2):"))
        except ValueError:
            # Non-numeric text is treated like any other invalid choice.
            choice = None
        if choice == 1:
            return Shenzhen_Stock_Index()
        if choice == 2:
            return Shanghai_Stock_Index()
        print("输入错误,请重新输入!")
code_list.py
def CodeList():
    """Return the fixed whitelist of six-character security codes.

    The list is built in ascending blocks: 000001-000999, five individual
    0016xx-0019xx codes, 002000-002975, four individual codes, and finally
    038014-038017. All entries are zero-padded strings.
    """
    codes = ["{:06d}".format(number) for number in range(1, 1000)]
    codes.extend(["001696", "001872", "001896", "001965", "001979"])
    codes.extend("{:06d}".format(number) for number in range(2000, 2976))
    codes.extend(["003816", "031005", "031007", "038011"])
    codes.extend("{:06d}".format(number) for number in range(38014, 38018))
    return codes
pro_data.py
def Data_Convert(replace, replaced, type, data_list):
    """Convert every item of *data_list* after a text substitution.

    Each item is turned into a string, every occurrence of *replace* is
    swapped for *replaced*, and the result is passed to the callable *type*
    (for example ``float``).

    Returns:
        A new list with the converted values, in the original order.
    """
    return [type(str(item).replace(replace, replaced)) for item in data_list]
Spider_Data.py
from urllib.request import Request, urlopen
from finance.pro_data import Data_Convert
from finance.List_url import exponent
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from pandas import DataFrame
import re
def html():
    """Download the page whose URL exponent() builds and return its text.

    exponent() prompts the user interactively, so this function blocks on
    stdin before any network traffic happens.

    Returns:
        The response body decoded with ``bytes.decode()``'s default codec
        (UTF-8).
    """
    headers = {
        # Presumably a browser-like User-Agent string from fake_useragent.
        'User-Agent': UserAgent().Chrome
    }
    url = exponent()
    request = Request(url, headers=headers)
    # The response was never closed before; the context manager releases the
    # connection as soon as the body has been read.
    with urlopen(request) as response:
        return response.read().decode()
def get_Data():
    """Scrape the page returned by html() into a nine-column DataFrame.

    Tables with class ``table_bg001 border_box limit_sale`` are located, then
    rows are pulled out with two regexes: one for ``<tr class="">`` and one
    for ``<tr class="dbrow">`` (presumably the page's alternating row
    styles). Only attribute-less ``<td>`` cells are captured. The two row
    sets are interleaved (plain, dbrow, plain, ...) as before.

    The old loop only checked the plain-row cells and then indexed the dbrow
    cells unconditionally, so a table with an odd number of rows raised
    IndexError. Rows are now merged by length-aware interleaving, so the
    leftover row is kept instead of crashing.

    Returns:
        A DataFrame with the date, four price columns, change amount, change
        percentage, volume and turnover. Prices, volume and turnover are
        floats (thousands separators removed); the other columns stay as
        the scraped strings.
    """
    column_count = 9

    info = html()
    soup = BeautifulSoup(info, 'html.parser')
    con = soup.find_all('table', attrs={'class': 'table_bg001 border_box limit_sale'})
    table_markup = str(con)

    plain_row_pattern = re.compile(r'(?<=<tr class="">).*?(?=</tr>)')
    dbrow_pattern = re.compile(r'(?<=<tr class="dbrow">).*?(?=</tr>)')
    cell_pattern = re.compile(r'(?<=<td>).*?(?=</td>)')

    # Cells are extracted from the string form of each row list, exactly as
    # the original code did, so the captured text is unchanged.
    plain_cells = cell_pattern.findall(str(plain_row_pattern.findall(table_markup)))
    dbrow_cells = cell_pattern.findall(str(dbrow_pattern.findall(table_markup)))

    rows = _interleave_rows(
        _chunk_cells(plain_cells, column_count),
        _chunk_cells(dbrow_cells, column_count),
    )

    # Transpose rows into columns; yields nine empty lists when no row matched.
    (Times, opening_price, max_price, min_price, closing_price,
     change_amount,    # change amount
     price_limit,      # change percentage
     trading_amount,   # volume (shares)
     trading_volume,   # turnover (yuan)
     ) = [[row[index] for row in rows] for index in range(column_count)]

    new_opening_price = Data_Convert(",", "", float, opening_price)
    new_max_price = Data_Convert(",", "", float, max_price)
    new_min_price = Data_Convert(",", "", float, min_price)
    new_closing_price = Data_Convert(",", "", float, closing_price)
    new_trading_amount = Data_Convert(",", "", float, trading_amount)
    new_trading_volume = Data_Convert(",", "", float, trading_volume)

    dic = {
        '日期': Times,
        '开盘价': new_opening_price,
        '最高价': new_max_price,
        '最低价': new_min_price,
        '收盘价': new_closing_price,
        '涨跌额': change_amount,
        '涨跌幅(%)': price_limit,
        '成交量(股)': new_trading_amount,
        '成交金额(元)': new_trading_volume,
    }
    return DataFrame(dic)


def _chunk_cells(cells, width):
    """Split a flat cell list into complete rows of *width* cells.

    A trailing group shorter than *width* cannot be mapped onto the columns
    and is left out.
    """
    usable = len(cells) - len(cells) % width
    return [cells[start:start + width] for start in range(0, usable, width)]


def _interleave_rows(first, second):
    """Alternate rows from *first* and *second*, keeping the longer list's tail."""
    merged = []
    for index in range(max(len(first), len(second))):
        if index < len(first):
            merged.append(first[index])
        if index < len(second):
            merged.append(second[index])
    return merged
main.py
from finance.Spider_Data import get_Data
import os
import sys
def save_data():
    """Ask for a file name and write the module-level ``Data`` frame as CSV.

    The file is written under ``./save_csv/``. A name containing ``/`` is
    rejected with a message and nothing is written. ``.csv`` is appended
    unless the name already ends with it.

    Changes from the previous version:
    * the target directory is created when missing, instead of letting
      ``to_csv`` fail on a fresh checkout;
    * the extension check is a suffix test, so a name such as
      ``a.csv.bak`` is no longer mistaken for a CSV file name;
    * the explicit ``os.remove`` is gone because ``to_csv`` opens the file
      in write mode and overwrites it anyway.
    """
    file_name = input("请输入您要导出的数据文件(.csv)名称:")
    if '/' in file_name:
        print("输入错误,请重新输入!")
        return

    if not file_name.endswith('.csv'):
        file_name += '.csv'

    save_dir = "./save_csv/"
    os.makedirs(save_dir, exist_ok=True)
    Data.to_csv(save_dir + file_name, sep=',', index=False, header=True)
def main():
    """Interactive entry point: optionally print ``Data``, then optionally export it.

    The first question accepts only '是' or '否'; anything else prints an
    error and returns. After that the download question is asked: '是' calls
    save_data(), '否' exits the process with status 0, anything else prints
    an error.
    """
    show_answer = input("是否展示数据:")
    if show_answer not in ('是', '否'):
        print("输入错误,请重新输入!")
        return

    if show_answer == '是':
        print(Data)

    download_answer = input("是否要下载数据:")
    if download_answer == '是':
        save_data()
    elif download_answer == '否':
        sys.exit(0)
    else:
        print("输入错误,请重新输入!")
if __name__ == '__main__':
    # Data is bound at module level on purpose: main() and save_data() read
    # it as a global rather than receiving it as an argument.
    Data = get_Data()
    main()
运行
在main.py下运行,结果如下: