爬取股票信息

爬取网站

网易财经

 

创建的文件

 

 

List_url.py

 

from finance.code_list import CodeList

def Shanghai_Stock_Index():
    """Ask for a Shanghai index code, a year and a quarter, and build the URL.

    Each value is read from standard input.  A reply that is not an integer
    or lies outside the accepted range prints the matching error message and
    the same question is asked again, so the function only returns once all
    three values are valid.

    Accepted ranges (inclusive):
        code:   201000 .. 900957
        year:   1991 .. 2020
        season: 1 .. 4

    Returns:
        str: URL of the quotes.money.163.com historical index page for the
        chosen code, year and quarter.
    """
    def ask_int(prompt, low, high, error_message):
        # Re-prompt until the reply parses as an int inside [low, high].
        while True:
            try:
                value = int(input(prompt))
            except ValueError:
                value = None
            if value is not None and low <= value <= high:
                return value
            print(error_message)

    code = ask_int("证券代号:", 201000, 900957, "您输入的证券代码有误,请重新输入!")
    year = ask_int("年份:", 1991, 2020, "您输入的年份有误,请重新输入!")
    season = ask_int("季度(1  2  3  4):", 1, 4, "您输入的季度错误,请重新输入!")

    return "http://quotes.money.163.com/trade/lsjysj_zhishu_{}.html?year={}&season={}".format(code, year, season)

def Shenzhen_Stock_Index():
    """Ask for a Shenzhen index code, a year and a quarter, and build the URL.

    A code is accepted when it is one of the strings returned by CodeList()
    or when it parses as an integer in 131800 .. 300790 (inclusive).  The
    year must be in 1991 .. 2020 and the quarter in 1 .. 4.  Any invalid
    reply prints the matching error message and the same question is asked
    again, so the function only returns once all three values are valid.

    Returns:
        str: URL of the quotes.money.163.com historical index page for the
        chosen code, year and quarter.  The code is inserted exactly as
        typed (minus surrounding whitespace), so leading zeros are kept.
    """
    known_codes = CodeList()

    def is_valid_code(text):
        # Listed codes are matched as strings; everything else must be a
        # number inside the accepted numeric range.
        if text in known_codes:
            return True
        try:
            number = int(text)
        except ValueError:
            return False
        return 131800 <= number <= 300790

    def ask_int(prompt, low, high, error_message):
        # Re-prompt until the reply parses as an int inside [low, high].
        while True:
            try:
                value = int(input(prompt))
            except ValueError:
                value = None
            if value is not None and low <= value <= high:
                return value
            print(error_message)

    while True:
        code = input("证券代号:").strip()
        if is_valid_code(code):
            break
        print("您输入的证券代码有误,请重新输入!")

    year = ask_int("年份:", 1991, 2020, "您输入的年份有误,请重新输入!")
    season = ask_int("季度(1  2  3  4):", 1, 4, "您输入的季度错误,请重新输入!")

    return "http://quotes.money.163.com/trade/lsjysj_zhishu_{}.html?year={}&season={}".format(code, year, season)

def exponent():
    """Ask which index family to query and return the URL built for it.

    The prompt tells the user to enter 1 for the Shenzhen index and 2 for
    the Shanghai index, so the dispatch follows that mapping.  Any other
    reply (including non-numeric text) prints an error and asks again.

    Returns:
        str: the URL returned by Shenzhen_Stock_Index() or
        Shanghai_Stock_Index().
    """
    while True:
        try:
            choice = int(input("请问您要执行深证指数操作还是上证指数操作(深证指数请输入:1   上证指数请输入:2):"))
        except ValueError:
            choice = None

        if choice == 1:
            return Shenzhen_Stock_Index()
        if choice == 2:
            return Shanghai_Stock_Index()
        print("输入错误,请重新输入!")

 

code_list.py

 

def CodeList():
    """Return the list of six-character, zero-padded codes accepted as-is.

    The list is built in this order: 000001-000999, five individual 001xxx
    codes, 002000-002975, four individual codes, then 038014-038017.
    """
    codes = [str(number).zfill(6) for number in range(1, 1000)]
    codes.extend(["001696", "001872", "001896", "001965", "001979"])
    codes.extend(str(number).zfill(6) for number in range(2000, 2976))
    codes.extend(["003816", "031005", "031007", "038011"])
    codes.extend(str(number).zfill(6) for number in range(38014, 38018))

    return codes


 

pro_data.py

 

def Data_Convert(replace, replaced, type, data_list):
    """Convert every item of data_list after a substring replacement.

    Each item is turned into a string, every occurrence of `replace` in it
    is substituted with `replaced`, and the result is passed to the callable
    `type` (for example float).

    Returns:
        list: the converted values, in the same order as data_list.
    """
    return [type(str(entry).replace(replace, replaced)) for entry in data_list]

 

Spider_Data.py

 

from urllib.request import Request, urlopen
from finance.pro_data import Data_Convert
from finance.List_url import exponent
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from pandas import DataFrame
import re



def html():
    """Download the page whose URL exponent() returns and return its text.

    The request carries a browser User-Agent header taken from
    fake_useragent.  The response body is decoded with the default codec of
    bytes.decode() (UTF-8).

    Returns:
        str: the decoded response body.
    """
    headers = {
        'User-Agent': UserAgent().Chrome
    }
    url = exponent()
    request = Request(url, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with urlopen(request) as response:
        info = response.read().decode()
    return info

def get_Data():
    """Parse the downloaded page into a DataFrame of per-day values.

    The table with class "table_bg001 border_box limit_sale" is located in
    the page returned by html().  Its rows come in two flavours,
    `<tr class="">` and `<tr class="dbrow">`; the plain `<td>` cells of each
    flavour are collected separately, grouped into rows of nine cells and
    then interleaved (one plain row, one dbrow row, ...).  A flavour that
    runs out of rows first is simply skipped for the remaining positions,
    and a trailing group with fewer than nine cells is ignored.

    Returns:
        pandas.DataFrame: nine columns.  The four price columns and the two
        trading columns are converted to float after removing thousands
        separators; the date, change-amount and change-percent columns are
        left as the strings found in the page.
    """
    cells_per_row = 9

    def rows_of(cells):
        # Split a flat cell list into complete rows of cells_per_row items.
        last_start = len(cells) - cells_per_row + 1
        return [cells[start:start + cells_per_row]
                for start in range(0, last_start, cells_per_row)]

    info = html()
    soup = BeautifulSoup(info, 'html.parser')
    con = soup.find_all('table', attrs={'class':'table_bg001 border_box limit_sale'})
    table_markup = str(con)

    row_pattern_plain = re.compile(r'(?<=<tr class="">).*?(?=</tr>)')
    row_pattern_dbrow = re.compile(r'(?<=<tr class="dbrow">).*?(?=</tr>)')
    cell_pattern = re.compile(r'(?<=<td>).*?(?=</td>)')

    data1 = row_pattern_plain.findall(table_markup)
    data2 = row_pattern_dbrow.findall(table_markup)
    # Cells are extracted from the string form of each row list.
    plain_rows = rows_of(cell_pattern.findall(str(data1)))
    dbrow_rows = rows_of(cell_pattern.findall(str(data2)))

    # One list per table column, filled in page order.
    columns = [[] for _ in range(cells_per_row)]
    for index in range(max(len(plain_rows), len(dbrow_rows))):
        for rows in (plain_rows, dbrow_rows):
            if index < len(rows):
                for column, cell in zip(columns, rows[index]):
                    column.append(cell)

    (Times, opening_price, max_price, min_price, closing_price,
     change_amount,      # price change (amount)
     price_limit,        # price change (percent)
     trading_amount,     # traded volume
     trading_volume,     # traded value
     ) = columns

    new_opening_price = Data_Convert(",", "", float, opening_price)
    new_max_price = Data_Convert(",", "", float, max_price)
    new_min_price = Data_Convert(",", "", float, min_price)
    new_closing_price = Data_Convert(",", "", float, closing_price)
    new_trading_amount = Data_Convert(",", "", float, trading_amount)
    new_trading_volume = Data_Convert(",", "", float, trading_volume)

    dic = {'日期':Times, '开盘价':new_opening_price, '最高价':new_max_price, '最低价':new_min_price,
           '收盘价':new_closing_price, '涨跌额':change_amount, '涨跌幅(%)':price_limit,'成交量(股)':new_trading_amount,
           '成交金额(元)':new_trading_volume}

    DF = DataFrame(dic)

    return DF


 

main.py

 

from finance.Spider_Data import get_Data
import os
import sys

def save_data():
    """Ask for a file name and write the module-level `Data` to it as CSV.

    The file is written inside ./save_csv (created if it does not exist).
    A name that is empty or contains '/' is rejected with an error message
    and nothing is written.  ".csv" is appended unless the name already
    ends with it.  An existing file of the same name is overwritten.
    """
    name = input("请输入您要导出的数据文件(.csv)名称:")
    if not name or '/' in name:
        print("输入错误,请重新输入!")
        return

    # Suffix test, not substring test: "a.csv.bak" still needs the extension.
    if not name.endswith('.csv'):
        name += '.csv'

    # to_csv does not create missing directories.
    os.makedirs("./save_csv", exist_ok=True)
    save_path = "./save_csv/" + name
    # to_csv opens the target for writing, replacing any previous content.
    Data.to_csv(save_path, sep=',', index=False, header=True)

def main():
    """Interactive driver: optionally print the data, then optionally save it.

    The first question decides whether the module-level `Data` is printed;
    the second whether save_data() is called.  Answering "no" to the second
    question exits the process with status 0.  Any unrecognised answer
    prints an error message and the function returns.
    """
    wants_display = input("是否展示数据:")
    if wants_display not in ('是', '否'):
        print("输入错误,请重新输入!")
        return

    if wants_display == '是':
        print(Data)

    wants_download = input("是否要下载数据:")
    if wants_download == '是':
        save_data()
    elif wants_download == '否':
        sys.exit(0)
    else:
        print("输入错误,请重新输入!")

# Script entry point. The DataFrame is bound to the module-level name `Data`
# because main() and save_data() read it as a global rather than a parameter.
if __name__ == '__main__':
    Data = get_Data()
    main()

 

 

运行

 

在main.py下运行,结果如下:

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小码爱撞墙

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值