python股票数据爬取

主要功能:

1.获取上证、深证的所有股票的代码;

2.爬取所有股票近30天的开盘价、收盘价、涨跌幅等数据;

3.建立了一个简单模型,用于筛选近30天持续增长的股票:以5天为一个单元,对这5天的开盘价做线性拟合并计算斜率,30天共得到30/5=6个斜率;若这6个斜率均不为负,则认为该股票近一个月持续增长,并将其筛选出来。

 

其它模型可借用爬取的股票数据自行建模,此文仅供个人学习用,请勿作违法用途,否则后果自负。

 

get_shares_codes.py

import requests
import random

class GetSHShares():
    """Probe the hq.sinajs.cn quote endpoint for codes sh600000..sh603999.

    Attributes:
        url: Quote URL template; ``{ID}`` is replaced by the prefixed code.
        shares_list: Codes whose response differed from the empty-quote
            placeholder; filled by :meth:`run`.
        fake_head_list: Browser-like header sets, one of which is picked at
            random for every request.
        url_: URL of the most recent request (set by :meth:`run`).
    """

    def __init__(self):
        super(GetSHShares, self).__init__()
        self.url = "http://hq.sinajs.cn/list={ID}"
        self.shares_list = []
        user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/44.0.2403.155 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        ]
        # The header sets differ only in their User-Agent string.
        self.fake_head_list = [
            {'Connection': 'close', 'Content-Type': 'application/json',
             "User-Agent": user_agent, "Referer": self.url}
            for user_agent in user_agents
        ]

    def run(self):
        """Request every candidate code and collect those with a quote.

        Returns:
            ``self.shares_list`` with every accepted code appended, in
            ascending code order.
        """
        for i in range(4000):
            start_code = 600000 + i
            code_str = "sh" + str(start_code)
            self.url_ = self.url.replace("{ID}", code_str)
            fake_head = random.choice(self.fake_head_list)
            # The chosen headers were previously built but never sent.
            response = requests.post(self.url_, headers=fake_head)

            data = response.text
            # Body the endpoint is expected to return for a code without a quote.
            compare_str = "var hq_str_" + code_str + "=\"\";\n"
            if data != compare_str:
                self.shares_list.append(code_str)
                print("code_str:" + code_str)
        print(self.shares_list)
        return self.shares_list

class GetSZShares():
    """Probe the hq.sinajs.cn quote endpoint for codes sz000000..sz002999.

    Attributes:
        url: Quote URL template; ``{ID}`` is replaced by the prefixed code.
        shares_list: Codes whose response differed from the empty-quote
            placeholder; filled by :meth:`run`.
        fake_head_list: Browser-like header sets, one of which is picked at
            random for every request.
        url_: URL of the most recent request (set by :meth:`run`).
    """

    def __init__(self):
        super(GetSZShares, self).__init__()
        self.url = "http://hq.sinajs.cn/list={ID}"
        self.shares_list = []
        user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/44.0.2403.155 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
        ]
        # The header sets differ only in their User-Agent string.
        self.fake_head_list = [
            {'Connection': 'close', 'Content-Type': 'application/json',
             "User-Agent": user_agent, "Referer": self.url}
            for user_agent in user_agents
        ]

    def run(self):
        """Request every candidate code and collect those with a quote.

        Returns:
            ``self.shares_list`` with every accepted code appended, in
            ascending code order.
        """
        for i in range(3000):
            start_code = i
            # Codes are zero-padded to six digits, e.g. "sz000001".
            code_str = "sz" + str(start_code).zfill(6)
            self.url_ = self.url.replace("{ID}", code_str)
            fake_head = random.choice(self.fake_head_list)
            # The chosen headers were previously built but never sent.
            response = requests.post(self.url_, headers=fake_head)

            data = response.text
            # Body the endpoint is expected to return for a code without a quote.
            compare_str = "var hq_str_" + code_str + "=\"\";\n"
            if data != compare_str:
                self.shares_list.append(code_str)
                print("code_str:" + code_str)
        print(self.shares_list)
        return self.shares_list


'''
上证代码:sh600000
深证代码:sz000000
'''

 

get_history_data.py

import requests
import re
from get_shares_codes import GetSZShares,GetSHShares

class Get_Price():
    """Scrape daily price rows for one stock from quotes.money.163.com.

    Attributes:
        day_num: Number of daily rows :meth:`run` must collect.
        trade_num: Stock code passed to the latest :meth:`run` call.
        return_price_list: Rows collected by the latest :meth:`run` call.
    """

    # Loop-invariant pieces, hoisted out of run().
    _URL_TEMPLATE = "http://quotes.money.163.com/trade/lsjysj_{trade_num}.html?year={year}&season={season_num}"
    _PATTERN_PRICE = re.compile("(?<=<td class='cGreen'>).*?(?=</td>)|(?<=<td class='cRed'>).*?(?=</td>)")
    _PATTERN_DATA = re.compile("(?<=<tr class=''><td>).*?(?=</td>)|(?<=<tr class='dbrow'><td>).*?(?=</td>)")
    # The six colored cells of each table row, in page order.
    _PRICE_KEYS = ('start_price', 'high_price', 'low_price',
                   'end_price', 'change_price', 'change_per')

    def __init__(self,day_num):
        self.day_num = day_num

    def run(self, trade_num, year=2019):
        """Collect ``day_num`` rows, walking the seasons of ``year`` from 4 down to 1.

        Args:
            trade_num: Stock code as a string, without exchange prefix.
            year: Year whose four season pages are queried. Defaults to 2019,
                the value that used to be hard-coded in the URL.

        Returns:
            A list of dicts (keys ``data``, ``start_price``, ``high_price``,
            ``low_price``, ``end_price``, ``change_price``, ``change_per``;
            all values are the raw cell strings) as soon as ``day_num`` rows
            have been gathered, in page order. ``None`` if the four pages
            together hold fewer rows than that.

        Raises:
            IndexError: If a page has fewer than six colored price cells per
                matched row.
        """
        self.trade_num = trade_num
        self.return_price_list = []
        for season in range(4, 0, -1):
            url = (self._URL_TEMPLATE
                   .replace("{trade_num}", self.trade_num)
                   .replace("{year}", str(year))
                   .replace("{season_num}", str(season)))
            html = requests.post(url).text
            prices = self._PATTERN_PRICE.findall(html)
            dates = self._PATTERN_DATA.findall(html)
            for row_index, trade_date in enumerate(dates):
                base = row_index * len(self._PRICE_KEYS)
                row = {'data': trade_date}
                for offset, key in enumerate(self._PRICE_KEYS):
                    row[key] = prices[base + offset]
                self.return_price_list.append(row)
                if len(self.return_price_list) >= int(self.day_num):
                    return self.return_price_list
        # Not enough rows in this year; callers treat None as "skip".
        return None

class Get_risetype_shares():
    """Flag stocks whose opening price trends upward in every 5-day window."""

    def __init__(self):
        return

    def get_slope(self):
        """Return the least-squares slope of ``self.price_info5``.

        The five values are fitted against x = 0..4. For those x values
        mean(x) = 2 and var(x) = 2, so the slope
        (mean(xy) - mean(x) * mean(y)) / var(x) reduces to
        mean(xy) / 2 - mean(y).

        Raises:
            IndexError: If ``self.price_info5`` has fewer than five items.
        """
        prices = self.price_info5
        self.aver_xy = float(sum(i * prices[i] for i in range(5)) / 5)
        self.aver_y = float(sum(prices[i] for i in range(5)) / 5)
        self.slope = float(self.aver_xy / 2) - self.aver_y
        return self.slope

    def Is_risetype_shares(self, price_info,print_str):
        """Fit each complete 5-day window and print whether no slope is negative.

        Args:
            price_info: Sequence of dicts, presumably oldest first (TODO
                confirm with caller), each with a ``'start_price'`` string;
                thousands separators (",") are stripped before parsing.
            print_str: Label printed in front of the slope list.

        Returns:
            True when there is at least one complete window and no window has
            a negative slope, else False. The same value is stored in
            ``self.result``; the slopes are stored in ``self.slope_list``.

        The slope list is sized to the data: any number of windows is
        accepted, and windows that do not exist are no longer reported as
        slope 0. Trailing records that do not fill a window are ignored.
        """
        self.price_info5 = [0,0,0,0,0]
        self.slope_list = []
        for group in range(len(price_info) // 5):
            window = price_info[group * 5:group * 5 + 5]
            self.price_info5 = [float(day['start_price'].replace(",", ""))
                                for day in window]
            self.slope_list.append(self.get_slope())
        # Without a single complete window there is no evidence of a rise.
        self.result = bool(self.slope_list) and not any(
            slope < 0 for slope in self.slope_list)
        # ANSI color 31 is red, 34 is blue.
        if self.result:
            print("\033[0;34;48m" + print_str + " " + str(self.slope_list) + "\033[0;34;48m  True")
        else:
            print("\033[0;31;48m" + print_str + " " + str(self.slope_list) + "\033[0;31;48m  False")
        return self.result


# Driver: list Shenzhen codes, fetch 30 daily rows for each, and print the
# trend verdict. Everything runs at import time and needs network access.
sh = GetSHShares()
sz = GetSZShares()

# To scan Shanghai instead, use sh.run(); for a quick test use ["sh600519"].
sz_list = sz.run()
# Sample codes for manual experiments; other candidates: "603986", "300661", "000063".
Shares = ["002547"]
temp = Get_Price(30)
slope = Get_risetype_shares()
for Shares_str in sz_list:
    print_str = "股票代码:" + Shares_str
    # Strip the two-letter exchange prefix ("sz") before querying prices.
    price_info_list = temp.run(Shares_str[2:])
    if price_info_list is None:
        # Fewer than 30 rows were available for this code.
        continue
    # Reverse in place; presumably the rows arrive newest first and the
    # slope fit wants them oldest first (TODO confirm against the page).
    price_info_list.reverse()

    slope.Is_risetype_shares(price_info_list,print_str)
    # Debug aid: loop over price_info_list and print each row.

 

  • 1
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值