python双色球数据抓取及模拟生成高概率的号码

该博客首先从指定网站抓取双色球历史数据并存储到SQLite数据库中,接着分析每个号码出现的频率,生成概率分布。使用这些权重随机生成号码序列,并用Python实现。最后,将数据导出到Excel表格进行可视化展示。博客涵盖了网络爬虫、数据库操作、概率统计及数据分析。
摘要由CSDN通过智能技术生成

1、代码分为两部分:第一部分抓取网站上的双色球历史数据并存储到数据库,这部分代码来自第三方。

2、第二部分根据历史记录为每个号码分配权重,并按权重随机生成前6个号码(红球)的序列。

import requests
from bs4 import BeautifulSoup
import json
import sqlite3
import chardet
import xlwt

# Ask how many of the most recent draws to fetch. The raw text is reused
# verbatim in the request URL and in the output file names further down.
cnt = input("请输入你要获取的数量(30,50,100):").strip()
try:
    cntINT = int(cnt)
except ValueError:
    # Non-numeric input: stop with the range hint instead of a traceback.
    raise SystemExit("请输入(0-100]的数") from None
if not 0 < cntINT <= 100:
    # Nothing is downloaded for an out-of-range count, so the parsing step
    # that follows would have no `demo` to work on; stop here.
    raise SystemExit("请输入(0-100]的数")

url = 'http://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice?name=ssq&issueCount='+cnt

try:
    r = requests.get(url, timeout=30)
    r.raise_for_status()
except requests.RequestException:
    # Only network/HTTP failures are handled; any other exception is a
    # programming error and should surface normally.
    raise SystemExit("获取数据失败") from None

# Let chardet guess the charset from the raw bytes before decoding the body.
encoding = chardet.detect(r.content)['encoding']
r.encoding = encoding
demo = r.text

# Decode the response body into a dict.
# The text is handed to json.loads, so it is parsed directly: routing it
# through an HTML parser first would cut the payload at the first "<" and
# rewrite any "&...;" entity found inside a string value.
new_dict = json.loads(demo)
# print(new_dict)
# The value under 'result' is what gets written to the JSON file and is later
# iterated as the list of draw records.
final_dict = new_dict.get('result')

# Persist the records as indented UTF-8 JSON (non-ASCII text kept readable).
json_path = 'Ssq_data' + cnt + '.json'
with open(json_path, mode='w', encoding='utf-8') as out_file:
    json.dump(final_dict, out_file, indent=4, ensure_ascii=False)

# Create (or open) the SQLite database for this run.
conn = sqlite3.connect('Ssqdata'+cnt+'.sqlite')
cur = conn.cursor()
# Rebuild every table from scratch so a re-run starts from empty tables.
#
# Ssq.date, Ssq.red and Ssq.blue are declared TEXT, matching the TEXT columns
# of the per-field tables below. With INTEGER affinity SQLite converts text
# that looks like a number into an integer on insert, so a zero-padded ball
# number would lose its leading zero in Ssq only.
# NOTE(review): assumes the feed delivers ball numbers as zero-padded strings
# such as "07" -- confirm against a real response.
cur.executescript('''
DROP TABLE IF EXISTS Ssq;
DROP TABLE IF EXISTS RE;
DROP TABLE IF EXISTS DATE;
DROP TABLE IF EXISTS BL;
DROP TABLE IF EXISTS Red;
DROP TABLE IF EXISTS Blue;

CREATE TABLE Ssq (
    id     INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    code   TEXT UNIQUE,
    date   TEXT,
    red    TEXT,
    blue   TEXT
);

CREATE TABLE DATE (
    id     INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    date   TEXT NOT NULL
);

CREATE TABLE RE (
    id     INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    red   TEXT NOT NULL
);
CREATE TABLE BL (
    id     INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    blue   TEXT NOT NULL
);

CREATE TABLE Red (
    id     INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    red    TEXT UNIQUE
);

CREATE TABLE Blue (
    id     INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    blue   TEXT UNIQUE

)
''')
# Read back the JSON file written earlier in this script; the context manager
# closes the handle as soon as the data is loaded.
with open('Ssq_data'+cnt+'.json', mode='r', encoding='utf-8') as str_data:
    json_data = json.load(str_data)

# Store every draw. RE and BL receive one row per draw (no UNIQUE constraint),
# while Red and Blue keep only distinct values because of their UNIQUE columns.
for entry in json_data:
    code = entry['code']
    date = entry['date']
    red = entry['red']
    blue = entry['blue']

    cur.execute('''INSERT OR IGNORE INTO Ssq (code,date,red,blue)
            VALUES ( ?,?,?,? )''', (code, date, red, blue))

    cur.execute('''INSERT OR IGNORE INTO DATE (date)
            VALUES ( ? )''', (date,))

    cur.execute('''INSERT OR IGNORE INTO RE (red)
                VALUES ( ? )''', (red,))

    cur.execute('''INSERT OR IGNORE INTO Red (red)
                   VALUES ( ? )''', (red,))

    cur.execute('''INSERT OR IGNORE INTO BL (blue)
                   VALUES ( ? )''', (blue,))

    cur.execute('''INSERT OR IGNORE INTO Blue (blue)
                   VALUES ( ? )''', (blue,))

# A single commit for the whole batch: the import is applied atomically and
# avoids one disk sync per record.
conn.commit()

# Export the same records to an .xls workbook with a single sheet.
book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet('双色球')

# Header row.
for column, title in enumerate(('期号', '开奖日期', '红球', '蓝球')):
    sheet.write(0, column, title)

# One spreadsheet row per draw, starting right below the header.
record_fields = ('code', 'date', 'red', 'blue')
for row, entry in enumerate(json_data, start=1):
    # Read all four fields first so a missing key fails before any cell of
    # this row is written.
    values = [entry[field] for field in record_fields]
    for column, value in enumerate(values):
        sheet.write(row, column, value)

# Save the workbook next to the JSON and SQLite outputs.
book.save(u"Ssq_data"+cnt+".xls")

#!/usr/bin/python3 

from bs4 import BeautifulSoup
import json
import sqlite3
import chardet
import xlwt
from collections import Counter
import matplotlib.pyplot as plt
import random
import numpy as np

# Open the database written by the download script for a 100-draw run.
conn = sqlite3.connect('Ssqdata100.sqlite')
cur = conn.cursor()

# One list per red-ball position (1st..6th number of every draw, as strings).
b1list, b2list, b3list, b4list, b5list, b6list = ([] for _ in range(6))

bluelist = []

ballall_list = []

# Every draw's red numbers as an integer array.
all_redball = []

# Column 0 is the row id, column 1 the stored value.
res_red = cur.execute("select * from RE").fetchall()
res_bl = cur.execute("select * from BL").fetchall()

position_lists = (b1list, b2list, b3list, b4list, b5list, b6list)
for row_red, row_blue in zip(res_red, res_bl):
    # Red numbers are stored as one comma-separated string per draw.
    redsq = row_red[1].split(",")
    all_redball.append(np.array([int(x) for x in redsq]))
    bluelist.append(row_blue[1])
    for position, bucket in enumerate(position_lists):
        bucket.append(redsq[position])


# How often each number appeared at each red-ball position.
l1 = Counter(b1list)
l2 = Counter(b2list)
l3 = Counter(b3list)
l4 = Counter(b4list)
l5 = Counter(b5list)
l6 = Counter(b6list)

# Same statistic for the blue ball.
l7 = Counter(bluelist)

# The last (blue) ball is left out for now.
#list_all = [l1, l2, l3, l4, l5, l6, l7]
list_all = [l1, l2, l3, l4, l5, l6]
list_index = 1

# One dict per ball position, mapping number -> integer percentage weight.
list_weight = []

# A dedicated loop variable keeps `l1` bound to the ball-1 counter.
for counter in list_all:
    # Numbers in ascending label order.
    s1_labels = sorted(counter)
    s1_sum = sum(counter.values())
    dict_weight = {}
    for key in s1_labels:
        # Floor percentage in pure integer arithmetic: the float form
        # int(count / total * 100) can come out one too low
        # (29 / 100 * 100 evaluates to 28.999999999999996).
        freq = counter[key] * 100 // s1_sum
        print("%d号球数字%s 出现概率:%d" %(list_index, key, freq))
        dict_weight[key] = freq

    list_weight.append(dict_weight)
    list_index += 1
    print("\r\n")


def ballgame(self, weights=None, history=None):
    """Generate one weighted-random line of numbers and compare it with past draws.

    One number is picked for every ball position according to that
    position's weights. The completed line is then compared position by
    position with every historical draw; each draw that agrees in at least
    five positions is printed and collected.

    Args:
        self: Unused; kept so existing ``ballgame(None)`` calls keep working.
        weights: Sequence with one dict per ball position, mapping a number
            (int-convertible) to its non-negative weight. Defaults to the
            module-level ``list_weight``.
        history: Iterable of past draws, each a sequence of ints with one
            entry per position. Defaults to the module-level ``all_redball``.

    Returns:
        The entries of ``history`` that match the generated line in at
        least five positions, in their original order.
    """
    if weights is None:
        weights = list_weight
    if history is None:
        history = all_redball

    # Build the complete line first: comparing after every single pick means
    # only one number is ever compared, so five matches can never be reached.
    picks = []
    for data in weights:
        numbers = list(data)
        chosen = random.choices(numbers, weights=[data[n] for n in numbers])[0]
        picks.append(int(chosen))
    getball = np.array(picks)

    matches = []
    for lst in history:
        # Element-wise comparison counts the positions holding the same number.
        if np.count_nonzero(np.asarray(lst) == getball) >= 5:
            print("more than five!!")
            print(lst, getball)
            matches.append(lst)
    return matches


# Keep generating lines until the user interrupts the run with Ctrl-C.
# The loop has no exit condition of its own, so the interrupt handler is what
# makes the summary below reachable.
count = 0
try:
    while True:
        count += 1
        ballgame(None)
except KeyboardInterrupt:
    pass

# Number of lines generated, then the end marker.
print(count)
print('finish')


# Disabled plotting snippet: it is a bare string literal, so nothing in it is
# executed. If enabled it would draw a matplotlib pie chart of the counts
# held in `l1`, labelled with the sorted keys.
'''

s1_labels = list(sorted(l1.keys()))
s1_fracs = [l1.get(s1_labels[i]) for i in range(len(s1_labels))]
fig = plt.figure()

plt.pie(x=s1_fracs, labels=s1_labels, autopct='%3.1f %%',shadow=True, labeldistance=1.1, startangle = 90,pctdistance = 0.6)

plt.legend(loc='upper left', bbox_to_anchor=(-0.1, 1))

plt.title('ball 1 数据分布图',fontsize=20)
plt.show()

'''


  • 2
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值