大乐透分析软件
2、使用 Python 从网站中爬取所有的大乐透中奖号码
3、使用 C++ 分析红球、蓝球、组合重复出现次数
4、输入红球、蓝球判断历史中奖次数和出现次数
Python 爬取代码:
import os
import re
import time
import openpyxl
import requests
from requests.exceptions import RequestException
from lxml import etree
# Directory prefix that write_excel() prepends to the workbook file name.
root = '/root/dlt/'
# Incremented once per parse_page() call; printed as the crawl total at the end.
count = 0
# Crawl results, filled by main()/parse_page() and written as the three
# spreadsheet columns by write_excel(): issue numbers, red-ball text, blue-ball text.
qishus = []
reds = []
blues = []
def get_page(url):
    """Download one results page and return its HTML text.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The response body decoded as UTF-8 when the server answers with
        HTTP 200; ``None`` for any other status code, or when the request
        raises a ``RequestException`` (connection errors and timeouts
        included).
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    }
    try:
        # requests never times out by default; without this a single stalled
        # connection would block the whole crawl indefinitely.
        response = requests.get(url=url, headers=headers, timeout=30)
    except RequestException:
        return None
    # Force the encoding, otherwise the decoded text can come out garbled.
    response.encoding = "utf-8"
    if response.status_code == 200:
        return response.text
    return None
def parse_page(html):
    """Extract the red and blue ball numbers from one results page.

    Increments the module-level ``count`` and appends exactly one entry to
    both ``reds`` and ``blues`` on every call, so the two lists stay aligned
    with ``qishus`` (which ``main`` extends once per issue). Each entry is
    the space-joined text of the matching ball elements; when the page is
    missing or cannot be parsed, an empty string is stored instead and the
    failure is printed rather than silently ignored.

    Args:
        html: Page source as returned by ``get_page``, or ``None`` when the
            download failed.
    """
    global count
    count += 1
    red_text = ""
    blue_text = ""
    try:
        if html is None:
            raise ValueError("page could not be downloaded")
        tree = etree.HTML(html)
        red = tree.xpath('//*[@class="ball ball-red"]//text()')
        blue = tree.xpath('//*[@class="ball ball-blue"]//text()')
        # Join both before touching the result lists so a failure can never
        # leave one list longer than the other.
        red_text = " ".join(red)
        blue_text = " ".join(blue)
        print(red + blue)
    except Exception as e:
        # Best effort: keep crawling, but make the failure visible.
        print("解析失败: {}".format(e))
    reds.append(red_text)
    blues.append(blue_text)
def write_excel(qishu, red, blue):
    """Write the crawl results to ``root + '2007-2023.xlsx'``.

    The first row holds the column headers; each following row holds one
    entry of each input sequence, written column by column.

    Args:
        qishu: Sequence of issue numbers (column 1).
        red: Sequence of red-ball strings (column 2).
        blue: Sequence of blue-ball strings (column 3).

    Returns:
        None.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = '2007'
    for col, header in enumerate(('期数', '红球', '蓝球'), start=1):
        sheet.cell(1, col, header)
    # Data rows start at row 2, directly below the header row.
    for col, values in enumerate((qishu, red, blue), start=1):
        for offset, value in enumerate(values):
            sheet.cell(offset + 2, col, value)
    # Save once; the original serialised the identical workbook twice.
    workbook.save(root + '2007-2023.xlsx')
def main(num):
    """Fetch and parse the results page of a single issue.

    Records ``num`` in ``qishus``, then hands whatever ``get_page`` returns
    for that issue's URL to ``parse_page``.
    """
    qishus.append(num)
    page_url = 'https://www.55125.cn/kaijiang/dlt/{}.htm'.format(str(num))
    parse_page(get_page(page_url))
if __name__ == '__main__':
    # Number of issues to crawl for each year; an issue number is the year
    # followed by a zero-padded three-digit index (e.g. 2007001).
    maxRange = {
        '2007': 93, '2008': 154, '2009': 153, '2010': 153,
        '2011': 154, '2012': 154, '2013': 153, '2014': 154, '2015': 153,
        '2016': 154, '2017': 153, '2018': 154, '2019': 150, '2020': 134,
        '2021': 150, '2022': 150, '2023': 8,
    }
    for year, issues in maxRange.items():
        for index in range(1, issues + 1):
            main(year + "%03d" % index)
    # NOTE(review): the pasted source lost its indentation; the workbook is
    # assumed to be written once after the crawl finishes - confirm.
    write_excel(qishu=qishus, red=reds, blue=blues)
    print("大乐透爬取结束...")
    print("爬取的数目为:" + str(count))
C++ 分析工具:
// analdlt.cpp : This file contains the "main" function. Program execution begins and ends there.
//
#include <Windows.h>

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "libxl.h"

using namespace libxl;
// Converts a NUL-terminated wide string to the system ANSI code page (CP_ACP).
//
// szStr:   NUL-terminated wide string; a null pointer yields "".
// returns: the converted text, or an empty string when either
//          WideCharToMultiByte call reports failure.
std::string UnicodeToAnsi(const wchar_t* szStr)
{
    if (szStr == nullptr)
    {
        return "";
    }
    // First call only measures: with a source length of -1 the returned size
    // includes the terminating NUL.
    const int nLen = WideCharToMultiByte(CP_ACP, 0, szStr, -1, NULL, 0, NULL, NULL);
    if (nLen <= 0)
    {
        return "";
    }
    // The vector owns the scratch buffer, so it is released on every exit path.
    std::vector<char> buffer(static_cast<size_t>(nLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, szStr, -1, buffer.data(), nLen, NULL, NULL) == 0)
    {
        // Conversion failed: do not hand back a half-written buffer.
        return "";
    }
    return std::string(buffer.data());
}
// Reads the cell at (row, col) into `value` as text.
// String cells are converted with UnicodeToAnsi; numeric cells are truncated
// to a 64-bit integer and formatted in decimal. For any other cell type
// `value` is left untouched.
void getExcelCellString(Sheet* sheet, int row, int col, std::string& value)
{
    switch (sheet->cellType(row, col))
    {
    case CELLTYPE_STRING:
        value = UnicodeToAnsi(sheet->readStr(row, col));
        break;
    case CELLTYPE_NUMBER:
        value = std::to_string(static_cast<LONG64>(sheet->readNum(row, col)));
        break;
    default:
        break;
    }
}
// Splits `str` on the space character and returns the non-empty tokens in
// order. Leading, trailing and repeated spaces produce no empty tokens.
//
// The input is never modified. The previous implementation ran strtok() on
// the string's own buffer through a const-stripping cast, which overwrote
// the caller's spaces with NULs and relied on strtok's hidden global state.
std::vector<std::string> splitStr(const std::string& str)
{
    std::vector<std::string> ret;
    const char delim = ' ';
    std::string::size_type begin = str.find_first_not_of(delim);
    while (begin != std::string::npos)
    {
        const std::string::size_type end = str.find(delim, begin);
        if (end == std::string::npos)
        {
            // Last token runs to the end of the string.
            ret.push_back(str.substr(begin));
            break;
        }
        ret.push_back(str.substr(begin, end - begin));
        begin = str.find_first_not_of(delim, end);
    }
    return ret;
}
uint64_t strTouint64(const std::string& str)
{
std::vector<std::string> splitStr_ = splitStr(str);
uint64_t ret = 0;
for (int i = 0; i < splitStr_.size(); ++i)
{
int rm = std::atoi(splitStr_[i].c_str());
if (rm == 0)
break;
uint64_t tmp = 1;
tmp <<= rm;
ret |= tmp;
//ret = ret | tmp;
}
return ret;
}
uint16_t strTouint16(const std::string& str)
{
std::vector<std::string> splitStr_ = splitStr(str);
uint16_t ret = 0;
for (int i = 0; i < splitStr_.size(); ++i)
{
int rm = std::atoi(splitStr_[i].c_str());
if (rm == 0)
break;
uint16_t tmp = 1;
tmp <<= rm;
ret |= tmp;
//ret = ret | tmp;
}
return ret;
}
// Loads the draw history from the workbook at E:\2007-2023.xlsx and appends
// one entry per data row to each output vector:
//   reds     - red-ball bit mask built by strTouint64 from column 1
//   blues    - blue-ball bit mask built by strTouint16 from column 2
//   redblues - (red mask << 12) | blue mask, identifying the whole draw
// Row 0 is treated as the header row and skipped; column 0 (the issue
// number) is not needed for the statistics and is not read.
//
// The file is first opened with the binary-format reader and, if that fails,
// with the XML-format reader. On any failure the function returns with the
// vectors unchanged; a load error is reported on std::cout.
void initData(std::vector<uint64_t> &reds, std::vector<uint16_t> &blues, std::vector<uint64_t> &redblues)
{
    const std::string excelPath = "E:\\2007-2023.xlsx";
    TCHAR path[1024] = { 0 };
#ifdef UNICODE
    // Pass the real capacity of `path` (the original passed 100).
    const int pathCapacity = static_cast<int>(sizeof(path) / sizeof(path[0]));
    if (MultiByteToWideChar(CP_ACP, 0, excelPath.c_str(), -1, path, pathCapacity) == 0)
    {
        return;
    }
#else
    // `path` is zero-initialised, so copying at most capacity-1 characters
    // leaves it NUL-terminated. (The original called strcpy with reversed
    // arguments and an undeclared variable, so this branch never compiled.)
    excelPath.copy(path, sizeof(path) / sizeof(path[0]) - 1);
#endif

    Book* book = xlCreateBook();
    if (!book)
    {
        return;
    }
    // NOTE(review): LibXL's examples set the licence key right after creating
    // the book and before load(); the original set it only after loading.
    // Confirm against the LibXL documentation. The wide literals assume a
    // UNICODE build, as in the original.
    book->setKey(L"EduAnalyze", L"windows-2929270505c6eb0c64ba6c6cafp8r7g0");
    if (!book->load(path))
    {
        // Retry with the XML-format reader.
        book->release();
        book = xlCreateXMLBook();
        if (!book)
        {
            return;
        }
        book->setKey(L"EduAnalyze", L"windows-2929270505c6eb0c64ba6c6cafp8r7g0");
        if (!book->load(path))
        {
            std::cout << book->errorMessage() << std::endl;
            book->release(); // previously leaked on this path
            return;
        }
    }

    Sheet* sheet = book->getSheet(0);
    if (sheet)
    {
        // NOTE(review): the loop bound is kept exactly as in the original
        // (row < lastRow); confirm against LibXL whether lastRow() is the
        // last used row or one past it.
        const int lastRow = sheet->lastRow();
        for (int row = 1; row < lastRow; ++row)
        {
            std::string red;
            getExcelCellString(sheet, row, 1, red);
            const uint64_t redMask = strTouint64(red);
            reds.push_back(redMask);

            std::string blue;
            getExcelCellString(sheet, row, 2, blue);
            const uint16_t blueMask = strTouint16(blue);
            blues.push_back(blueMask);

            // Blue numbers 1..12 occupy bits 1..12, so shifting the red mask
            // by 12 puts red number 1 at bit 13 and the two never overlap.
            redblues.push_back((redMask << 12) | blueMask);
        }
    }
    book->release();
}
// Formats a combined draw mask as text: first the red numbers (bits 13..63,
// printed as bit index minus 12), then the blue numbers (bits 1..12), each
// followed by a single space. Bit 0 is ignored.
std::string printTotal(uint64_t value)
{
    const int blueBits = 12;
    std::string redPart;
    std::string bluePart;
    for (int bit = 1; bit < 64; ++bit)
    {
        const bool isSet = ((value >> bit) & 1u) != 0;
        if (!isSet)
        {
            continue;
        }
        if (bit > blueBits)
        {
            redPart += std::to_string(bit - blueBits) + " ";
        }
        else
        {
            bluePart += std::to_string(bit) + " ";
        }
    }
    return redPart + bluePart;
}
// Lists the indices of the set bits 1..63 of `value` in ascending order,
// each followed by a single space. Bit 0 is ignored.
std::string printInt64(uint64_t value)
{
    std::string text;
    int position = 1;
    // Dropping bit 0 up front makes the lowest bit of `rest` correspond to
    // `position` on every iteration.
    for (uint64_t rest = value >> 1; rest != 0; rest >>= 1, ++position)
    {
        if ((rest & 1u) != 0)
        {
            text += std::to_string(position);
            text += " ";
        }
    }
    return text;
}
// Lists the indices of the set bits 1..15 of `value` in ascending order,
// each followed by a single space. Bit 0 is ignored.
std::string printInt16(const uint16_t value)
{
    std::string out;
    for (int bit = 1; bit < 16; ++bit)
    {
        if (((value >> bit) & 1) == 0)
        {
            continue;
        }
        out.append(std::to_string(bit)).append(" ");
    }
    return out;
}
// Counts how many times each blue-ball mask occurs in `blues`.
//
// blues:   one mask per draw; taken by const reference so the history is not
//          copied on every call.
// returns: map from mask to its number of occurrences.
std::map<uint16_t, int> getBlueTimes(const std::vector<uint16_t>& blues)
{
    std::map<uint16_t, int> ret;
    for (const uint16_t mask : blues)
    {
        // operator[] value-initialises a missing count to 0, so a single
        // lookup both inserts and increments.
        ++ret[mask];
    }
    return ret;
}
// Counts how many times each red-ball mask occurs in `reds`.
//
// reds:    one mask per draw; taken by const reference so the history is not
//          copied on every call.
// returns: map from mask to its number of occurrences.
std::map<uint64_t, int> getRedTimes(const std::vector<uint64_t>& reds)
{
    std::map<uint64_t, int> ret;
    for (const uint64_t mask : reds)
    {
        // operator[] value-initialises a missing count to 0, so a single
        // lookup both inserts and increments.
        ++ret[mask];
    }
    return ret;
}
// Counts how many times each combined red+blue mask occurs in `redblues`.
//
// redblues: one combined mask per draw; taken by const reference so the
//           history is not copied on every call.
// returns:  map from mask to its number of occurrences.
std::map<uint64_t, int> getTotalTimes(const std::vector<uint64_t>& redblues)
{
    std::map<uint64_t, int> ret;
    for (const uint64_t mask : redblues)
    {
        // operator[] value-initialises a missing count to 0, so a single
        // lookup both inserts and increments.
        ++ret[mask];
    }
    return ret;
}
// Outcome of reading one group of ball numbers from stdin.
enum class BallInput
{
    Ok,      // every number was in range and distinct
    Invalid, // all numbers were read, but one was out of range or repeated
    End      // stdin ended or contained something that is not a number
};

// Reads `count` numbers from stdin and sets bit n of `mask` for each number n.
// All `count` numbers are consumed before validity is reported, so a rejected
// group does not leave stray numbers behind for the next prompt.
// `maxBall` must not exceed 63 so that the shift stays well defined.
static BallInput readBallMask(int count, unsigned long long maxBall, uint64_t& mask)
{
    mask = 0;
    bool valid = true;
    for (int i = 0; i < count; ++i)
    {
        // Read into a type that matches the conversion exactly; the original
        // used "%u" with a uint16_t and "%lu" with a uint64_t.
        unsigned long long ball = 0;
        if (scanf("%llu", &ball) != 1)
        {
            return BallInput::End;
        }
        if (ball < 1 || ball > maxBall)
        {
            valid = false;
            continue;
        }
        const uint64_t bit = static_cast<uint64_t>(1) << ball;
        if ((mask & bit) != 0)
        {
            valid = false; // the same number was entered twice
        }
        mask |= bit;
    }
    return valid ? BallInput::Ok : BallInput::Invalid;
}

// Loads the draw history, prints how often each blue pair occurred and which
// red sets / full combinations occurred more than once, then repeatedly asks
// for 5 red and 2 blue numbers and reports how often that selection appeared.
// The query loop ends when stdin is exhausted or holds non-numeric input
// (the original looped forever in that case).
int main()
{
    std::vector<uint64_t> reds;
    std::vector<uint16_t> blues;
    std::vector<uint64_t> redblues;
    initData(reds, blues, redblues);

    auto ret = getBlueTimes(blues);
    for (const auto& entry : ret)
    {
        printf("%s: %d\n", printInt16(entry.first).c_str(), entry.second);
    }
    printf("========================================\n");

    auto redRet = getRedTimes(reds);
    for (const auto& entry : redRet)
    {
        if (entry.second > 1)
        {
            printf("%s: %d\n", printInt64(entry.first).c_str(), entry.second);
        }
    }
    printf("========================================\n");

    auto totalRet = getTotalTimes(redblues);
    for (const auto& entry : totalRet)
    {
        if (entry.second > 1)
        {
            printf("%s: %d\n", printTotal(entry.first).c_str(), entry.second);
        }
    }
    printf("========================================\n");

    for (;;)
    {
        uint64_t rs = 0;
        printf("请输入5位红球数[1-35 不可重复]: ");
        BallInput status = readBallMask(5, 35, rs);
        if (status == BallInput::End)
        {
            break;
        }
        if (status == BallInput::Invalid)
        {
            printf("输入无效: 红球范围为1-35且不可重复\n");
            continue;
        }

        uint64_t blueMask = 0;
        printf("请输入2位蓝球数[1-12 不可重复]: ");
        status = readBallMask(2, 12, blueMask);
        if (status == BallInput::End)
        {
            break;
        }
        if (status == BallInput::Invalid)
        {
            printf("输入无效: 蓝球范围为1-12且不可重复\n");
            continue;
        }

        // Blue numbers are limited to 1..12, so the mask fits in 16 bits.
        const uint16_t bs = static_cast<uint16_t>(blueMask);
        // Same packing as initData: red mask above the 12 blue bits.
        const uint64_t ts = (rs << 12) | bs;

        auto bit = ret.find(bs);
        if (bit != ret.end())
        {
            printf("当前蓝球:%s 已出现:%d次\n", printInt16(bs).c_str(), bit->second);
        }
        else
        {
            printf("当前蓝球:%s从未出现\n", printInt16(bs).c_str());
        }

        auto rit = redRet.find(rs);
        if (rit != redRet.end())
        {
            printf("当前红球:%s 已出现:%d次\n", printInt64(rs).c_str(), rit->second);
        }
        else
        {
            printf("当前红球:%s从未出现\n", printInt64(rs).c_str());
        }

        auto tit = totalRet.find(ts);
        if (tit != totalRet.end())
        {
            printf("当前组合:%s 已出现:%d次\n", printTotal(ts).c_str(), tit->second);
        }
        else
        {
            printf("当前组合:%s从未出现\n", printTotal(ts).c_str());
        }
    }
    return 0;
}
效果展示: