import xgboost as xgb
import lightgbm as lgbm
import pandas as pd
import numpy as np
import pymysql
import xgboost as xgb
import lightgbm as lgbm
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from datetime import date, timedelta
import gc
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
from datetime import datetime
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import pymysql
from sklearn.decomposition import PCA
from flask import Flask, request,session
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from flask_cors import CORS
import psycopg2
from dataclasses import dataclass
import pymysql
from flask import Flask, request, url_for, redirect, render_template, jsonify
app = Flask(__name__)  # NOTE(review): shadowed — `app` is re-created further down before CORS(app); this instance is never used
import warnings
warnings.filterwarnings("ignore")  # globally silence library warnings (pandas/sklearn deprecation noise)
import joblib
import datetime
#import mysql.connector
import time
import random
import json
import collections
import random
import matplotlib.pyplot as plt
import os
import copy
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
import xgboost as xgb
import psycopg2
import json
import psycopg2
from psycopg2 import Binary
import joblib
import pickle
import lightgbm as lgb
from joblib import dump, load
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sqlalchemy import create_engine
import json
from gevent import pywsgi
import knn
import creat_table
import random_forest
app = Flask(__name__)  # the Flask app actually served (replaces the earlier instance created above)
CORS(app)  # allow cross-origin requests from the frontend
#################################################################################### 1 Fetch data
# Local development settings (kept for reference):
# host = "localhost"
# port = "5432"
# database = "copd"
# user = "postgres"
# password = 111111
#
# PostgreSQL connection settings used by connect_pg() below.
host = "10.16.48.219"
port = "5432"
database = "software1"
user = "pg"
password = 111111  # NOTE(review): int, not str — psycopg2 usually expects a string password; confirm this actually connects
def connect_pg():
    """Open and return a fresh PostgreSQL connection using the module-level settings."""
    return psycopg2.connect(
        database=database,
        user=user,
        password=password,
        host=host,
        port=port,
    )
def connect_mysql():
    """Open and return a fresh MySQL connection; rows come back as dicts."""
    conn_kwargs = dict(
        host='10.16.48.219',
        user='root',
        password='111111',
        database='medical',
        cursorclass=pymysql.cursors.DictCursor,
    )
    return pymysql.connect(**conn_kwargs)
## connection, table_name
def get_data(connection, table_name):
    """Read every row of ``table_name`` into a DataFrame and close the connection.

    NOTE(review): the table name is interpolated into the SQL text (identifiers
    cannot be bound as parameters) — only pass trusted table names.
    """
    sql = f'select * from "{table_name}"'
    frame = pd.read_sql(sql, connection)
    connection.close()
    return frame
#################################################################################### 2 数据处理:处理 字段、特征
def per_data(data, split_at=150):
    """Select model features from the raw table and split rows into train/test.

    Drops identifier/unwanted columns, fills missing values with 0, then splits
    the frame at row ``split_at`` (first ``split_at`` rows train, rest test).

    Args:
        data: raw DataFrame loaded from the database.
        split_at: row index of the train/test boundary (default 150, matching
            the original hard-coded behavior).

    Returns:
        (train, test) tuple of DataFrames.
    """
    # Columns excluded from modeling. The original listed 'TH2' twice; the
    # duplicate is removed here. Debug prints were also dropped.
    excluded = {'Case_ID', 'BUN', 'M1_M2', 'TH2', 'IBILI', 'GLO'}
    params = [col for col in data.columns if col not in excluded]
    features = data[params].fillna(0)
    train = features.iloc[:split_at]
    test = features.iloc[split_at:]
    return train, test
#################################################################################### 3 训测串行_训练预测
# 模型选择 xgb
def model_xgb(train, test):
    """Train an XGBoost binary classifier on ``train`` and score ``test``.

    'SEX' is the label; 'AGE' and 'SEX' are excluded from the feature matrix.
    Returns a one-column DataFrame of predicted probabilities.
    """
    booster_params = {
        "booster": 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'silent': 0,               # quiet mode (1 on / 0 off)
        'eta': 0.01,               # learning rate (0.01~0.2)
        'max_depth': 5,            # tree depth (3~10)
        'min_child_weight': 1,
        'gamma': 0,
        'lambda': 1,
        'colsample_bylevel': 0.7,  # behaves similarly to subsample
        'colsample_bytree': 0.7,   # (0.5~1)
        'subsample': 0.9,          # (0.5~1)
        'scale_pos_weight': 1,     # speeds up convergence
    }
    label = train['SEX']
    train_matrix = xgb.DMatrix(train.drop(['AGE', 'SEX'], axis=1), label=label)
    test_matrix = xgb.DMatrix(test.drop(['AGE', 'SEX'], axis=1))
    booster = xgb.train(booster_params, train_matrix, 200, [(train_matrix, 'train')])
    scores = booster.predict(test_matrix)
    return pd.DataFrame(scores, columns=['target'])
# 模型选择 gbdt
def model_gbdt(train, test):
    """Fit a gradient-boosting classifier on ``train`` and predict labels for ``test``."""
    print("%%%%训练_model_gbdt%%%%")
    feats_train = train.drop(['AGE', 'SEX'], axis=1)
    feats_test = test.drop(['AGE', 'SEX'], axis=1)
    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
    clf.fit(feats_train, train['SEX'])
    return pd.DataFrame(clf.predict(feats_test), columns=['prob'])
# 模型选择 lr
def model_lr(train, test):
    """Fit a logistic-regression classifier on ``train`` and predict labels for ``test``."""
    print("%%%%训练_model_lr%%%%")
    feats_train = train.drop(['AGE', 'SEX'], axis=1)
    feats_test = test.drop(['AGE', 'SEX'], axis=1)
    clf = LogisticRegression(random_state=42)
    clf.fit(feats_train, train['SEX'])
    return pd.DataFrame(clf.predict(feats_test), columns=['prob'])
# 模型选择 svm
def model_svm(train, test):
    """Fit a linear SVM classifier on ``train`` and predict labels for ``test``."""
    print("%%%%训练_model_svm%%%%")
    feats_train = train.drop(['AGE', 'SEX'], axis=1)
    feats_test = test.drop(['AGE', 'SEX'], axis=1)
    clf = LinearSVC(C=1.0, random_state=42)
    clf.fit(feats_train, train['SEX'])
    return pd.DataFrame(clf.predict(feats_test), columns=['prob'])
# 模型选择 rf
def model_rf(train, test):
    """Fit a random-forest classifier on ``train`` and predict labels for ``test``."""
    print("%%%%训练_model_rf%%%%")
    feats_train = train.drop(['AGE', 'SEX'], axis=1)
    feats_test = test.drop(['AGE', 'SEX'], axis=1)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(feats_train, train['SEX'])
    return pd.DataFrame(clf.predict(feats_test), columns=['prob'])
# 模型选择 cart
def model_cart(train, test):
    """Fit a CART decision-tree classifier on ``train`` and predict labels for ``test``."""
    print("%%%%训练_model_cart%%%%")
    feats_train = train.drop(['AGE', 'SEX'], axis=1)
    feats_test = test.drop(['AGE', 'SEX'], axis=1)
    clf = DecisionTreeClassifier(criterion='gini', max_depth=5, random_state=42)
    clf.fit(feats_train, train['SEX'])
    return pd.DataFrame(clf.predict(feats_test), columns=['prob'])
# input(疾病标号 算法编号 是否训练 预测ID)
# 训练算法模型,保存模型训练结果。
# output(训练:保存模型训练结果,预测:ID_预测概率 )
def traintest_model(disease_code, algorithm_code, is_train, test_id):
    """Train-and-predict in one pass.

    Loads the Diabetes table, fits the model selected by ``algorithm_code``
    (1..6) on the train split, and returns its predictions on the test split.
    Returns 0 for an unrecognized ``algorithm_code``.

    ``disease_code``, ``is_train`` and ``test_id`` are currently unused here.
    """
    connection = connect_pg()
    data = get_data(connection, "Diabetes")
    train, test = per_data(data)
    dispatch = {
        1: (model_xgb, 'model_xgb'),
        2: (model_gbdt, 'model_gbdt'),
        3: (model_lr, 'model_lr'),
        4: (model_svm, 'model_svm'),
        5: (model_rf, 'model_rf'),
        6: (model_cart, 'model_cart'),
    }
    if algorithm_code not in dispatch:
        return 0
    model_fn, label = dispatch[algorithm_code]
    predict = model_fn(train, test)
    print(predict)
    print(label)
    return predict
#################################################################################### 4 训测分离
#xgb
def train_xgb(train, test):
    """Train the XGBoost model and persist the pickled booster in train_model2 (id=1).

    Returns 0 on success. ``test`` is accepted for signature compatibility;
    the original scored it and discarded the result, so that dead work is gone.

    NOTE(review): the upsert assumes train_model2.id carries a unique/primary-key
    constraint — confirm against the schema. The original plain INSERT failed
    with a duplicate-key error on every retrain.
    """
    booster_params = {
        "booster": 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'silent': 0,               # quiet mode (1 on / 0 off)
        'eta': 0.01,               # learning rate (0.01~0.2)
        'max_depth': 5,            # tree depth (3~10)
        'min_child_weight': 1,
        'gamma': 0,
        'lambda': 1,
        'colsample_bylevel': 0.7,  # behaves similarly to subsample
        'colsample_bytree': 0.7,   # (0.5~1)
        'subsample': 0.9,          # (0.5~1)
        'scale_pos_weight': 1,     # speeds up convergence
    }
    dtrain = xgb.DMatrix(train.drop(['AGE', 'SEX'], axis=1), label=train['SEX'])
    model = xgb.train(booster_params, dtrain, 200, [(dtrain, 'train')])
    model_bytes = pickle.dumps(model)
    connection = connect_pg()
    try:
        with connection:  # commits on success, rolls back on error
            with connection.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO train_model2 (id, model_name, model_description, trainer, training_parameters) "
                    "VALUES (%s, %s, %s, %s, %s) "
                    "ON CONFLICT (id) DO UPDATE SET "
                    "model_name = EXCLUDED.model_name, "
                    "model_description = EXCLUDED.model_description, "
                    "trainer = EXCLUDED.trainer, "
                    "training_parameters = EXCLUDED.training_parameters",
                    (1, 'xgboost', 'xgboost_model', 'Gpb', model_bytes),
                )
    finally:
        connection.close()
    return 0
#gbdt
def train_gbdt(train, test):
    """Train the GBDT model and persist it (pickled) in train_model2 under id=2.

    Returns 0 on success. ``test`` is accepted for signature compatibility;
    the original scored it and discarded the result, so that dead work is gone.

    NOTE(review): the upsert assumes train_model2.id is unique/primary key —
    the original plain INSERT failed with a duplicate-key error on retrain.
    """
    print("%%%%训练_model_gbdt%%%%")
    gbdt_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
    gbdt_model.fit(train.drop(['AGE', 'SEX'], axis=1), train['SEX'])
    model_bytes = pickle.dumps(gbdt_model)
    connection = connect_pg()
    try:
        with connection:  # commits on success, rolls back on error
            with connection.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO train_model2 (id, model_name, model_description, trainer, training_parameters) "
                    "VALUES (%s, %s, %s, %s, %s) "
                    "ON CONFLICT (id) DO UPDATE SET "
                    "model_name = EXCLUDED.model_name, "
                    "model_description = EXCLUDED.model_description, "
                    "trainer = EXCLUDED.trainer, "
                    "training_parameters = EXCLUDED.training_parameters",
                    (2, 'gbdt', 'gbdt_model', 'Gpb', model_bytes),
                )
    finally:
        connection.close()
    return 0
#lr
def train_lr(train, test):
    """Train the logistic-regression model and persist it in train_model2 under id=3.

    Returns 0 on success. ``test`` is accepted for signature compatibility;
    the original scored it and discarded the result, so that dead work is gone.

    NOTE(review): the upsert assumes train_model2.id is unique/primary key —
    the original plain INSERT failed with a duplicate-key error on retrain.
    """
    print("%%%%训练_model_lr%%%%")
    lr_model = LogisticRegression(random_state=42)
    lr_model.fit(train.drop(['AGE', 'SEX'], axis=1), train['SEX'])
    model_bytes = pickle.dumps(lr_model)
    connection = connect_pg()
    try:
        with connection:  # commits on success, rolls back on error
            with connection.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO train_model2 (id, model_name, model_description, trainer, training_parameters) "
                    "VALUES (%s, %s, %s, %s, %s) "
                    "ON CONFLICT (id) DO UPDATE SET "
                    "model_name = EXCLUDED.model_name, "
                    "model_description = EXCLUDED.model_description, "
                    "trainer = EXCLUDED.trainer, "
                    "training_parameters = EXCLUDED.training_parameters",
                    (3, 'lr', 'lr_model', 'Gpb', model_bytes),
                )
    finally:
        connection.close()
    return 0
#svm
def train_svm(train, test):
    """Train the linear-SVM model and persist it in train_model2 under id=4.

    Returns 0 on success. ``test`` is accepted for signature compatibility;
    the original scored it and discarded the result, so that dead work is gone.

    NOTE(review): the upsert assumes train_model2.id is unique/primary key —
    the original plain INSERT failed with a duplicate-key error on retrain.
    """
    print("%%%%训练_model_svm%%%%")
    svm_model = LinearSVC(C=1.0, random_state=42)
    svm_model.fit(train.drop(['AGE', 'SEX'], axis=1), train['SEX'])
    model_bytes = pickle.dumps(svm_model)
    connection = connect_pg()
    try:
        with connection:  # commits on success, rolls back on error
            with connection.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO train_model2 (id, model_name, model_description, trainer, training_parameters) "
                    "VALUES (%s, %s, %s, %s, %s) "
                    "ON CONFLICT (id) DO UPDATE SET "
                    "model_name = EXCLUDED.model_name, "
                    "model_description = EXCLUDED.model_description, "
                    "trainer = EXCLUDED.trainer, "
                    "training_parameters = EXCLUDED.training_parameters",
                    (4, 'svm', 'svm_model', 'Gpb', model_bytes),
                )
    finally:
        connection.close()
    return 0
#rf
def train_rf(train, test):
    """Train the random-forest model and persist it in train_model2 under id=5.

    Returns 0 on success. ``test`` is accepted for signature compatibility;
    the original scored it and discarded the result, so that dead work is gone.

    NOTE(review): the upsert assumes train_model2.id is unique/primary key —
    the original plain INSERT failed with a duplicate-key error on retrain.
    """
    print("%%%%训练_model_rf%%%%")
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(train.drop(['AGE', 'SEX'], axis=1), train['SEX'])
    model_bytes = pickle.dumps(rf_model)
    connection = connect_pg()
    try:
        with connection:  # commits on success, rolls back on error
            with connection.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO train_model2 (id, model_name, model_description, trainer, training_parameters) "
                    "VALUES (%s, %s, %s, %s, %s) "
                    "ON CONFLICT (id) DO UPDATE SET "
                    "model_name = EXCLUDED.model_name, "
                    "model_description = EXCLUDED.model_description, "
                    "trainer = EXCLUDED.trainer, "
                    "training_parameters = EXCLUDED.training_parameters",
                    (5, 'rf', 'rf_model', 'Gpb', model_bytes),
                )
    finally:
        connection.close()
    return 0
#cart
def train_cart(train, test):
    """Train the CART decision-tree model and persist it in train_model2 under id=6.

    Returns 1 on success (kept from the original — the other train_* functions
    return 0; callers only print the value). ``test`` is accepted for signature
    compatibility; the original scored it and discarded the result.

    NOTE(review): the upsert assumes train_model2.id is unique/primary key —
    the original plain INSERT failed with a duplicate-key error on retrain.
    """
    print("%%%%训练_model_cart%%%%")
    cart_model = DecisionTreeClassifier(criterion='gini', max_depth=5, random_state=42)
    cart_model.fit(train.drop(['AGE', 'SEX'], axis=1), train['SEX'])
    model_bytes = pickle.dumps(cart_model)
    connection = connect_pg()
    try:
        with connection:  # commits on success, rolls back on error
            with connection.cursor() as cursor:
                cursor.execute(
                    "INSERT INTO train_model2 (id, model_name, model_description, trainer, training_parameters) "
                    "VALUES (%s, %s, %s, %s, %s) "
                    "ON CONFLICT (id) DO UPDATE SET "
                    "model_name = EXCLUDED.model_name, "
                    "model_description = EXCLUDED.model_description, "
                    "trainer = EXCLUDED.trainer, "
                    "training_parameters = EXCLUDED.training_parameters",
                    (6, 'cart', 'cart_model', 'Gpb', model_bytes),
                )
    finally:
        connection.close()
    return 1
def test(train, test, model_id):
    """Load the pickled model ``model_id`` from train_model2 and score ``test``.

    Args:
        train: unused (kept for signature compatibility with callers).
        test: DataFrame; 'AGE'/'SEX' columns are dropped before scoring.
        model_id: id of the row in train_model2 holding the pickled model.

    Returns:
        The loaded model's predictions for the feature columns of ``test``.

    NOTE(review): pickle.loads executes arbitrary code from the stored blob —
    only safe while the database contents are trusted.
    """
    x_test = test.drop(['AGE', 'SEX'], axis=1)
    connection = connect_pg()
    try:
        with connection.cursor() as cursor:
            # Bug fix: the original SQL embedded the literal text "model_id"
            # instead of binding the parameter, so the query could never use
            # the requested id. Bind it properly.
            cursor.execute(
                "SELECT training_parameters FROM train_model2 WHERE id = %s",
                (model_id,),
            )
            row = cursor.fetchone()
    finally:
        connection.close()
    loaded_model = pickle.loads(row[0])
    return loaded_model.predict(x_test)
# input(疾病标号 算法编号 是否训练 预测ID)
# 训练算法模型,保存模型训练结果。
# output(训练:保存模型训练结果,预测:ID_预测概率 )
def train_test(disease_code, algorithm_code, is_train, test_id):
    """Entry point: train a model (is_train == 1) or predict with a stored one.

    Training: fits the algorithm selected by ``algorithm_code`` (1..6) via the
    matching train_* function and returns its status value (0 for unknown codes).
    Prediction: loads the pickled model whose train_model2.id equals
    ``algorithm_code`` and returns its predictions on the test split.

    ``disease_code`` and ``test_id`` are currently unused.
    """
    connection = connect_pg()
    data = get_data(connection, "Diabetes")  # get_data closes this connection
    train, test = per_data(data)
    if is_train == 1:
        trainers = {
            1: (train_xgb, 'train_test_xgb'),
            2: (train_gbdt, 'train_test_gbdt'),
            3: (train_lr, 'train_test_lr'),
            4: (train_svm, 'train_test_svm'),
            5: (train_rf, 'train_test_rf'),
            6: (train_cart, 'train_test_cart'),
        }
        if algorithm_code not in trainers:
            return 0
        trainer, label = trainers[algorithm_code]
        predict = trainer(train, test)
        print(predict)
        print(label)
        return predict
    print("预测模型:")
    print(algorithm_code)
    x_test = test.drop(['AGE', 'SEX'], axis=1)
    # Fresh connection for the model lookup; the original opened a second one
    # here and never closed it — fixed with try/finally.
    connection = connect_pg()
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                "SELECT training_parameters FROM train_model2 WHERE id = %s",
                (algorithm_code,),
            )
            model_bytes = cursor.fetchone()[0]
    finally:
        connection.close()
    # NOTE(review): pickle.loads runs arbitrary code from the DB blob; only
    # safe while the database contents are trusted.
    loaded_model = pickle.loads(model_bytes)
    if algorithm_code == 1:
        # XGBoost boosters require a DMatrix rather than a raw DataFrame.
        return loaded_model.predict(xgb.DMatrix(x_test))
    return loaded_model.predict(x_test)
# predict = traintest_model(1, 6, 1, 1)
if __name__ == "__main__":
    # Guarded so merely importing this module no longer fires a DB round-trip
    # and a prediction at import time.
    predict = train_test(1, 6, 0, 1)
    print(predict)
# NOTE: removed trailing scrape artifacts ("05-05", "614", "05-25", "1802") —
# they were blog-page metadata, not valid Python, and broke the file's syntax.