1.传入用户参数:
sex【男/女/未知】
age【数字/未知】
kemuname【李白/贺知章/...】
2.得到用户标签:
age【少儿/青少年/青年/中年/老年/未知】
label【儿童/男/女/通用】
2.1 标签逻辑:age 为"少儿"时 label 为"儿童";其余情况(含 age 未知)按 sex 取值:男→男,女→女,未知→通用
3.题目推荐
题目1:在 [可以推荐+15天内没有被做过+对应科目]的题库下,随机抽一题
题目2:在[可以推荐+age下的难度等级为1+对应科目+对应label]的题库下,随机抽一题
题目3:在 [可以推荐+15天内没有被做过+对应科目]的题库下,随机抽一题
题目4:在[可以推荐+age下的难度等级为2or3+对应科目+对应label]的题库下,随机抽一题
题目5:在[可以推荐+age下的难度等级为4or5+对应科目+对应label]的题库下,随机抽一题
如果高难度题库中没有题,则去低难度题库中抽题;如果低难度题库中没有题,则去冷启动题库中抽题
代码:
*******************************************************************************************
import os, pathlib, sys
import math
import re
import json
import random
import time,datetime
from utils.pymysql_tools import UsingMysql
from collections import defaultdict
import numpy
from utils.logger import logger, monitorLogger
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
# ******************** 公共函数 ****************************
class MyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy scalars and arrays to built-in types."""

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        Args:
            obj: The object the standard encoder could not serialize.

        Returns:
            ``int`` for numpy integers, ``float`` for numpy floats, ``bool``
            for numpy booleans and a (nested) list for numpy arrays.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        if isinstance(obj, numpy.integer):
            return int(obj)
        if isinstance(obj, numpy.floating):
            return float(obj)
        # numpy.bool_ is neither a Python bool nor a numpy integer, so it
        # needs its own branch or json.dumps raises TypeError on it.
        if isinstance(obj, numpy.bool_):
            return bool(obj)
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        return super().default(obj)
# **********************************************************
class ChallengepoetModel:
    """Pick five recommended question ids for one user request.

    ``train`` is the entry point. It derives an age group and a label from
    the request, loads candidate questions from ``tzsr_question_level`` and
    returns question ids in the slot order [cold, easy, cold, medium, hard].
    """

    # Every value label_type can return. Also used to keep the value that is
    # interpolated into the SQL text restricted to known constants.
    _KNOWN_LABELS = ('儿童', '男', '女', '通用')

    # Separator between "age-group:level" pairs (ASCII or full-width comma).
    _PAIR_SEP = re.compile(r'[,,]+')

    def __init__(self):
        pass

    def load_file_list(self, x):
        """Read the UTF-8 text file at path ``x`` into a list of lines.

        Newline characters are removed from every line.
        """
        with open(x, 'r', encoding='utf8') as file_to_read:
            return [line.replace('\n', '') for line in file_to_read]

    def load_file_dict(self, x):
        """Parse the UTF-8 file at path ``x`` as JSON and return the result.

        Newline characters are removed before parsing, so the document may be
        spread over several lines.
        """
        with open(x, 'r', encoding='utf8') as file_to_read:
            return json.loads(file_to_read.read().replace('\n', ''))

    def get_age(self, age):
        """Map a raw age to an age-group name.

        Args:
            age: A number, a numeric string, or the literal ``'未知'``.

        Returns:
            ``'少儿'`` (< 13), ``'青少年'`` (13-17), ``'青年'`` (18-45),
            ``'中年'`` (46-68), ``'老年'`` (>= 69), ``'未知'`` when the input
            is ``'未知'``, or ``'-'`` when the input is empty/falsy or cannot
            be parsed as an integer.
        """
        if not age:
            return '-'
        if age == '未知':
            return '未知'
        try:
            years = int(str(age))
        except (TypeError, ValueError):
            return '-'
        if years < 13:
            return '少儿'
        if years < 18:
            return '青少年'
        if years < 46:
            return '青年'
        if years < 69:
            return '中年'
        # 69 itself belongs here; it previously matched no branch at all.
        return '老年'

    def label_type(self, age, sex):
        """Return the question label for an age group and a sex.

        Args:
            age: Age-group name as returned by ``get_age``.
            sex: ``'男'``, ``'女'`` or ``'未知'``.

        Returns:
            ``'儿童'`` for the ``'少儿'`` group; otherwise ``'男'`` or ``'女'``
            when the sex is known, and ``'通用'`` for every other input, so
            the result is always one of the known labels and never ``None``.
        """
        if age == '少儿':
            return '儿童'
        if sex == '男':
            return '男'
        if sex == '女':
            return '女'
        return '通用'

    @staticmethod
    def _subject_of(channel_name):
        """Return the third '-'-separated segment of ``channel_name``, or None."""
        if not channel_name:
            return None
        segments = channel_name.split('-')
        return segments[2] if len(segments) > 2 else None

    @classmethod
    def _rows_for_subject(cls, rows, subject):
        """Yield the rows whose channel name resolves to ``subject``."""
        for row in rows:
            name = cls._subject_of(row.get('channel_name'))
            # A malformed channel name (None) must never match anything.
            if name is not None and name == subject:
                yield row

    @classmethod
    def _parse_levels(cls, level_text):
        """Parse 'group:level,group:level,...' into a ``{group: level}`` dict.

        Pairs without a colon, or with an empty key or value, are ignored
        instead of raising, so a short or malformed string is harmless.
        """
        levels = {}
        if not level_text:
            return levels
        for pair in cls._PAIR_SEP.split(level_text):
            group, colon, level = pair.replace(':', ':').partition(':')
            group, level = group.strip(), level.strip()
            if colon and group and level:
                levels[group] = level
        return levels

    @staticmethod
    def _bucket_by_level(items):
        """Split ``(item_id, level)`` pairs into easy / medium / hard id lists.

        Level 1 is easy, other levels below 4 are medium, the rest are hard.
        Pairs whose level is not an integer are skipped.
        """
        easy, middle, hard = [], [], []
        for item_id, raw_level in items:
            try:
                level = int(raw_level)
            except (TypeError, ValueError):
                continue
            if level == 1:
                easy.append(item_id)
            elif level < 4:
                middle.append(item_id)
            else:
                hard.append(item_id)
        return easy, middle, hard

    @staticmethod
    def _pick(pools, used):
        """Draw one id from the first non-empty pool in ``pools``.

        Ids not yet in ``used`` are preferred; a repeat is only returned when
        every id of that pool was already taken. The drawn id is added to
        ``used``. Returns None when all pools are empty.
        """
        for pool in pools:
            if not pool:
                continue
            fresh = [item_id for item_id in pool if item_id not in used]
            chosen = random.choice(fresh or pool)
            used.add(chosen)
            return chosen
        return None

    @classmethod
    def _build_recommendation(cls, easy, middle, hard, cold):
        """Return ids for the slots [cold, easy, cold, medium, hard].

        Each slot walks its own fallback chain: a harder pool falls back to
        the easier ones, the easy pool falls back to the cold-start pool, and
        the remaining pools are a last resort. The result is empty when no
        pool holds any question, otherwise it has five entries.
        """
        chains = (
            (cold, easy, middle, hard),
            (easy, cold, middle, hard),
            (cold, easy, middle, hard),
            (middle, easy, cold, hard),
            (hard, middle, easy, cold),
        )
        used = set()
        picks = [cls._pick(chain, used) for chain in chains]
        return [item_id for item_id in picks if item_id is not None]

    def train(self, info_json, request_id):
        """Recommend question ids for one request.

        Args:
            info_json: A dict, or a JSON string of a dict, with the keys
                ``'kemuname'`` (required), ``'sex'`` and ``'age'``.
            request_id: Request identifier; only used in log messages.

        Returns:
            A list of question ids in the slot order
            [cold, easy, cold, medium, hard], or an empty list when no
            candidate question exists for the subject.

        Raises:
            KeyError: If ``'kemuname'`` is missing from the request.
        """
        info = json.loads(info_json) if isinstance(info_json, str) else info_json
        kemuname = info['kemuname']

        age = self.get_age(info.get('age'))
        if age == '-':
            # NOTE(review): an unparsable age is treated like '未知'; this
            # assumes level_age_15d carries a '未知' entry. Rows without one
            # are skipped below. Confirm against the table contents.
            age = '未知'
        label_t = self.label_type(age, info.get('sex', '未知'))
        if label_t not in self._KNOWN_LABELS:
            # Only known constants may reach the SQL text below.
            label_t = '通用'

        # Questions answered within the window ("hot"), restricted to the
        # generic label or the user's own label. The previous condition
        # `= '通用' or '<label>'` did not compare the column with the label.
        hot_sql = """
            select item_id,channel_name,level_age_15d,label_biz_type
            from tzsr_question_level
            where status=1 and num_all_15d > 0 and channel_name is not null
              and label_biz_type in ('通用', '{label_t}')
        """.format(label_t=label_t)
        # Questions nobody answered within the window ("cold start").
        cold_boot_sql = """
            select item_id,channel_name
            from tzsr_question_level
            where status = 1 and num_all_15d = 0 and channel_name IS NOT NULL
        """
        with UsingMysql(log_time=False) as um:
            # Rows are expected to be dicts keyed by column name.
            hot_rows = list(um.fetch_all(hot_sql) or [])
            cold_rows = list(um.fetch_all(cold_boot_sql) or [])

        # Hot questions of the requested subject with their level for this
        # age group; rows without such a level are left out.
        hot_items = []
        for row in self._rows_for_subject(hot_rows, kemuname):
            level = self._parse_levels(row.get('level_age_15d')).get(age)
            if level is not None:
                hot_items.append((row['item_id'], level))
        easy, middle, hard = self._bucket_by_level(hot_items)
        cold = [row['item_id']
                for row in self._rows_for_subject(cold_rows, kemuname)]

        # No fixed seed here: reseeding on every call made each request with
        # the same pools return the same questions.
        rec_list = self._build_recommendation(easy, middle, hard, cold)

        summary = (
            'request_id={} kemuname={} age={} label={} '
            'easy={} middle={} hard={} cold={} rec_list={}'
        ).format(request_id, kemuname, age, label_t,
                 len(easy), len(middle), len(hard), len(cold), rec_list)
        if rec_list:
            logger.info(summary)
        else:
            logger.warning('no candidate question: ' + summary)
        return rec_list
# Script entry point: running this file directly currently does nothing.
if __name__ == '__main__':
    pass
本文尚不完整,后续会继续更新……