import re
import pandas as pd
import numpy as np
import json
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.externals import joblib
from sklearn import svm
import math
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from xgboost import plot_importance
def get_query_predictions_values(x, index):
    """Return the ``index``-th smallest value of a JSON-encoded object.

    Args:
        x: JSON text that decodes to an object (dict) whose values can be
            converted with ``float``.
        index: Zero-based position in the ascending-sorted list of values.

    Returns:
        The selected value as a ``float``, or the integer ``0`` when the
        object has ``index`` entries or fewer.
    """
    predictions = json.loads(x)
    # Guard first: not enough entries to have a value at this rank.
    if len(predictions) <= index:
        return 0
    # Values are converted before sorting so ordering is numeric, not textual.
    scores = sorted(float(value) for value in predictions.values())
    return scores[index]
def get_query_prediction_keys(x):
    """Return the cleaned keys of a JSON-encoded object as one string.

    Args:
        x: JSON text that decodes to an object (dict).

    Returns:
        Every key passed through ``remove_cha`` and joined with single
        spaces, in the order the decoded object yields its keys.
    """
    predictions = json.loads(x)
    return ' '.join(remove_cha(key) for key in predictions.keys())
def get_max_query_prediction(x):
    """Return the largest value of a JSON-encoded object as a number.

    Args:
        x: JSON text that decodes to an object (dict) whose values can be
            converted with ``float``.

    Returns:
        The maximum value as a ``float``, or the integer ``0`` when the
        object is empty.
    """
    predictions = json.loads(x)
    if not predictions:
        return 0
    # Convert before comparing: taking max() over numeric *strings* compares
    # them lexicographically (e.g. "1e-05" > "0.5", "9" > "10") and would
    # return a str, unlike get_query_predictions_values which uses floats.
    return max(float(value) for value in predictions.values())
# Runs of whitespace and of the listed punctuation characters that
# ``remove_cha`` deletes. Written as a raw string so the backslash escapes
# reach the regex engine unchanged instead of being (invalid) string escapes.
_REMOVE_CHA_PATTERN = re.compile(
    r'[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+'
)


def remove_cha(x):
    """Strip whitespace and punctuation from the text form of ``x``.

    Args:
        x: Any object; it is converted with ``str`` before cleaning.

    Returns:
        The string with every character matched by the punctuation pattern
        removed, and then every literal ``'2C'`` substring removed.
    """
    cleaned = _REMOVE_CHA_PATTERN.sub('', str(x))
    # NOTE(review): '2C' is presumably what is left of a URL-encoded comma
    # ('%2C') once '%' has been stripped above - confirm against the data.
    # It also removes any genuine '2C' in the text.
    return cleaned.replace('2C', '')
def len_title_in_query(title, query):
    """Count the tokens of ``query`` that contain ``title`` as a substring.

    Args:
        title: Substring to look for.
        query: Text split on single spaces into tokens.

    Returns:
        The number of tokens in which ``title`` occurs. An empty ``title``
        occurs in every token, including the empty tokens produced by
        consecutive spaces.
    """
    # str.split(' ') always yields at least one token, so no emptiness guard
    # is needed before counting.
    return sum(1 for token in query.split(' ') if title in token)
def is_prefix_in_title(prefix, title):
    """Locate ``prefix`` inside ``title``.

    Args:
        prefix: Substring to search for.
        title: Text to search in.

    Returns:
        The index of the first occurrence of ``prefix`` in ``title``, or
        ``-1`` when it does not occur. Note that this is an index, not a
        boolean: ``0`` means "found at the start" and ``-1`` is truthy.
    """
    # Search for the argument itself, not the literal text 'prefix'.
    return title.find(prefix)
# Load the training split. The file has no header row, so the five column
# names are supplied explicitly, and every column is cast to str afterwards.
train_data = pd.read_table(
    './data/oppo_round1_train_20180929.txt',
    names=['prefix', 'query_prediction', 'title', 'tag', 'label'],
    header=None,
    encoding='utf-8',
).astype(str)
val_data = pd.read_table('./data/oppo_round1_vali_20180929.txt',
names = ['prefix','query_prediction','title','tag','label
深度学习笔记--XGBOOST算法应用ctr预估模型
最新推荐文章于 2022-04-28 20:17:13 发布