result1 = []
for n in data['did'].tolist():
if n.isdigit() == True and len(n)==15:
print(n)
#h1
h1.update(n.encode('utf-8'))
result1.append(h1.hexdigest())
# 不带声调的(style=pypinyin.NORMAL)
def pinyin(word):
s = ''
for i in pypinyin.pinyin(word, style=pypinyin.NORMAL):
s += ''.join(i)
return s
# 带声调的(默认)
def yinjie(word):
s = ''
# heteronym=True开启多音字
for i in pypinyin.pinyin(word, heteronym=True):
s = s + ''.join(i) + " "
return s
class MyConverter(mysql.connector.conversion.MySQLConverter):
def row_to_python(self, row, fields):
row = super(MyConverter, self).row_to_python(row, fields)
def to_unicode(col):
if type(col) == bytearray:
return col.decode('utf-8')
return col
return [to_unicode(col) for col in row]
MYSQL 库连接
def mysqlcon(host, port, user, password, database, sql, ret):
mysqlcon = mysql.connector.connect(
host=host,
port=port,
user=user,
passwd=password,
database=database, use_unicode=False, converter_class=MyConverter
)
mysqlcurs = mysqlcon.cursor(buffered=True)
# mysql
mysqlcurs.execute(sql)
if ret is True:
myresult = mysqlcurs.fetchall() # fetchall() 获取所有记录
return myresult
mysqlcon.close()
mysqlcurs.close()
elif ret is False:
mysqlcon.commit()
mysqlcon.close()
mysqlcurs.close()
从列表里移除单词
def remove_word(alllist):
regex = '角色ID'
for num in range(len(alllist)):
if re.search(regex, alllist[num]) is not None:
alllist[num] = 'used'
data_dic = []
regex = ':'
for num in range(len(alllist)):
if re.search(regex, alllist[num]) is not None:
# print(alllist[num])
data_dic.append(alllist[num])
fin_dic = []
for num in range(len(data_dic)):
if re.search('[0-9]', data_dic[num]) is not None:
# print(data_dic[num])
fin_dic.append(data_dic[num])
return fin_dic
pandas寻找位置
def find_pd(word, pd):
index_ = []
for num_row in range(pd.shape[0]): # 5
for num_col in range(pd.shape[1]): # 4
if pd.iloc[num_row, num_col] == word:
print([num_row, num_col])
index_.append([num_row, num_col])
else:
pass
return index_
RUN
if __name__ == '__main__':
markdown 在字母正下方插入下标
$\underset {\theta}{min}$
抽奖
import random
from collections import Counter
N = 1000000
def rand_num():
n_l,e_l = [],[]
while len(n_l)<5:
t = random.randint(1,35)
if t in n_l:
pass
else:
n_l.append(t)
n_l.sort()
while len(e_l)<2:
t = random.randint(1,12)
if t in e_l:
pass
else:
e_l.append(t)
e_l.sort()
l = n_l +e_l
return l
sum_ = []
for n in range(N):
sum_.append(str(rand_num()))
dict_ = {}
for key in sum_:
dict_[key] = dict_.get(key, 0) + 1
def top_n_scores(n, score_dict):
lot = [(k,v) for k, v in dict_.items()] #make list of tuple from scores dict
nl = []
while len(lot)> 0:
nl.append(max(lot, key=lambda x: x[1]))
lot.remove(nl[-1])
return nl[0:n]
top_n_scores(4, dict_)
select
*
from
(
select
tb1.warId, tb1.beginTime, tb1.server, tb2.partyname, tb3.kandd, tb4.warpersoncount, tb5.maxkill, tb1.spaceline
from
(
select
DISTINCT warId, beginTime , server , CONCAT(name , ' ', lineid, '线') "spaceline"
from
(
select
distinct CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId", server , mapId , beginTime , endTime, lineid
from
pss_ana.map_fight_info
order by
warId )tb1
left join (
select
id, name
from
game_config.config_map_name )tb2 on
tb1.mapId = tb2.id
order by
warId )tb1
left join (
select
tbparty.warId, server, GROUP_CONCAT(tbparty.partyName SEPARATOR ',') partyname
from
(
select
DISTINCT warId, partyName, server
from
(
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId", killGuildId "partyId" , killGuildName "partyName", 'kill' as "state" , server
from
pss_ana.map_fight_info
union all
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId" , dieGuildId "partyId" , dieGuildName "partyName", 'killed' as "state" , server
from
pss_ana.map_fight_info )tb1 )tbparty
group by
tbparty.warId, server )tb2 on
tb1.warId = tb2.warId
and tb1.server = tb2.server
left join (
select
warId, group_concat(state SEPARATOR ',') "kandd", server
from
(
select
warId, concat('[', partyName, ']', state, ':', max(count)) "state" , server
from
(
select
warId, partyName, state, COUNT(1) as "count", server
from
(
select
warId, partyName, state, server
from
(
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId", killGuildId "partyId" , killGuildName "partyName", '杀人' as "state", server
from
pss_ana.map_fight_info
union all
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId" , dieGuildId "partyId" , dieGuildName "partyName", '被杀' as "state", server
from
pss_ana.map_fight_info )tbparty
order by
warId, partyName )tbkill
group by
warId, partyName, state, server
union all
select
DISTINCT warId, PartyName, '杀人' as "state", 0 as "count(1)", server
from
(
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId" , killGuildName "partyName", server
from
pss_ana.map_fight_info
union all
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId" , dieGuildName "partyName", server
from
pss_ana.map_fight_info )tb1
union all
select
DISTINCT warId, PartyName, '被杀' as "state", 0 as "count(1)", server
from
(
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId" , killGuildName "partyName", server
from
pss_ana.map_fight_info
union all
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId" , dieGuildName "partyName", server
from
pss_ana.map_fight_info )tb1
order by
warId )tb1
group by
warId, partyName, state, server )tbkill1
group by
warId, server )tb3 on
tb1.warId = tb3.warId
and tb1.server = tb3.server
left join (
select
warId, COUNT(DISTINCT id ) "warpersoncount", server
from
(
select
warId, id, server
from
(
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId", killId "Id", server
from
pss_ana.map_fight_info
union all
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId" , dieId "Id" , server
from
pss_ana.map_fight_info )tbparty
order by
warId )tbczcount
group by
warId, server )tb4 on
tb1.warId = tb4.warId
and tb1.server = tb4.server
left join (
select
warId, count(killid) "maxkill", server
from
(
select
CONCAT(substring(server, 5, 4), ' ', LPAD(lineid, 2, 0), ' ', EXTRACT(YEAR FROM beginTime), ' ', LPAD(EXTRACT(month FROM beginTime), 2, 0), ' ', LPAD(EXTRACT(day from beginTime), 2, 0), ' ', LPAD(id, 4, 0)) "warId", killid, server
from
pss_ana.map_fight_info )tbjsmax
group by
warId, server )tb5 on
tb1.warId = tb5.warId
and tb1.server = tb5.server )tball
pandas列名排序
df = df[ ['mean'] + [ col for col in df.columns if col != 'mean' ] ]
读取文件
import os
from os.path import isfile, join
import pandas as pd
path_ = "D:/dataset/competitive-data-science-predict-future-sales"
pos_files = [path_ + '/' + f for f in os.listdir(path_) if isfile(join(path_, f))]
print(len(pos_files))
def shushustr2sec(shushustr):
day = checkNonelist(re.findall(r"(\d+)天",shushustr))
hour = checkNonelist(re.findall(r"(\d+)小时",shushustr))
minute= checkNonelist(re.findall(r"(\d+)分",shushustr))
second = checkNonelist(re.findall(r"(\d+)秒",shushustr))
#print(day,'\n',hour,'\n',minute,'\n',second)
time = second + minute * 60 + hour *60*60 + day * 86400
return time
堆叠图
import matplotlib.colors as mcolors
def pltbar(titlename,xlabel,label,botV,cenV,topV):
plt.title(titlename)
#plt.figure(figsize=(20,10))
N = len(xlabel)
ind = np.arange(N) #[ 0 1 2 3 4 5 6 7 8 ]
plt.xticks(ind, xlabel)
# plt.ylabel('Scores')
Bottom,Center,Top = botV,cenV,topV
d = []
for i in range(0, len(Bottom)):
sum = Bottom[i] + Center[i]
d.append(sum)
colors=list(mcolors.TABLEAU_COLORS.keys())
p1 = plt.bar(ind, Bottom, color=colors[0])
p2 = plt.bar(ind, Center, bottom=Bottom,color=colors[1])
p3 = plt.bar(ind, Top, bottom=d,color=colors[2])
plt.legend((p1[0], p2[0], p3[0]),label,loc = 2)
plt.show()
pltbar ('adad',['3','4','5'],['11','22','33'],[1,2,3],[3,4,5],[5,1,2])
多柱图
x = np.arange(size)
a = np.random.random(size)
b = np.random.random(size)
c = np.random.random(size)
# total_width, n = 0.8, 3
width = total_width / n
x = x - (total_width - width) / 2
plt.bar(x, a, width=width, label='a')
plt.bar(x + width, b, width=width, label='b')
plt.bar(x + 2 * width, c, width=width, label='c')
plt.legend()
plt.plot(x,a)
plt.plot(x + width,b)
plt.plot(x + 2 * width, c)
# plt.plot(x, y, "r", marker='*', ms=10, label="a")
# plt.xticks(rotation=45)
# plt.legend(loc="upper left")
plt.show()
柱状折线图子图
from matplotlib.pyplot import MultipleLocator
# import matplotlib.figure as fig
fig=plt.figure()
plt.figure(figsize=(15,10))
labels = [str(x) for x in range(0,20)]
fig.tight_layout()#调整整体空白
plt.subplots_adjust(wspace =0.2, hspace =0.5)#调整子图间距
# plt.figure(12)
plt.subplot(421)
plt.ylim(0,max(pltalldata[0])+100)
plt.bar(range(len(pltalldata[0])), pltalldata[0],color=['dodgerblue'])
ax2 = plt.twinx()
plt.plot(range(len(pltratedata[0])), pltratedata[0] , "r", marker='.', ms=1)
# plt.xlim([str(x) for x in range(0,21)])
plt.title('day1')
x_major_locator=MultipleLocator(1)
ax2.xaxis.set_major_locator(x_major_locator)
plt.xlim(-0.5,20)
plt.ylim(0,100)
plt.subplot(422)
plt.ylim(0,max(pltalldata[1])+100)
plt.bar(range(len(pltalldata[1])), pltalldata[1],color=['dodgerblue'])
ax2 = plt.twinx()
plt.plot(range(len(pltratedata[1])), pltratedata[1] , "r", marker='.', ms=1)
# plt.xlim([str(x) for x in range(0,21)])
plt.title('day2')
x_major_locator=MultipleLocator(1)
ax2.xaxis.set_major_locator(x_major_locator)
plt.xlim(-0.5,20)
plt.ylim(0,100)
plt.subplot(423)
plt.ylim(0,max(pltalldata[2])+100)
plt.bar(range(len(pltalldata[2])), pltalldata[2],color=['dodgerblue'])
ax2 = plt.twinx()
plt.plot(range(len(pltratedata[2])),pltratedata[2] , "r", marker='.', ms=1)
# plt.xlim([str(x) for x in range(0,21)])
plt.title('day3')
x_major_locator=MultipleLocator(1)
ax2.xaxis.set_major_locator(x_major_locator)
plt.xlim(-0.5,20)
plt.ylim(0,100)
plt.subplot(424)
plt.ylim(0,max(pltalldata[3])+100)
plt.bar(range(len(pltalldata[3])), pltalldata[3],color=['dodgerblue'])
ax2 = plt.twinx()
plt.plot(range(len(pltratedata[3])), pltratedata[3], "r", marker='.', ms=1)
# plt.xlim([str(x) for x in range(0,21)])
plt.title('day4')
x_major_locator=MultipleLocator(1)
ax2.xaxis.set_major_locator(x_major_locator)
plt.xlim(-0.5,20)
plt.ylim(0,100)
plt.subplot(425)
plt.ylim(0,max(pltalldata[4])+100)
plt.bar(range(len(pltalldata[4])), pltalldata[4],color=['dodgerblue'])
ax2 = plt.twinx()
plt.plot(range(len(pltratedata[4])),pltratedata[4] , "r", marker='.', ms=1)
# plt.xlim([str(x) for x in range(0,21)])
plt.title('day5')
x_major_locator=MultipleLocator(1)
ax2.xaxis.set_major_locator(x_major_locator)
plt.xlim(-0.5,20)
plt.ylim(0,100)
plt.subplot(426)
plt.ylim(0,max(pltalldata[5])+100)
plt.bar(range(len(pltalldata[5])), pltalldata[5],color=['dodgerblue'])
ax2 = plt.twinx()
plt.plot(range(len(pltratedata[5])), pltratedata[5] , "r", marker='.', ms=1)
# plt.xlim([str(x) for x in range(0,21)])
plt.title('day6')
x_major_locator=MultipleLocator(1)
ax2.xaxis.set_major_locator(x_major_locator)
plt.xlim(-0.5,20)
plt.ylim(0,100)
plt.subplot(427)
lastx = ['24', '48', '72', '96','120','144']
lasty = pltlastdata
lasty2 = pltolddata
plt.ylim(0,max(lasty)+100)
plt.bar(lastx,lasty,color=['dodgerblue'])
plt.bar(lastx,lasty2,color=['orange'])
for x1,y1 in zip(lastx,lasty):
plt.text(x1, y1 + 1, str(y1), ha='center', va='bottom', fontsize=10, rotation=0)
for x1,y2 in zip(lastx,lasty2):
plt.text(x1, y2 + 1, str(y2), ha='center', va='bottom', fontsize=10, rotation=0)
plt.title('pred num')
# plt.text(lastx,lasty+3,'%.3f'%lasty)
# x_major_locator=MultipleLocator(1)
# ax2.xaxis.set_major_locator(x_major_locator)
# plt.xlim(-0.5,6)
plt.subplot(428)
lastx = ['24', '48', '72', '96','120','144']
lasty = [round(x,2) for x in pltlastratedata]
plt.ylim(0,100)
plt.bar(lastx,lasty,color=['dodgerblue'])
for x1,y1 in zip(lastx,lasty):
plt.text(x1, y1 + 1, str(y1), ha='center', va='bottom', fontsize=10, rotation=0)
plt.title('pred accuracy')
# plt.text(lastx,lasty+3,'%.3f'%lasty)
# x_major_locator=MultipleLocator(1)
# ax2.xaxis.set_major_locator(x_major_locator)
# plt.xlim(-0.5,6)
list提取中位数,25分位
def takepos(inputlist,pos):
length = len(inputlist)
pos_ = int(length * pos) -1
inputlist.sort()
ans = inputlist[pos_]
return ans
笛卡尔积相乘
def cartesian(l1, l2):
carte = []
for i in itertools.product(l1, l2):
carte.append(i)
df = pd.DataFrame(carte)
return df
推荐系统重排
op = []
typelist = []
for n in range(200):
if len(typelist) >3:
del(typelist[0])
else:
ndf = predf[~predf['goods_id'].isin(op)]
ndf = ndf[~ndf['navigation_name_ch_set'].isin(typelist)]
typelist.append(ndf.iloc[0][2])
op.append(ndf.iloc[0][0])
打压
def pushlist(alist,blist,pushn):
t = []
for n in range(len(alist)):
if alist[-(n+1)] in blist:
t.insert(n - pushn,alist[-(n+1)])
else:
t.append(alist[-(n+1)])
return t[::-1]
强推
def uplist(alist,blist,upn):
t = []
for n in range(len(alist)):
if alist[n] in blist:
t.insert(n - upn,alist[n])
else:
t.append(alist[n])
return t
sql 字段名
select COLUMN_NAME from information_schema.COLUMNS where table_name = 'es_brand'