import datetime
import pandas as pd
import numpy as np
from multiprocessing import Pool,Manager
from functools import partial
import multiprocessing as mp
from joblib import Parallel, delayed
def processParallel(group):
    """Aggregate one ``timespan`` slice into one row per ``(code, name)``.

    Written to take a single tuple argument so it can be handed directly to
    ``Pool.map`` / ``joblib.delayed``.

    Args:
        group: A ``(timespan, df)`` pair. ``df`` must contain the columns
            ``code``, ``name``, ``now``, ``date``, ``time``, ``timeadj``,
            ``volume`` and ``close``.

    Returns:
        A DataFrame with the columns, in order: ``code``, ``name``,
        ``open``/``close`` (first/last non-null ``now`` in row order),
        ``high``/``low`` (max/min of ``now``), ``date``, ``time``,
        ``timeadj`` (max of each), ``volume_f``/``volume_l`` (min/max of
        ``volume``), ``pre_close`` (max of the input ``close``) and
        ``timespan`` (the key from ``group``, repeated on every row).
    """
    timespan, df = group
    # Named aggregation ties every output column to its source column and
    # reducer explicitly, instead of renaming a MultiIndex by position.
    df_sub = (
        df.groupby(['code', 'name'])
        .agg(
            open=('now', 'first'),
            close=('now', 'last'),
            high=('now', 'max'),
            low=('now', 'min'),
            date=('date', 'max'),
            time=('time', 'max'),
            timeadj=('timeadj', 'max'),
            volume_f=('volume', 'min'),
            volume_l=('volume', 'max'),
            pre_close=('close', 'max'),
        )
        .reset_index()
    )
    df_sub['timespan'] = timespan
    return df_sub
if __name__ == "__main__":
    # Benchmark: build the same per-(code, name, timespan) aggregate three
    # ways and print a timestamp before/after each variant.
    # NOTE(review): `df` (the input frame) and the `df_multi` module are not
    # defined in the visible part of this file; they must be provided
    # (e.g. load `df`, `import df_multi`) before this block can run.
    N_WORKERS = 12  # worker count shared by both parallel variants

    print(datetime.datetime.now())

    # Variant 1: joblib, one task per `timespan` group.
    ret = Parallel(n_jobs=N_WORKERS)(
        delayed(df_multi.processParallel)((name, group))
        for name, group in df.groupby('timespan')
    )
    result = pd.concat(ret, ignore_index=True)
    print(datetime.datetime.now())

    # Variant 2: multiprocessing.Pool, one task per `timespan` group.
    # Group by the bare column name rather than a one-element list: with a
    # list key, pandas >= 2.0 yields 1-tuples as group names, so the worker
    # would receive a tuple instead of the scalar timespan.
    # The `with` block releases the worker processes even if `map` raises.
    with Pool(processes=N_WORKERS) as p:
        jg = pd.concat(
            p.map(df_multi.processParallel, list(df.groupby('timespan'))),
            ignore_index=True,
        )
    print(datetime.datetime.now())

    # Variant 3: a single in-process groupby over all three keys.
    df_sub = (
        df.groupby(['code', 'name', 'timespan'])
        .agg(
            open=('now', 'first'),
            close=('now', 'last'),
            high=('now', 'max'),
            low=('now', 'min'),
            date=('date', 'max'),
            time=('time', 'max'),
            timeadj=('timeadj', 'max'),
            volume_f=('volume', 'min'),
            volume_l=('volume', 'max'),
            pre_close=('close', 'max'),
        )
        .reset_index()
    )
    print(datetime.datetime.now())
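# Timestamps printed by one run of the benchmark above (apparently pasted output):
# 2022-10-14 23:16:19.419217  (start)
# 2022-10-14 23:16:35.495221  (after the joblib variant, ~16.1 s)
# 2022-10-14 23:17:00.529082  (after the multiprocessing.Pool variant, ~25.0 s)
# 2022-10-14 23:17:35.105911  (after the single-process groupby, ~34.6 s)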