Python 代码：用 pandas 实现递归数据查找，解决 apply 只能单线程运行的问题

暮色花空

于 2023-11-30 14:52:24 发布

阅读量156

点赞数

分类专栏： python 文章标签： python pandas 开发语言

本文链接：https://blog.csdn.net/jsg_0311/article/details/134712452

版权

python 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

def select_main_object_dg(index, max_rows,dic_rows):
    """Process one page of ``main_object`` rows and store their ancestor chains.

    Reads ``id``/``pid`` for non-deleted rows of ``school_info_dev.main_object``
    (page ``index``, ``max_rows`` rows per page), computes a ``row_jg`` list for
    every row through ``process_chunk`` under ``Parallel``, spreads the first
    five entries of each list over the columns ``yjjgid`` .. ``wjjgid`` and
    appends the result to the ``main_object_jigou`` table.

    Args:
        index: Zero-based page number; the SQL offset is ``index * max_rows``.
        max_rows: Page size used in the ``LIMIT`` clause.
        dic_rows: Lookup data forwarded unchanged to ``process_chunk``
            (presumably a DataFrame with ``id`` and ``pid`` columns -- confirm
            against the caller).

    Returns:
        None. Any exception is caught and printed; nothing is re-raised.
    """
    # Bound before the try so the finally clause can always reference it, even
    # when constructing Memory itself fails.
    memory = None
    try:
        memory = Memory(location='your_cache_directory', verbose=0)
        program_start_time = time.time()
        # NOTE(review): index and max_rows are interpolated straight into the
        # SQL text; this assumes both are integers supplied by trusted code.
        select_sql = ("select mo.id,mo.pid from school_info_dev.main_object mo where mo.is_deleted=0 limit %s,%s")% (index * max_rows, max_rows)
        with Session() as session:
            rows_main = pd.read_sql_query(select_sql, session.bind)
            if rows_main.empty:
                return
            row_jg = pd.DataFrame()
            row_jg['main_object_id'] = rows_main['id'].to_frame()
            print("第%d次：机构数（机构数）数据处理开始时间：%s" % (index, tool.time_nyr(program_start_time)))
            # NOTE(review): the chunk size equals the frame length, so exactly
            # one chunk is produced and a single job is dispatched. Using a
            # smaller chunk size would also require process_chunk to write the
            # result into the chunk rather than into the full frame.
            chunk_size = len(rows_main)
            chunks = [rows_main.iloc[start:start + chunk_size] for start in range(0, len(rows_main), chunk_size)]
            processed_rows = Parallel(n_jobs=-1)(delayed(process_chunk)(chunk, dic_rows, rows_main) for chunk in chunks)
            result_df = pd.concat(processed_rows)
            memory.clear()
            jigou_dic = ['yjjgid', 'ejjgid', 'sjjgid', 'sijjgid', 'wjjgid']
            print("第%d次：机构数（机构数）apply结束：%s，耗时：%.2f" % (
                index, tool.time_nyr(time.time()), (time.time() - program_start_time)))
            # jigou yields None for NaN ids or after a caught error; treat such
            # rows as having no ancestors instead of failing the whole page.
            ancestor_lists = [x if isinstance(x, (list, tuple)) else [] for x in result_df['row_jg']]
            for i, value in enumerate(jigou_dic):
                row_jg[value] = [chain[i] if len(chain) > i else None for chain in ancestor_lists]
            row_jg.to_sql('main_object_jigou', con=session.bind, index=False, if_exists='append', chunksize=1000)
            print("第%d次：机构数（机构数）处理结束时间：%s，耗时：%.2f" % ( index,tool.time_nyr(time.time()), (time.time() - program_start_time)))
    except Exception as e:
        # Best-effort batch step: report the failing page and carry on.
        print('select_main_object_dg', e, index)
    finally:
        # Cleared to release memory: per the original author, memory usage
        # balloons after using Parallel.
        if memory is not None:
            memory.clear()

def process_chunk(rows_main1, dic_rows, rows_main):
    """Attach a ``row_jg`` column to ``rows_main`` and return that frame.

    Each value of ``rows_main1['id']`` is passed to ``jigou`` together with
    ``dic_rows`` (as ``extra_param``); the resulting Series is stored in
    ``rows_main['row_jg']``. ``rows_main`` is modified in place.

    Original author's note: two identical frames are passed (one named with a
    trailing ``1``, one without) so that the later merge does not raise.
    """
    source_ids = rows_main1['id']
    ancestor_chains = source_ids.apply(jigou, extra_param=dic_rows)
    rows_main['row_jg'] = ancestor_chains
    return rows_main

# Module-level cache of already-resolved chains: jigou() appends entries to it
# and find_children() consults it before searching the DataFrame.
children_all = []
def jigou(x, extra_param):
    """Return the chain that ``find_children`` builds for ``x``, reversed.

    ``extra_param`` is indexed by its ``id`` column to locate ``x``; the chain
    returned by ``find_children`` is reversed before being returned. A slice of
    the reversed chain is also remembered in ``children_all`` for later
    lookups.

    Returns ``None`` when ``x`` is NA, and also when any exception occurs (the
    exception is printed, not raised).
    """
    try:
        if pd.isna(x):
            return None
        matching_ids = extra_param[extra_param['id'] == x]['id'].values
        chain = find_children(extra_param, matching_ids[0])
        flipped = chain[::-1]
        # Flip the first entries back before caching them.
        # NOTE(review): the original comment said only the first 5 groups are
        # inserted, but the slice keeps 7 -- confirm which is intended.
        cache_entry = flipped[:7][::-1]
        if cache_entry not in children_all:
            children_all.append(cache_entry)
        return flipped
    except Exception as e:
        print('jigou',e)
def find_children(df, parent_id):
    """Collect ``parent_id`` followed by the ids reached by following ``pid``.

    If any chain cached in ``children_all`` contains ``parent_id``, the tail of
    the first such chain (from ``parent_id`` onward) is returned. Otherwise
    every row of ``df`` whose ``id`` equals ``parent_id`` contributes its
    ``id`` plus the result of recursing on its ``pid``.

    Returns a list, or ``None`` if an exception occurred (the exception is
    printed, not raised).
    """
    try:
        # Fast path: reuse a previously resolved chain when one covers this id.
        for cached_chain in children_all:
            if parent_id in cached_chain:
                start = cached_chain.index(parent_id)
                return list(cached_chain[start:])
        lineage = []
        matches = df[df['id'] == parent_id]
        for _, record in matches.iterrows():
            lineage.append(record['id'])
            lineage.extend(find_children(df, record['pid']))
        return lineage
    except Exception as e:
        print('find_children',e)

暮色花空

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
Python 代码：用 pandas 实现递归数据查找，解决 apply 只能单线程运行的问题

文章摘要（代码节选）：
print("第%d次：机构数（机构数）处理结束时间：%s，耗时：%.2f" % ( index,tool.time_nyr(time.time()), (time.time() - program_start_time)))
print("第%d次：机构数（机构数）数据处理开始时间：%s" % (index, tool.time_nyr(program_start_time)))
print("第%d次：机构数（机构数）apply结束：%s，耗时：%.2f" % (……
# 反转数据，只插入前面的5组数据。
复制链接

扫一扫

专栏目录