基于买方意向的货物撮合交易

最新推荐文章于 2023-02-28 11:57:28 发布

跳霹雳舞的蒙特卡

最新推荐文章于 2023-02-28 11:57:28 发布

阅读量395

点赞数 1

文章标签：算法 python pandas

本文链接：https://blog.csdn.net/qq_40732962/article/details/111599457

版权

由于兴趣爱好参加CCF大数据与计算智能大赛（2020），遇到题目为“基于买方意向的货物撮合交易”，本人用python中的pandas库处理此问题，最后结果还可以，但是运行时间太长了，所以请教如何对代码进行优化？

比赛的网址为：https://www.datafountain.cn/competitions/468

import pandas as pd


buyer = pd.read_csv(r'\buyer.csv', header=0, encoding='ANSI', low_memory=False)
seller = pd.read_csv(r'\seller.csv', header=0, encoding='ANSI', low_memory=False)
output = pd.read_excel(r'\output.xlsx', header=0)

# 把表分成两类：SR和CF：
buyer_SR = buyer[buyer['品种'] == 'SR']
buyer_CF = buyer[buyer['品种'] == 'CF']
seller_SR = seller[seller['品种'] == 'SR']
seller_CF = seller[seller['品种'] == 'CF']

# 统计买方第一意向
x = pd.DataFrame(buyer_SR)
x['第一志愿和值'] = buyer_SR['第一意向'] + buyer_SR['值']
buyer_first_unique_SR = x.loc[x.duplicated(subset='第一志愿和值') == False, '第一志愿和值'].values
re = filter(lambda x: x == x, buyer_first_unique_SR)
buyer_first_unique_SR = list(re)
buyer_first_unique_SR.sort()

y = pd.DataFrame(buyer_CF)
y['第一志愿和值'] = buyer_CF['第一意向'] + buyer_CF['值']
buyer_first_unique_CF = y.loc[y.duplicated(subset='第一志愿和值') == False, '第一志愿和值'].values
re = filter(lambda x: x == x, buyer_first_unique_CF)
buyer_first_unique_CF = list(re)
buyer_first_unique_CF.sort()
re2 = filter(lambda x: x[:2] == '产地', buyer_first_unique_CF)
buyer_first_unique_1 = list(re2)
re3 = filter(lambda x: x[:2] != '产地', buyer_first_unique_CF)
buyer_first_unique_2 = list(re3)
buyer_first_unique_CF = buyer_first_unique_2 + buyer_first_unique_1

# 建立一个一个的第一意向单元
names = locals()
for i in range(len(buyer_first_unique_SR)):
    names['buyer_unique_SR%s' % i] = buyer_SR[
        (buyer_SR['第一意向'] == buyer_first_unique_SR[i][:2]) & (buyer_SR['值'] == str(buyer_first_unique_SR[i])[2:])]
names = locals()
for i in range(len(buyer_first_unique_CF)):
    names['buyer_unique_CF%s' % i] = buyer_CF[
        (buyer_CF['第一意向'] == buyer_first_unique_CF[i][:2]) & (buyer_CF['值'] == str(buyer_first_unique_CF[i])[2:])]

# 建立判断SR的每个意向单元格对应的seller的货物总数
seller_SR_warehouse_sum = seller_SR.groupby('仓库')['货物数量（张）'].sum()
seller_SR_brand_sum = seller_SR.groupby('品牌')['货物数量（张）'].sum()
seller_SR_year_sum = seller_SR.groupby('年度')['货物数量（张）'].sum()
seller_SR_class_sum = seller_SR.groupby('等级')['货物数量（张）'].sum()


def search_seller_SR_num(column_index_str, index_value_str):
    sum_buyers = 0
    if column_index_str == '仓库':
        sum_buyers = seller_SR_warehouse_sum[seller_SR_warehouse_sum.index == int(index_value_str)].values
    elif column_index_str == '品牌':
        sum_buyers = seller_SR_brand_sum[seller_SR_brand_sum.index == index_value_str].values
    elif column_index_str == '年度':
        sum_buyers = seller_SR_year_sum[seller_SR_year_sum.index == int(index_value_str)].values
    elif column_index_str == '等级':
        sum_buyers = seller_SR_class_sum[seller_SR_class_sum.index == int(index_value_str)].values
    return sum_buyers


# 建立判断CF的每个意向单元格对应的seller的货物总数
seller_CF_warehouse_sum = seller_CF.groupby('仓库')['货物数量（张）'].sum()
seller_CF_area_sum = seller_CF.groupby('产地')['货物数量（张）'].sum()
seller_CF_year_sum = seller_CF.groupby('年度')['货物数量（张）'].sum()
seller_CF_class_sum = seller_CF.groupby('等级')['货物数量（张）'].sum()
seller_CF_category_sum = seller_CF.groupby('类别')['货物数量（张）'].sum()


def search_seller_CF_num(column_index_str, index_value_str):
    sum_buyers = 0
    if column_index_str == '仓库':
        sum_buyers = seller_CF_warehouse_sum[seller_CF_warehouse_sum.index == int(index_value_str)].values
    elif column_index_str == '产地':
        sum_buyers = seller_CF_area_sum[seller_CF_area_sum.index == index_value_str].values
    elif column_index_str == '年度':
        sum_buyers = seller_CF_year_sum[seller_CF_year_sum.index == int(index_value_str)].values
    elif column_index_str == '等级':
        sum_buyers = seller_CF_class_sum[seller_CF_class_sum.index == index_value_str].values
    elif column_index_str == '类别':
        sum_buyers = seller_CF_category_sum[seller_CF_category_sum.index == index_value_str].values
    return sum_buyers


# 函数是为了修改取货后的seller表：
def cut_test(seller_goods_cache):
    return 0

# 函数为了统计当前交易数据所满足的意向：
def check_satisfy(x, check_information2):
    satisfy_str = ''
    for i in range(0, len(check_information2), 2):
        if x[check_information2[i]] == check_information2[i + 1] and satisfy_str == '':
            satisfy_str = satisfy_str + str(int((i + 2) / 2))
        elif x[check_information2[i]] == check_information2[i + 1] and satisfy_str != '':
            satisfy_str = '-'.join([satisfy_str, str(int((i + 2) / 2))])
    if satisfy_str == '':
        satisfy_str = '0'
    return satisfy_str


# ________________________________________________________对SR进行操作_________________________________________________
# 当SR意向单元格的所有货物都能被第一意向seller_SR满足:
def pick_SR_1(buyer_information):
    global output
    check_information = [x for x in buyer_information[4:14] if x == x]
    for i in range(0, len(check_information), 2):
        if check_information[i] != '品牌' and check_information[i] != '等级':
            check_information[i + 1] = int(check_information[i + 1])
    seller_pick_goods = seller[(seller['品种'] == 'SR') & (seller['货物数量（张）'] != 0)]
    for i in range(0, len(check_information), 2):
        # 这里要加个判断！每个意向加进来，筛选后的数量必须>=需求量,否则停止筛选！
        if seller_pick_goods[(seller_pick_goods[check_information[i]] == check_information[i + 1])]['货物数量（张）'].sum() >= \
                buyer_information[2]:
            seller_pick_goods = seller_pick_goods[(seller_pick_goods[check_information[i]] == check_information[i + 1])]
    # 得到的表，按照仓库排序，使货物尽量从一个仓库出货
    start_pick_seller = seller_pick_goods.sort_values(by=['仓库', '货物数量（张）'], ascending=False)
    start_pick_seller['累计货物数量（张）'] = start_pick_seller['货物数量（张）'].cumsum()
    start_pick_seller['是否拣选'] = 1
    start_pick_seller['是否拣选'].where(start_pick_seller['累计货物数量（张）'] <= buyer_information[2], 0, inplace=True)
    final_pick_goods_pd = start_pick_seller[start_pick_seller['是否拣选'] == 1]
    # 还得判断下一个是否加进来
    if len(final_pick_goods_pd) != 0 and final_pick_goods_pd['累计货物数量（张）'].values[-1] < buyer_information[2]:
        final_pick_goods_pd = final_pick_goods_pd.append(start_pick_seller.iloc[len(final_pick_goods_pd), :])
    elif len(final_pick_goods_pd) == 0:
        final_pick_goods_pd = final_pick_goods_pd.append(start_pick_seller.iloc[0, :])
    # 取走的卖方的Index,这个index 还是最大的seller的index,不是seller_SR的index。
    final_pick_goods_index = final_pick_goods_pd.index
    # 每个index所拿走的货物数量：
    final_pick_goods = final_pick_goods_pd['货物数量（张）'].values
    if len(final_pick_goods) == 1:
        final_pick_goods[0] = buyer_information[2]
    else:
        final_pick_goods[-1] = buyer_information[2] - final_pick_goods_pd['累计货物数量（张）'].values[-2]
    # 拿走对应的seller的货：
    judge_binary_list = seller.index.isin(final_pick_goods_index)
    last_pick_goods_number = seller.loc[final_pick_goods_index[-1], '货物数量（张）']
    seller.loc[judge_binary_list, '货物数量（张）'] = seller.loc[judge_binary_list, '货物数量（张）'].apply(cut_test)
    seller.loc[final_pick_goods_index[-1], '货物数量（张）'] = last_pick_goods_number - final_pick_goods[-1]
    # 记录满足意向的情况,并且转化成字符串的形式:
    final_pick_goods_satisfy = final_pick_goods_pd.apply(check_satisfy, args=(check_information,), axis=1).tolist()
    # 每次跑完一条买方数据，都要保存在Output里面：
    len_of_final_index = len(final_pick_goods_index)
    output_append = pd.DataFrame(
        {'买方客户': [buyer_information[0]] * len_of_final_index, '卖方客户': seller.iloc[final_pick_goods_index, 0].tolist(),
         '品种': ['SR'] * len_of_final_index, '货物编号': seller.iloc[final_pick_goods_index, 2].tolist(),
         '仓库': seller.iloc[final_pick_goods_index, 4].tolist(), '分配货物数量': final_pick_goods,
         '对应意向顺序': final_pick_goods_satisfy})
    output = pd.concat([output, output_append], ignore_index=True)
    return final_pick_goods_index, final_pick_goods, final_pick_goods_satisfy


# 当SR意向单元格的所有货物不能被第一意向seller_SR满足:
def pick_SR_2(buyer_information):
    global output
    check_information = [x for x in buyer_information[4:14] if x == x]
    for i in range(0, len(check_information), 2):
        if check_information[i] != '品牌' and check_information[i] != '等级':
            check_information[i + 1] = int(check_information[i + 1])
    # 需求大于总卖方:
    if buyer_information[2] > search_seller_SR_num(check_information[0], check_information[1]):
        seller_pick_goods = seller[(seller['品种'] == 'SR') &
                                   (seller[check_information[0]] == check_information[1]) & (seller['货物数量（张）'] != 0)]
        # 取走的卖方所有的Index,这个index 还是最大的seller的index,不是seller_SR的index。
        final_pick_goods_index_1 = seller_pick_goods.index
        # 每个index所拿走的货物数量：
        final_pick_goods_1 = seller_pick_goods['货物数量（张）'].values
        # 拿走对应的seller的货：
        judge_binary_list = seller.index.isin(final_pick_goods_index_1)
        seller.loc[judge_binary_list, '货物数量（张）'] = seller.loc[judge_binary_list, '货物数量（张）'].apply(cut_test)
        # 记录满足意向的情况,并且转化成字符串的形式:
        final_pick_goods_satisfy_1 = seller_pick_goods.apply(check_satisfy, args=(check_information,), axis=1).tolist()
        # 每次跑完一条买方数据，都要保存在Output里面：
        len_of_final_index = len(final_pick_goods_index_1)
        output_append = pd.DataFrame(
            {'买方客户': [buyer_information[0]] * len_of_final_index,
             '卖方客户': seller.iloc[final_pick_goods_index_1, 0].tolist(),
             '品种': ['SR'] * len_of_final_index, '货物编号': seller.iloc[final_pick_goods_index_1, 2].tolist(),
             '仓库': seller.iloc[final_pick_goods_index_1, 4].tolist(), '分配货物数量': final_pick_goods_1,
             '对应意向顺序': final_pick_goods_satisfy_1})
        output = pd.concat([output, output_append], ignore_index=True)
        # 更新buyer买方的数量，因为其中一部分已经被seller拿走.
        buyer_information_new = buyer_information.copy()
        buyer_information_new[2] = buyer_information[2] - seller_pick_goods['货物数量（张）'].sum()
        # 剩下未满足的需求，带入对应的SR_1函数，求index\goods\satisfy
        final_pick_goods_index_2, final_pick_goods_2, final_pick_goods_satisfy_2 = pick_SR_1(buyer_information_new)
    # 该买方的需求<=卖方还剩的供给：
    else:
        seller_pick_goods = seller[(seller['品种'] == 'SR') & (seller['货物数量（张）'] != 0)]
        for i in range(0, len(check_information), 2):
            # 这里要加个判断！每个意向加进来，筛选后的数量必须>=需求量,否则停止筛选！
            if seller_pick_goods[(seller_pick_goods[check_information[i]] == check_information[i + 1])][
                '货物数量（张）'].sum() >= \
                    buyer_information[2]:
                seller_pick_goods = seller_pick_goods[
                    (seller_pick_goods[check_information[i]] == check_information[i + 1])]
        # 得到的表，按照仓库排序，使货物尽量从一个仓库出货
        start_pick_seller = seller_pick_goods.sort_values(by=['仓库', '货物数量（张）'], ascending=False)
        start_pick_seller['累计货物数量（张）'] = start_pick_seller['货物数量（张）'].cumsum()
        start_pick_seller['是否拣选'] = 1
        start_pick_seller['是否拣选'].where(start_pick_seller['累计货物数量（张）'] <= buyer_information[2], 0, inplace=True)
        final_pick_goods_pd = start_pick_seller[start_pick_seller['是否拣选'] == 1]
        # 还得判断下一个是否加进来？
        if len(final_pick_goods_pd) != 0 and final_pick_goods_pd['累计货物数量（张）'].values[-1] < buyer_information[2]:
            final_pick_goods_pd = final_pick_goods_pd.append(start_pick_seller.iloc[len(final_pick_goods_pd), :])
        elif len(final_pick_goods_pd) == 0:
            final_pick_goods_pd = final_pick_goods_pd.append(start_pick_seller.iloc[0, :])
        # 取走的卖方的Index,这个index 还是最大的seller的index,不是seller_SR的index。
        final_pick_goods_index = final_pick_goods_pd.index
        # 每个index所拿走的货物数量：
        final_pick_goods = final_pick_goods_pd['货物数量（张）'].values
        if len(final_pick_goods) == 1:
            final_pick_goods[0] = buyer_information[2]
        else:
            final_pick_goods[-1] = buyer_information[2] - final_pick_goods_pd['累计货物数量（张）'].values[-2]
        # 拿走对应的seller的货：
        judge_binary_list = seller.index.isin(final_pick_goods_index)
        last_pick_goods_number = seller.loc[final_pick_goods_index[-1], '货物数量（张）']
        seller.loc[judge_binary_list, '货物数量（张）'] = seller.loc[judge_binary_list, '货物数量（张）'].apply(cut_test)
        seller.loc[final_pick_goods_index[-1], '货物数量（张）'] = last_pick_goods_number - final_pick_goods[-1]
        # 记录满足意向的情况,并且转化成字符串的形式:
        final_pick_goods_satisfy = final_pick_goods_pd.apply(check_satisfy, args=(check_information,), axis=1).tolist()
        # 每次跑完一条买方数据，都要保存在Output里面：
        len_of_final_index = len(final_pick_goods_index)
        output_append = pd.DataFrame(
            {'买方客户': [buyer_information[0]] * len_of_final_index,
             '卖方客户': seller.iloc[final_pick_goods_index, 0].tolist(),
             '品种': ['SR'] * len_of_final_index, '货物编号': seller.iloc[final_pick_goods_index, 2].tolist(),
             '仓库': seller.iloc[final_pick_goods_index, 4].tolist(), '分配货物数量': final_pick_goods,
             '对应意向顺序': final_pick_goods_satisfy})
        output = pd.concat([output, output_append], ignore_index=True)
    return True


for p in range(len(buyer_first_unique_SR)):
    if names.get('buyer_unique_SR' + str(p))[
        '购买货物数量'].sum() <= search_seller_SR_num(buyer_first_unique_SR[p][:2],
                                                buyer_first_unique_SR[p][2:]):
        buyer_unique_cache = names.get('buyer_unique_SR' + str(p)).sort_values(by='购买货物数量', ascending=False)
        buyer_unique_cache.apply(pick_SR_1, axis=1, result_type="expand")
    else:
        buyer_unique_cache = names.get('buyer_unique_SR' + str(p)).sort_values(by='平均持仓时间', ascending=False)
        buyer_unique_cache.apply(pick_SR_2, axis=1, result_type="expand")


# ________________________________________________________对CF进行操作_________________________________________________
...
...

我处理该问题的思路是：

把CF和SR分开处理；
把买方数据分成两大类，有第一意向和没有任何意向；
有第一意向的买方数据，按照第一意向不同，分成不同的意向单元，对意向单元做标准处理；
处理意向单元时，先判断意向单元的所有货物能否被剩下的卖方所满足。如果满足，带入pick_goods_CF_1或pick_goods_SR_1中，如果不满足，就带入pick_goods_CF_2或pick_goods_SR_2中；
选货时，选择同一个仓库出货；
卖方的数据及时更新，这步比较耗费时间；

另外，还有一些代码没有放上去：