python数据处理2

采用 txt 文件保存数据,字段以逗号(,)隔开,直接将文件扩展名换成 csv 即可,比 1 用 DataFrame 快几倍

import pandas
import numpy as np
def _blank_if_nan(value, strip=False):
    """Render *value* as text, mapping anything that prints as "nan" to "".

    The check is done on ``str(value).strip()`` so that both float NaN and
    a literal (possibly padded) "nan" string collapse to an empty field.
    When *strip* is true the returned text is also whitespace-stripped.
    """
    text = str(value)
    if text.strip() == "nan":
        return ""
    return text.strip() if strip else text


def write_report(a_df, b_df, out):
    """Write the SKU / specific-name comparison report to the text stream *out*.

    *a_df* must provide the columns ``product_sku``, ``ge_category1``,
    ``specify_name`` and ``specify_value``; *b_df* must provide
    ``ge_category1``, ``specify_name`` and ``state``.

    For every distinct ``product_sku`` in *a_df* (in first-seen order):

    * each of its rows is written once; the third column repeats the
      specific name only when exactly one row of *b_df* (restricted to the
      SKU's ``ge_category1``) carries the same ``specify_name``;
    * each *b_df* name of that category that the SKU does not have is then
      written with an empty first-name and value column.

    Fields are emitted through ``csv.writer`` so that values containing a
    comma, a quote or a line break are quoted instead of silently shifting
    the columns; fields without such characters are written unchanged.
    """
    import csv  # local import: the module header does not import csv

    # The header is written verbatim (including its irregular spacing).
    out.write("product_sku, specify_name_viomall,  specify_name_ebay, specify_value, state\n")
    writer = csv.writer(out, lineterminator="\n")

    for sku in a_df.get("product_sku").drop_duplicates():
        print(sku)  # progress output, one line per SKU
        sku_rows = a_df[a_df["product_sku"] == sku]
        # The category is taken from the first row of the SKU.
        ge_category1 = sku_rows.get("ge_category1").values[0]
        sku_rows = sku_rows[["product_sku", "specify_name", "specify_value"]]
        required = b_df[b_df["ge_category1"] == ge_category1][["specify_name", "state"]]

        for row in sku_rows.values:
            specify_name = row[1]
            matches = required[required["specify_name"] == specify_name]
            states = matches.get("state").values
            state = states[0] if len(states) > 0 else np.nan
            if len(matches.values) == 1:
                # Unique match: repeat the name in the third column; the
                # value is whitespace-stripped in this branch.
                writer.writerow([
                    str(row[0]),
                    str(specify_name),
                    str(specify_name),
                    _blank_if_nan(row[2], strip=True),
                    _blank_if_nan(state),
                ])
            else:
                # No match or an ambiguous one: third column stays empty.
                # NOTE(review): the value is not stripped here, unlike the
                # branch above; kept as-is to preserve the existing output.
                writer.writerow([
                    str(row[0]),
                    str(specify_name),
                    "",
                    _blank_if_nan(row[2]),
                    _blank_if_nan(state),
                ])

        for row in required.values:
            specify_name = row[0]
            present = sku_rows[sku_rows["specify_name"] == specify_name]
            if len(present.values) == 0:
                # Name listed for the category but absent from this SKU.
                writer.writerow([
                    str(sku),
                    "",
                    str(specify_name),
                    "",
                    _blank_if_nan(row[1]),
                ])


def main():
    """Read the two GBK-encoded input CSVs and write ``dataforev.txt``."""
    a_p = "viomall_pitem_specific.csv"
    b_p = "ebay_specific_required.csv"
    # Inputs are loaded before the output is opened, so a failed read does
    # not truncate a previously generated report.
    a_df = pandas.read_csv(a_p, encoding="gbk", keep_default_na=True)
    b_df = pandas.read_csv(b_p, encoding="gbk", keep_default_na=True)
    # The context manager closes the file even if report generation raises.
    with open('dataforev.txt', 'w+') as file_object:
        write_report(a_df, b_df, file_object)


if __name__ == '__main__':
    main()


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值