import math
import pandas as pd
import re,json
import time
import numpy as np
from get_ebay_item_thread_db_specifics_api import Database
import os
import glob
from shutil import copyfile
from Lib import hashlib
import pandas as pd
from PIL import Image
"""
33709 Fog Lights
33710 Head Lights
33716 3rd Brake Light
"""
# pCategoryID values kept when filtering listings (IDs and names are listed
# in the table directly above).
category_filter = [
    33710,
    33709,
    33716,
]
def specific_attribute_parser_df(infile):
    """Load a listing spreadsheet and expand its JSON ``specifics`` column.

    Rows are kept only when ``pCategoryID`` is in the module-level
    ``category_filter`` and ``specifics`` is non-null. Each remaining
    ``specifics`` cell is parsed as JSON and its keys become new columns.

    Args:
        infile: Anything accepted by ``pandas.read_excel``.

    Returns:
        A DataFrame sorted by ``sold`` (descending) containing the original
        columns followed by one column per specifics key, the new columns
        ordered by how many rows have a value for them (most populated
        first). When no row survives the filter, the (empty) filtered frame
        is returned with only the original columns.
    """
    df = pd.read_excel(infile)
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly rather than wrapped in print().
    df.info()
    # Keep only the wanted categories with non-null specifics.
    df = df[df["pCategoryID"].isin(category_filter) & df["specifics"].notnull()]
    df.info()

    if df.empty:
        # Nothing to expand: an empty specifics table has no "ebayno" column,
        # so the merge below could not run.
        return df.sort_values(by=["sold"], ascending=False)

    parsed_rows = []
    for ebayno, specific in zip(df["ebayno"], df["specifics"]):
        item = json.loads(specific)
        item["ebayno"] = ebayno  # join key for the merge below
        parsed_rows.append(pd.Series(item))
    result = pd.DataFrame(parsed_rows)

    # Order the new attribute columns by non-null count, most populated first.
    counts = result.count(axis='index').sort_values(ascending=False)
    columns = list(df.columns)
    columns.extend(name for name in counts.index if name != "ebayno")

    df = pd.merge(df, result, on="ebayno", how="left")
    df = df.sort_values(by=["sold"], ascending=False)
    return df[columns]
def default(obj, value):
    """Join ``obj`` with ';', or return ``value`` when ``obj`` is None.

    Args:
        obj: An iterable of strings, or None.
        value: Fallback returned only when ``obj`` is None.

    Returns:
        ``value`` if ``obj`` is None, otherwise the items of ``obj`` joined
        with ';' (an empty iterable therefore yields an empty string).
    """
    return value if obj is None else ";".join(obj)
def get_sku_number(df):
    """Extract a SKU number for each row from its part-number columns.

    The part-number columns are scanned in priority order; the first one that
    holds a truthy value for a row is searched with the SKU pattern
    (``140dd`` or ``141dd`` followed only by non-digits up to the end of the
    text, case-insensitive). All matches are joined with ';' and stored in a
    new ``SKU_SUM`` column. Part-number columns that are absent from ``df``
    are skipped, and non-string cell values are converted with ``str()``
    before matching.

    Note: the ``SKU_SUM`` column is added to the passed-in ``df`` itself.

    Args:
        df: DataFrame with an ``ebayno`` column and any of the part-number
            columns listed below.

    Returns:
        A DataFrame restricted to rows with a non-empty ``SKU_SUM``, sorted by
        ``SKU_SUM``, with ``SKU_SUM`` placed immediately before ``ebayno``.
    """
    part_number_fields = [
        'Manufacturer Part Number',
        'Interchange Part Number',
        'Other Part Number',
        'Sku',
    ]
    # Compiled once instead of on every row.
    sku_pattern = re.compile(r"140\d{2}\D*$|141\d{2}\D*$", flags=re.I)

    df["SKU_SUM"] = ''
    for field in part_number_fields:
        if field not in df.columns:
            # The attribute columns come from parsed data and may be missing.
            continue
        for i in df.index[df[field].notnull()]:
            if df.loc[i, 'SKU_SUM']:
                continue  # an earlier, higher-priority column already matched
            values = df.loc[i, field]
            if not values:
                continue
            print("解析前 = ", values)
            # str() guards against numeric cells, which re cannot search.
            sku_number = ";".join(sku_pattern.findall(str(values)))
            print("sku_number = ", sku_number)
            df.loc[i, 'SKU_SUM'] = sku_number

    df = df[df["SKU_SUM"] != '']
    df = df.sort_values(by='SKU_SUM')
    columns = list(df.columns)
    columns.remove("SKU_SUM")
    columns.insert(columns.index("ebayno"), "SKU_SUM")
    return df[columns]
def ensure_SKU_JX_ebay(ebay, vio):
    """Map each listing's extracted SKU number onto a SKU from ``vio``.

    Every ``SKU_SUM`` value in ``ebay`` is prefixed with ``'JX-'``. The
    prefixed value is then compared, ignoring dashes, against the values of
    the ``产品SKU`` column of ``vio``; on a match the ``vio`` spelling is
    written to a new ``SKU`` column, otherwise ``SKU`` is an empty string.

    Note: ``ebay`` is modified in place (``SKU_SUM`` is rewritten and ``SKU``
    is added).

    Args:
        ebay: DataFrame with a ``SKU_SUM`` column.
        vio: DataFrame with a ``产品SKU`` column.

    Returns:
        ``ebay`` with the ``SKU`` column placed immediately before
        ``SKU_SUM``. An empty ``ebay`` yields an empty frame with the same
        column layout.
    """
    # Dash-free SKU -> SKU exactly as spelled in vio.
    sku_map = {str(sku).replace("-", ''): sku for sku in vio["产品SKU"]}
    print(sku_map)

    ebay["SKU_SUM"] = ['JX-' + str(number) for number in ebay["SKU_SUM"]]
    # Assign the whole column at once so it exists even when ebay is empty;
    # .get() leaves the lookup table untouched on a miss.
    ebay["SKU"] = [
        sku_map.get(prefixed.replace("-", ''), '')
        for prefixed in ebay["SKU_SUM"]
    ]

    columns = list(ebay.columns)
    columns.remove("SKU")
    columns.insert(columns.index("SKU_SUM"), "SKU")
    return ebay[columns]
def sku_listing(df, fixed_columns=14):
    """Keep rows with a mapped SKU and order columns by how populated they are.

    Rows whose ``SKU`` is an empty string are dropped. The first
    ``fixed_columns`` columns keep their position unconditionally; the
    remaining columns follow in descending order of non-null count, and any
    remaining column with no values at all is dropped.

    Args:
        df: DataFrame with a ``SKU`` column.
        fixed_columns: Number of leading columns to keep in place regardless
            of their non-null count. Defaults to 14.

    Returns:
        The filtered DataFrame with the reordered column selection.
    """
    df = df[df["SKU"] != '']
    col = list(df.columns)[:fixed_columns]

    # Non-null count per column, most populated first. A stable sort kind
    # keeps the ordering of equally populated columns deterministic.
    counts = df.count(axis='index').sort_values(ascending=False, kind="mergesort")
    counts = counts[counts > 0]
    print(counts.index)

    already_listed = set(col)
    for name in counts.index:
        if name not in already_listed:
            already_listed.add(name)
            col.append(name)
    print(col)
    return df[col]
def picture_archive(item, infile):
    """Copy each SKU's listing pictures into a per-SKU folder, skipping duplicates.

    For every SKU a folder ``<infile>/<sku>`` is created. All files matching
    ``<infile><ebayno>*.jpg`` for the SKU's listing numbers are copied into
    it, except files whose content (MD5 digest) was already copied for the
    same SKU. Matches are processed in sorted name order, so the first name
    among identical pictures is the one that is kept.

    Args:
        item: Mapping of SKU -> iterable of listing numbers (``ebayno``).
        infile: Directory holding the pictures. The glob pattern is built by
            plain string concatenation, so it must end with a path separator
            for files inside the directory to be found.

    Returns:
        None.
    """
    for sku, ebaynos in item.items():
        print(sku)
        print(ebaynos)
        target_dir = os.path.join(infile, sku)
        os.makedirs(target_dir, exist_ok=True)
        seen_digests = set()  # content hashes already copied for this SKU
        for ebayno in ebaynos:
            # glob order is arbitrary; sorting makes the surviving duplicate
            # deterministic.
            for picture in sorted(glob.glob(infile + str(ebayno) + '*.jpg')):
                # Close the handle promptly instead of leaking it.
                with open(picture, 'rb') as handle:
                    digest = hashlib.md5(handle.read()).hexdigest()
                if digest not in seen_digests:
                    seen_digests.add(digest)
                    copyfile(picture, os.path.join(target_dir, os.path.basename(picture)))
        print(len(seen_digests))
def picture_archive_map(sku_to_listing):
    """Build a SKU -> listing-number lookup from a listing table.

    Args:
        sku_to_listing: DataFrame with ``SKU`` and ``ebayno`` columns.

    Returns:
        A dict keyed by each distinct ``SKU`` value (in order of first
        appearance) whose value is the array of ``ebayno`` values of the rows
        carrying that SKU.
    """
    pairs = sku_to_listing[["SKU", "ebayno"]]
    sku_column = pairs["SKU"]
    return {
        sku: pairs.loc[sku_column == sku, "ebayno"].values
        for sku in sku_column.unique()
    }
# Picture archiving, part 2
# (Blog page metadata: latest recommended article published 2023-07-24 15:59:33)