from faker import Faker
import random
import pandas as pd
# Faker instance for the zh_CN locale (originally meant for simulating order
# numbers).
# NOTE(review): nothing visible in this file reads `f`; order ids are built
# from the row index instead. Kept so the module's names stay unchanged.
f = Faker(locale='zh_CN')


def get_order_count(low: int = 1, high: int = 5) -> int:
    """Generate the count of orders in one day randomly

    Args:
        low: the smallest count that may be returned (inclusive)
        high: the largest count that may be returned (inclusive)

    Returns:
        the count of store orders in one day, drawn from [low, high]

    Raises:
        ValueError: if low is greater than high (raised by random.randint)
    """
    # Adjust low/high to control the order volume; the defaults keep the
    # 1..5 range this script has always used.
    return random.randint(low, high)


def get_amount(low: float = 200, high: float = 400) -> float:
    """Generate the amount of order randomly

    Args:
        low: lower bound of the amount before rounding
        high: upper bound of the amount before rounding

    Returns:
        the amount of order, rounded to 2 decimal places
    """
    # random.random() is in [0, 1), so the unrounded value is in [low, high)
    # when low <= high; with the defaults this is random() * 200 + 200.
    return round(random.random() * (high - low) + low, 2)


def get_orders() -> list:
    """Generate the amounts of all orders of one day

    Returns:
        a list holding one random amount per order; its length is a random
        order count obtained from get_order_count()
    """
    # The count is drawn first, then one amount per order.
    return [get_amount() for _ in range(get_order_count())]
if __name__ == '__main__':
    # Load the store table and give every row the same join key.
    stores = pd.read_excel('./store_data.xlsx')
    stores['key'] = 1

    # Build one row per day of 2022, stored as 'YYYY-MM-DD' strings.
    day_labels = pd.date_range(start='20220101', end='20221231', freq='D').strftime('%Y-%m-%d')
    days = pd.DataFrame(day_labels.values, columns=['time'], dtype='object')
    days['key'] = 1

    # Joining on the constant key pairs every store with every day.
    # (The original author notes roughly 300k rows of data; the real size
    # depends on the contents of the store file.)
    paired = pd.merge(left=stores, right=days, how='outer', on=['key'])
    orders = paired.drop('key', axis=1)

    # Simulate the orders: each (store, day) row gets a list of amounts.
    daily_amounts = orders.apply(func=lambda x: get_orders(), axis=1)
    orders['amount'] = daily_amounts

    # Flatten so that each amount becomes its own row with a fresh 0..n-1 index.
    orders = orders.explode('amount', ignore_index=True)

    # Derive the order id from the row position, zero-padded to 7 digits.
    row_numbers = orders.reset_index()['index']
    orders['order_id'] = row_numbers.apply(lambda index: f'HDL-{index:0=7d}')

    # Keep only the needed columns, sort by order id, then rename the
    # columns for the exported sheet.
    wanted_columns = ['order_id', 'time', 'amount', '店铺编号']
    column_titles = {'order_id': '订单号', 'time': '订单时间', 'amount': '销售额'}
    orders = (
        orders.loc[:, wanted_columns]
        .sort_values(by=['order_id'])
        .rename(columns=column_titles)
    )

    orders.to_excel('./orders.xlsx', index=False)
    # Alternative CSV export, left disabled:
    # orders.to_csv('./orders.csv', sep=',', encoding='utf-8', index=False)