使用正则表达式从 description 字段中解析 fits(适配信息)

from functools import reduce
import numpy as np
import  pandas as pd
import re
from  bs4 import BeautifulSoup
# Ordered (pattern, replacement) clean-up rules, compiled once at import time.
# Every pattern is case-insensitive and multi-line, so ``^`` matches at the
# start of each line.  The rules are applied strictly in this order.
_DESCRIPTION_RULES = tuple(
    (re.compile(pattern, flags=re.I | re.M), replacement)
    for pattern, replacement in (
        # Truncation rules: drop everything from the keyword to the end of the
        # text (``[\s\S]*`` is greedy and also crosses newlines).
        # NOTE(review): these run on the raw markup, so a keyword that occurs
        # inside a tag or attribute also triggers truncation - confirm that
        # this is intended.
        (r'Item[- ]?offered[\s\S]*$', ""),
        (r'Item[- ]?Included[\s\S]*$', ""),
        (r'position[\s\S]*$', ""),
        (r'Material[\s\S]*$', ""),
        (r'Item[- ]?Condition[\s\S]*$', ""),
        # Block-level closing tags and <br> become line breaks.  ``h[1-6]?``
        # covers the real heading closers </h1>..</h6> (and the bare </h> the
        # original pattern accepted).
        (r'</(div|h[1-6]?|p)>', '\n'),
        (r'<br.*?>', '\n'),
        # Every remaining tag is removed.
        (r'<[^>]+?>', ''),
        # Entity decoding.  ``&amp;`` must be decoded last, otherwise escaped
        # text such as ``&amp;lt;`` would be decoded twice and end up as ``<``.
        (r'&nbsp;', ' '),
        (r'&lt;', '<'),
        (r'&gt;', '>'),
        (r'&quot;', '"'),
        (r'&amp;', '&'),
        # Remove leading whitespace on every line; because the class also
        # matches newlines, blank lines are removed as well.
        (r'^[\n\s]*', ''),
        # Kept from the original rule set: any leading whitespace still left
        # on a line is collapsed to a single space.
        (r'^\s+', ' '),
        # Drop everything from the first line that starts with "make".
        (r'^make[\s\S]*$', ""),
    )
)


def get_description_string(description):
    """Reduce an HTML product description to its leading plain-text part.

    The description is passed through ``_DESCRIPTION_RULES`` in order:
    trailing sections introduced by keywords such as "Item offered",
    "position" or "Material" are cut off, HTML tags are converted to line
    breaks or removed, a handful of HTML entities are decoded, leading
    whitespace and blank lines are removed, and everything from the first
    line starting with "make" is dropped.

    Args:
        description: Raw description text (may contain HTML).  ``None`` is
            treated as an empty description.

    Returns:
        The cleaned text with surrounding whitespace stripped.
    """
    if description is None:
        return ""
    text = description
    for pattern, replacement in _DESCRIPTION_RULES:
        text = pattern.sub(replacement, text)
    return text.strip()
def description_parse(infile, outfile):
    """Clean the ``description`` column of an Excel sheet into a ``fit`` column.

    Reads ``infile`` with pandas, replaces missing descriptions with an empty
    string, runs every description through ``get_description_string`` and
    stores the result in a new ``fit`` column, then writes the whole frame to
    ``outfile`` without the index.  The SKU and the cleaned text of every row
    are printed as progress output.

    Args:
        infile: Path of the Excel workbook to read.  It must contain the
            columns ``product_sku`` and ``description``.
        outfile: Path of the Excel workbook to write.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # No ``encoding`` argument: Excel workbooks are not plain-text files and
    # current pandas ``read_excel`` rejects that keyword with a TypeError.
    df = pd.read_excel(infile)
    # Missing cells become "", and any non-text cell is converted to text so
    # the regex clean-up always receives a string.
    df["description"] = df["description"].fillna("").astype(str)
    # ``DataFrame.info`` prints its summary itself and returns None.
    df.info()

    fits = []
    for sku, raw_description in zip(df["product_sku"], df["description"]):
        print(sku)
        fit = get_description_string(raw_description)
        print(fit)
        fits.append(fit)
    # One column assignment instead of a per-cell ``df.loc`` write; the
    # ``fit`` column now also exists when the sheet has no rows.
    df["fit"] = fits
    df.to_excel(outfile, index=False)


# Script entry: clean the ACES fitment descriptions and write the parsed copy.
description_parse(
    infile="ACES_fitment_description.xlsx",
    outfile="ACES_fitment_description_parse.xlsx",
)



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值