"""Extract the leading "fits" text from HTML product descriptions.

``get_description_string`` turns one HTML description into plain text and cuts
away trailing boilerplate sections; ``description_parse`` applies it to every
row of an Excel sheet and stores the result in a new ``fit`` column.
"""
from functools import reduce
import re

import numpy as np
import pandas as pd

try:
    from bs4 import BeautifulSoup
except ImportError:
    # Nothing in this script uses BeautifulSoup; keep the name available when
    # the package is installed, but do not make it a hard requirement.
    BeautifulSoup = None

# Every rule is applied case-insensitively; MULTILINE makes ``^``/``$`` match
# at line boundaries as well as at the ends of the whole text.
_RULE_FLAGS = re.IGNORECASE | re.MULTILINE

# Ordered (pattern, replacement) pairs.  Order matters: section cut-offs run on
# the raw HTML, tags are stripped next, and entities are decoded only after the
# tags are gone so that a decoded "<" is never mistaken for markup.
_DESCRIPTION_RULES = tuple(
    (re.compile(pattern, _RULE_FLAGS), replacement)
    for pattern, replacement in (
        # Drop everything from one of these section headings to the end of the
        # text (``[\s\S]*`` is greedy and also crosses newlines).
        (r'Item[- ]?offered[\s\S]*$', ''),
        (r'Item[- ]?Included[\s\S]*$', ''),
        (r'position[\s\S]*$', ''),
        (r'Material[\s\S]*$', ''),
        (r'Item[- ]?Condition[\s\S]*$', ''),
        # Closing block tags and <br> become line breaks.
        # NOTE(review): ``h`` only matches a literal ``</h>``; ``</h1>``..``</h6>``
        # fall through to the generic tag rule below and produce no newline.
        (r'</(div|h|p)>', '\n'),
        (r'<br.*?>', '\n'),
        # Any remaining tag is removed outright.
        (r'<[^>]+?>', ''),
        # Decode the common HTML entities (named or numeric form).  The
        # patterns use a group so an HTML-unescaping tool touching this source
        # cannot silently turn them into no-ops.  ``&amp;`` goes last so that
        # ``&amp;lt;`` decodes to the text ``&lt;`` rather than to ``<``.
        (r'&(?:nbsp|#160);', ' '),
        (r'&(?:lt|#60);', '<'),
        (r'&(?:gt|#62);', '>'),
        (r'&(?:quot|#34);', '"'),
        (r'&(?:amp|#38);', '&'),
        # Remove blank lines and leading indentation on every line.
        (r'^[\n\s]*', ''),
        # NOTE(review): appears redundant after the previous rule; kept so the
        # rule pipeline stays the same as before.
        (r'^\s+', ' '),
        # Drop everything from the first line that starts with "make".
        (r'^make[\s\S]*$', ''),
    )
)


def get_description_string(description: str) -> str:
    """Return the plain-text "fits" part of an HTML product description.

    The rules in ``_DESCRIPTION_RULES`` are applied in order: trailing
    sections (item offered/included/condition, position, material, make) are
    cut off, HTML tags are converted to newlines or removed, basic HTML
    entities are decoded, and blank lines / leading whitespace are dropped.

    Args:
        description: Raw description text, possibly containing HTML.

    Returns:
        The cleaned text with surrounding whitespace stripped; ``""`` when
        nothing is left.
    """
    cleaned = reduce(
        lambda text, rule: rule[0].sub(rule[1], text),
        _DESCRIPTION_RULES,
        description,
    )
    return cleaned.strip()


def description_parse(infile, outfile):
    """Parse the ``description`` column of an Excel file into a ``fit`` column.

    Reads *infile*, cleans every description with
    ``get_description_string``, stores the result in a ``fit`` column and
    writes the whole sheet to *outfile* without the index.  The SKU and the
    parsed text of each row are printed as progress output.

    Args:
        infile: Path of the Excel file to read; it must contain the columns
            ``product_sku`` and ``description``.
        outfile: Path of the Excel file to write.
    """
    # ``read_excel`` takes no ``encoding`` argument on current pandas; passing
    # one raises TypeError, so it is omitted here.
    df = pd.read_excel(infile)
    # Missing descriptions become empty strings and every other cell is coerced
    # to text so the regex rules always receive a ``str``.
    df["description"] = df["description"].fillna("").astype(str)
    # ``DataFrame.info`` prints its summary itself and returns None.
    df.info()

    fits = []
    for sku, raw_description in zip(df["product_sku"], df["description"]):
        print(sku)
        fit = get_description_string(raw_description)
        print(fit)
        fits.append(fit)
    # One column assignment instead of a per-cell ``df.loc`` write per row.
    df["fit"] = fits

    df.to_excel(outfile, index=False)


if __name__ == "__main__":
    description_parse(
        "ACES_fitment_description.xlsx",
        "ACES_fitment_description_parse.xlsx",
    )
description中fits解析——正则表达式
最新推荐文章于 2022-03-27 16:55:31 发布