方式一:
# Convert the HTML held in `description` to plain text and keep only the part
# after the first "Description" and before the first "Payment" (both matched
# case-insensitively). `description` must already be defined as a string.
#
# Each rule is a (pattern, replacement) pair. The rules are applied strictly
# in order, every one with re.I | re.M, so the ordering below matters.
RULES = [
    # Remove embedded CSS/JS together with their content.
    (r'<style[\s\S]*?</style>', ''),
    (r'<script[\s\S]*?</script>', ''),
    # Closing block-level tags (including </h1>..</h6>) and <br> become
    # line breaks so the text keeps its paragraph structure.
    (r'</(?:div|h[1-6]|p)>', '\n'),
    (r'<br.*?>', '\n'),
    # Strip every remaining tag.
    (r'<[^>]+?>', ''),
    # Decode the common entities only after the tags are gone, so escaped
    # markup such as "&lt;b&gt;" survives as literal text. "&amp;" is decoded
    # last; decoding it first would turn "&amp;lt;" into "<" (double decode).
    (r'&nbsp;', ' '),
    (r'&lt;', '<'),
    (r'&gt;', '>'),
    (r'&quot;', '"'),
    (r'&amp;', '&'),
    # Under re.M, "^" matches at every line start, so this drops blank lines
    # and the leading whitespace of each line.
    (r'^[\n\s]*', ''),
    # \A and \Z anchor to the whole string even with re.M set. With "^" the
    # first rule would match again at later line starts and delete body text
    # preceding any further occurrence of "Description".
    (r'\A[\s\S]*?Description', ''),
    (r'Payment[\s\S]*\Z', ''),
]

result = reduce(
    lambda text, rule: re.sub(rule[0], rule[1], text, flags=re.I | re.M),
    RULES,
    description,
).strip()
print(result)
方式二:
# Convert the HTML held in `description` to plain text and keep only the part
# after the first "Description" and before the first "Payment" (both matched
# case-insensitively). `description` must already be defined as a string.
# The substitutions are order-dependent.
result = description

# Remove embedded CSS/JS together with their content; stripping only the
# tags would leave the stylesheet/script source in the text.
result = re.sub(r'<style[\s\S]*?</style>', '', result, flags=re.I)
result = re.sub(r'<script[\s\S]*?</script>', '', result, flags=re.I)

# Closing block-level tags (including </h1>..</h6>) and <br> become line
# breaks; every remaining tag is dropped.
result = re.sub(r'</(?:div|h[1-6]|p)>', '\n', result, flags=re.I)
result = re.sub(r'<br.*?>', '\n', result, flags=re.I)
result = re.sub(r'<[^>]+?>', '', result, flags=re.I)

# Decode the common entities only after the tags are gone, so escaped markup
# such as "&lt;b&gt;" survives as literal text. "&amp;" is decoded last;
# decoding it first would turn "&amp;lt;" into "<" (double decode).
result = re.sub(r'&nbsp;', ' ', result, flags=re.I)
result = re.sub(r'&lt;', '<', result, flags=re.I)
result = re.sub(r'&gt;', '>', result, flags=re.I)
result = re.sub(r'&quot;', '"', result, flags=re.I)
result = re.sub(r'&amp;', '&', result, flags=re.I)

# With re.M, "^" matches at every line start, so this drops blank lines and
# the leading whitespace of each line.
result = re.sub(r'^[\n\s]*', '', result, flags=re.I | re.M)

# Cut everything up to the first "Description" and everything from the first
# "Payment" to the end of the text. \A / \Z anchor to the whole string, so
# the cut happens exactly once and is not limited to a single line.
result = re.sub(r'\A[\s\S]*?Description', '', result, flags=re.I)
result = re.sub(r'Payment[\s\S]*\Z', '', result, flags=re.I)

result = result.strip()
print(result)