① 先将要清洗的内容转为字符串(str)
def praser_html(html: str):
    """Parse HTML text with BeautifulSoup using the ``lxml`` parser.

    This is a generator function: nothing is parsed when it is called.
    The soup is built on the first iteration and exactly one object is
    yielded, so callers consume it with ``next(...)`` or a ``for`` loop.

    Args:
        html: The HTML text.

    Yields:
        The ``BeautifulSoup`` object built from ``html``.
    """
    # NOTE(review): BeautifulSoup is not imported in this snippet; presumably
    # `from bs4 import BeautifulSoup` lives elsewhere in the file -- confirm.
    yield BeautifulSoup(html, "lxml")
② 对字符串进行清洗(去除换行符、回车符、空括号、不间断空格和普通空格)
def clean_name(name: str) -> str:
    """Return ``name`` with line breaks, empty parentheses and spaces removed.

    The following substrings are deleted, one after another, in this order:
    ``"\\n"``, ``"()"``, ``"\\r"``, ``"\\xa0"`` (non-breaking space), ``" "``.

    Args:
        name: The raw text to clean.

    Returns:
        The cleaned text; an empty string stays empty.
    """
    # Order matters and is kept as in the original chain of replace() calls.
    # NOTE(review): "()" is removed before "\r", "\xa0" and " ", so an input
    # such as "( )" ends up as "()" instead of "" -- confirm this is intended.
    for unwanted in ("\n", "()", "\r", "\xa0", " "):
        name = name.replace(unwanted, "")
    return name
③ 调用清洗函数 clean_name
# Clean the raw name once, show it for debugging, then store it on the item.
cleaned_name = clean_name(name)
print('name', cleaned_name)
item['name'] = cleaned_name