# Python: converting HTML to Markdown
## html2text
# Convert the element with class "post" on a web page to Markdown using html2text.
# Install: pip3 install html2text requests beautifulsoup4
import html2text as ht
import requests
from bs4 import BeautifulSoup

text_maker = ht.HTML2Text()
# False (the library default) renders <table> elements as Markdown tables
# instead of passing them through as raw HTML.
text_maker.bypass_tables = False

# URL is the address of the page to convert; it must be defined before this runs.
htmlfile = requests.get(URL)
# Decode the response body as UTF-8 regardless of what the server declares.
htmlfile.encoding = 'utf-8'
soup = BeautifulSoup(htmlfile.text, 'html.parser')

post = soup.find(class_='post')
if post is None:
    raise ValueError("no element with class 'post' found in the page")

# Pass the element's HTML markup, not `.text`: `.text` strips every tag, which
# would leave html2text with plain text and nothing to convert.
content = str(post)
text = text_maker.handle(content)
## tomd
# Convert the element with class "post" on a web page to Markdown using tomd.
import requests
import tomd
from bs4 import BeautifulSoup

# URL is the address of the page to convert; it must be defined before this runs.
htmlfile = requests.get(URL)
# Decode the response body as UTF-8 regardless of what the server declares.
htmlfile.encoding = 'utf-8'
soup = BeautifulSoup(htmlfile.text, 'html.parser')

post = soup.find(class_='post')
if post is None:
    raise ValueError("no element with class 'post' found in the page")

# Pass the element's HTML markup, not `.text`: `.text` strips every tag, which
# would leave Tomd with plain text and nothing to convert.
content = str(post)
text = tomd.Tomd(content).markdown
## pypandoc
# Convert an HTML string to Markdown using pypandoc (requires a pandoc install).
import pypandoc

# htmlTxt is the HTML source string to convert; it must be defined before this runs.
# `--markdown-headings=atx` requests `#`-style headings. It replaces the
# `--atx-headers` flag, which was deprecated in pandoc 2.11.2 and removed in
# pandoc 3.0; pandoc older than 2.11.2 still needs `--atx-headers`.
output = pypandoc.convert_text(
    htmlTxt,
    'md',
    format='html',
    extra_args=['--markdown-headings=atx'],
)
## MarkDownload