from bs4 import BeautifulSoup
import json
import urllib.request
# Set `a` to the input HTML file name and `b` to the .ipynb file name to save.
a = "A.html"
b = "B.ipynb"

# Read the notebook HTML from a local file. The context manager closes the
# handle even if reading fails.
with open(a, encoding='utf8') as response:
    text = response.read()

soup = BeautifulSoup(text, 'lxml')
# To see some of the parsed HTML while debugging: print(soup.div)

# Skeleton of the notebook JSON (nbformat 4, minor 1); cells are appended below.
dictionary = {'nbformat': 4, 'nbformat_minor': 1, 'cells': [], 'metadata': {}}

for d in soup.find_all("div"):
    # A div without a class attribute contributes no cell, hence the empty
    # default. Every class on the div is examined, so a div carrying both
    # recognised classes yields one cell per class.
    for clas in d.attrs.get("class", ()):
        if clas == "input_area":
            # Code cell: keep only the text content of the div.
            cell = {
                'metadata': {},
                'outputs': [],
                'source': [d.get_text()],
                'execution_count': None,
                'cell_type': 'code',
            }
            dictionary['cells'].append(cell)
        elif clas == "text_cell_render":
            # Markdown cell: keep the inner markup of the div as the source.
            cell = {
                'metadata': {},
                'source': [d.decode_contents()],
                'cell_type': 'markdown',
            }
            dictionary['cells'].append(cell)

# Write through a context manager so the notebook is flushed and closed
# deterministically instead of relying on garbage collection.
with open(b, 'w', encoding='utf8') as notebook_file:
    notebook_file.write(json.dumps(dictionary))