from requests_html import HTMLSession
import xlwt

# Open a session and fetch the article page.
session = HTMLSession()
url = 'https://www.jianshu.com/p/85f4624485b9'
read = session.get(url)
# CSS selector for the anchor elements to extract from the fetched page.
select = 'body > div.note > div.post > div.article > div.show-content > div > p > a'


def get_url_element(select):
    """Collect the text and absolute link of every element matching *select*.

    The lookup runs against the module-level ``read`` response.

    Args:
        select: CSS selector string passed to ``read.html.find``.

    Returns:
        A flat list alternating text and link:
        ``[text0, link0, text1, link1, ...]``. Elements that expose no
        absolute link are skipped. The list is empty when nothing matches.
    """
    url_list = []
    for result in read.html.find(select):
        links = result.absolute_links
        # An element without any link used to raise IndexError, which a bare
        # ``except`` swallowed and turned into a ``None`` return value for the
        # whole function. Skip such elements instead so the rest are kept.
        if not links:
            continue
        # Text shown for the matched element.
        url_list.append(result.text)
        # ``absolute_links`` is not indexable, so take its first item via an
        # iterator (same element ``list(links)[0]`` would have produced).
        url_list.append(next(iter(links)))
    return url_list


# Create the Excel workbook and add a worksheet.
workExcel = xlwt.Workbook()
sheet = workExcel.add_sheet('sheet')

data = get_url_element(select)
# Pair up the flat [text, link, ...] list into a dict. Entries with the same
# text collapse into one row, keeping the last link seen for that text.
df = dict(zip(data[0::2], data[1::2]))

# Write one row per entry: column 0 is the text, column 1 is the link.
for row, (text, link) in enumerate(df.items()):
    sheet.write(row, 0, text)
    sheet.write(row, 1, link)

workExcel.save('E:\\firstExcel\\output.xls')
# Reposted from: https://my.oschina.net/u/3867704/blog/3024564