程序代码:
import requests
from lxml import etree
import xlwt

# Browser-like User-Agent sent with the page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}


def saveData(datalist, savepath):
    """Write the scraped rows to an .xls workbook.

    Row 0 of sheet 'sht1' receives the column headers; every entry of
    *datalist* is then written on its own row below, in order.

    Args:
        datalist: Sequence of rows, each a sequence whose first two items
            are the title and the date. Extra items in a row are ignored;
            a short row only fills the cells it has values for.
        savepath: Destination path handed to ``Workbook.save``.
    """
    print('save....')
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('sht1', cell_overwrite_ok=True)

    # Column headers.
    col = ('标题', '日期')
    for col_idx, name in enumerate(col):
        sheet.write(0, col_idx, name)

    # Write however many rows were actually scraped instead of assuming
    # exactly five: a fixed count raises IndexError on shorter lists and
    # silently drops the tail of longer ones.
    for row_idx, data in enumerate(datalist, start=1):
        for col_idx, value in enumerate(data[:len(col)]):
            sheet.write(row_idx, col_idx, value)

    book.save(savepath)
    print('save ok....')


if __name__ == "__main__":
    # Fetch the page source. The timeout keeps the script from hanging on a
    # stalled connection, and the status check stops an HTTP error page from
    # being parsed as if it were the real content.
    url = 'https://weixin.qq.com/'
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    page_text = response.text

    # Parse the HTML and select the <li> elements that hold the entries.
    tree = etree.HTML(page_text)
    li_list = tree.xpath('/html/body/div/div/div[2]/div[3]/ul/li')

    savepath = 'aaa.xls'
    datalist = []
    # Build one [title, date] row per list item.
    for li in li_list:
        # text() returns only the text that is a direct child of the tag.
        titles = li.xpath('./a/text()')
        dates = li.xpath('./span/text()')
        # Skip items missing either field rather than failing on [0].
        if not titles or not dates:
            continue
        datalist.append([titles[0], dates[0]])

    saveData(datalist, savepath)
相关技术:
1、xlwt第三方模块;2、列表的嵌套;