# -*- coding: utf-8 -*-
'''
Created on August 14, 2018

@author: zww
'''
8 importtime9 importre10 importrandom11 importrequests12 from lxml importetree13 importpandas as pd14
15
# Module-level accumulators, one list per comment field. All six start out
# empty and are independent list objects.
# NOTE(review): presumably filled in lockstep by the scraping code further
# down the file (same index = same comment) -- confirm against the callers.
username_list = []
score_list = []
date_list = []
like_list = []
content_list = []
userid_list = []
19
20 defget_content(musicId, currentPage):21 headers ={22 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}23 cookies = {'cookies': '你的cookie'}24
25 url = ''.join(['https://music.douban.com/subject/',26 str(musicId), '/comments/hot?p=', str(currentPage)])27
28 res = requests.get(url, headers=headers, cookies=cookies)29 res.encoding = "utf-8"
30
31 i