python网络爬虫从入门到实践 第4章(一)

python网络爬虫从入门到实践 第4章

代码:

import requests

# Real address of the comment-list request (a JSONP endpoint).
link = """https://api-zero.livere.com/v1/comments/list?callback=jQuery112403473268296510956_1531502963311&limit=10&repSeq=4272904&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1531502963313"""

# Send a browser-like User-Agent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}

# requests never times out by default; set a timeout so a stalled server
# cannot hang the script forever.
r = requests.get(link, headers=headers, timeout=10)
print(r.text)

格式化之后的返回值:

/**/
typeof jQuery112403473268296510956_1531502963311 === 'function' && jQuery112403473268296510956_1531502963311({
	"results": {
		"parents": [{
			"replySeq": 38480448,
			"name": "***",
			"memberId": "UID_F6B215D7DEEDEAE9AF81B2D8DB4E1E5F",
			"memberIcon": "http://thirdqq.qlogo.cn/g?b=oidb&k=ricQqwkWCnMtF1oFF4D6Isg&s=100&t=1555792070",
			"memberUrl": "https://qq.com/",
			"memberDomain": "qq",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38480448,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "1.192.165.1",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-19T02:51:55.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "有点难哦",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31232886,
			"memberSeq": 31773506,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38470012,
			"name": "Sunny",
			"memberId": "oBVoaxPUgzCwxoSxypq9Ku9WUY3c",
			"memberIcon": "http://thirdwx.qlogo.cn/mmopen/vi_32/Q0j4TwGTfTJdB0PgiagfrjwNpjDujYns8kgLBUn5fYuJDPu5xUT0B3jnbljIrcibHBsp2owe7IbxxdIEgbu2TqXg/132",
			"memberUrl": "http://www.wechat.com",
			"memberDomain": "wechat",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38470012,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "183.250.210.197",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-17T05:09:51.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "试试解析真实地址抓取",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31224852,
			"memberSeq": 31765380,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38469774,
			"name": "Sunny",
			"memberId": "oBVoaxPUgzCwxoSxypq9Ku9WUY3c",
			"memberIcon": "http://thirdwx.qlogo.cn/mmopen/vi_32/Q0j4TwGTfTJdB0PgiagfrjwNpjDujYns8kgLBUn5fYuJDPu5xUT0B3jnbljIrcibHBsp2owe7IbxxdIEgbu2TqXg/132",
			"memberUrl": "http://www.wechat.com",
			"memberDomain": "wechat",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38469774,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "183.250.210.197",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 1,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-17T04:37:26.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "测试网络爬虫",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31224852,
			"memberSeq": 31765380,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38444830,
			"name": "照空",
			"memberId": "UID_E89F1487563A6463B8C2653589A26C13",
			"memberIcon": "http://thirdqq.qlogo.cn/g?b=oidb&k=79JiaH75XKXia1d8y0CMPiaxA&s=100&t=1555482527",
			"memberUrl": "https://qq.com/",
			"memberDomain": "qq",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38444830,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "113.222.176.166",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-14T07:41:17.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "test",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31210855,
			"memberSeq": 31751262,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38356587,
			"name": "O0o0O0o0O",
			"memberId": "UID_185A17117B12BEA662B3FEB1A8F9D657",
			"memberIcon": "http://thirdqq.qlogo.cn/g?b=oidb&k=nj3cibyjaMgcZLianK9p7a5Q&s=100&t=1562782712",
			"memberUrl": "https://qq.com/",
			"memberDomain": "qq",
			"good": 1,
			"bad": 0,
			"police": 0,
			"parentSeq": 38356587,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "1.25.148.187",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 1,
			"regdate": "2019-08-05T06:34:06.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "老师你好,我看的是您的第一版书,其中第64页上面您说(.*?)只匹配了smarter,但是63页印的结果是smarter than \n而且我自己测试代码是也是smarter than我把(.*?)改成(.*)后也是smarter than ,去官网查看了一下,好像是*?叫懒惰匹配\n因为我测试了代码的确是书上结果,但是和后面您说的不一样,是一些其他的原因吗?",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31162013,
			"memberSeq": 31701971,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38349288,
			"name": "雨与雨",
			"memberId": "UID_FBEEEF983EA93BE422BB0FB802493F07",
			"memberIcon": "http://thirdqq.qlogo.cn/g?b=oidb&k=0hP4VIwPrEx5icPVloTCC9A&s=100&t=1557075356",
			"memberUrl": "https://qq.com/",
			"memberDomain": "qq",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38349288,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "106.6.201.121",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-04T12:27:28.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "123",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31158374,
			"memberSeq": 31698300,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38349283,
			"name": "雨与雨",
			"memberId": "UID_FBEEEF983EA93BE422BB0FB802493F07",
			"memberIcon": "http://thirdqq.qlogo.cn/g?b=oidb&k=0hP4VIwPrEx5icPVloTCC9A&s=100&t=1557075356",
			"memberUrl": "https://qq.com/",
			"memberDomain": "qq",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38349283,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "106.6.201.121",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-04T12:27:09.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "677",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31158374,
			"memberSeq": 31698300,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38319183,
			"name": "iverson",
			"memberId": "oBVoaxCVOAzR24xKYcrYOpCcU6LM",
			"memberIcon": "http://thirdwx.qlogo.cn/mmopen/vi_32/Q0j4TwGTfTKOq59TrO00BVEw7JtUpG8Wcf6OeJzEEkiapyHQ3AeuU5r5yDbdSgFykLxbFGOe6nRQ9xIZ66jYgrA/132",
			"memberUrl": "http://www.wechat.com",
			"memberDomain": "wechat",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38319183,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "36.18.100.187",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-01T07:11:19.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "wyf的test",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31142089,
			"memberSeq": 31681833,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38311822,
			"name": "Liwkns",
			"memberId": "UID_ACECDE844B8032C155C09FC0B38C7BC2",
			"memberIcon": "http://thirdqq.qlogo.cn/g?b=oidb&k=0KafAicLdsVfAamlJzO470g&s=100&t=1562941631",
			"memberUrl": "https://qq.com/",
			"memberDomain": "qq",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38311822,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "61.186.190.44",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-07-31T07:41:44.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "4.3节用Selenium打开了网页,但是查看源代码的时候,评论那一块仍然是动态数据,有没有遇到一样问题的小伙伴交流一下啊 QQ424524128",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31135561,
			"memberSeq": 31675229,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}, {
			"replySeq": 38309568,
			"name": "wmn",
			"memberId": "wangmengningswu@163.com",
			"memberIcon": "https://cdn-city.livere.com/images/user_profile_1.png",
			"memberUrl": null,
			"memberDomain": "livere",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38309568,
			"directSeq": 0,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "61.186.190.44",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-07-31T02:55:19.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "2019.7.31",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 0,
			"memberSeq": 0,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}],
		"children": [{
			"replySeq": 38358130,
			"name": "O0o0O0o0O",
			"memberId": "UID_185A17117B12BEA662B3FEB1A8F9D657",
			"memberIcon": "http://thirdqq.qlogo.cn/g?b=oidb&k=nj3cibyjaMgcZLianK9p7a5Q&s=100&t=1562782712",
			"memberUrl": "https://qq.com/",
			"memberDomain": "qq",
			"good": 0,
			"bad": 0,
			"police": 0,
			"parentSeq": 38356587,
			"directSeq": 38356587,
			"shortUrl": null,
			"title": "Hello world!",
			"site": "http://www.santostang.com/2018/07/04/hello-world/",
			"email": null,
			"ipAddress": "1.25.148.187",
			"isMobile": "0",
			"agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36",
			"septSns": null,
			"targetService": null,
			"targetUserName": null,
			"info1": null,
			"info2": null,
			"info3": null,
			"image1": null,
			"image2": null,
			"image3": null,
			"link1": null,
			"link2": null,
			"link3": null,
			"isSecret": 0,
			"isModified": 0,
			"confirm": 0,
			"subCount": 0,
			"regdate": "2019-08-05T08:20:28.000Z",
			"deletedDate": null,
			"file1": null,
			"file2": null,
			"file3": null,
			"additionalSeq": 0,
			"content": "我又测试了下觉得可能是正则表达式中的dogs限制了原因,懒惰匹配是尽可能少的,也就意味着可能找不到返回none也是正常的,因为dogs的存在,所以限制了必须要把前面的smarter than全部匹配,如果没有dogs的话,我测试是可以的,就只是匹配了smarter",
			"quotationSeq": null,
			"quotationContent": null,
			"consumerSeq": 1020,
			"livereSeq": 28583,
			"repSeq": 4272904,
			"memberGroupSeq": 31162013,
			"memberSeq": 31701971,
			"status": 0,
			"repGroupSeq": 0,
			"adminSeq": 25413747,
			"deleteReason": null,
			"sticker": 0,
			"version": null
		}],
		"quotations": []
	},
	"resultCode": 200,
	"resultMessage": "Okay, livere"
});

整体代码:

import json

import requests

# Real address of the comment-list request (a JSONP endpoint).
link = """https://api-zero.livere.com/v1/comments/list?callback=jQuery112403473268296510956_1531502963311&limit=10&repSeq=4272904&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1531502963313"""

# Send a browser-like User-Agent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}

# requests never times out by default; set a timeout so a stalled server
# cannot hang the script forever.
r = requests.get(link, headers=headers, timeout=10)
print(r.text)


# The response is JSONP: the JSON object is wrapped in a callback call,
# i.e. "callback({...});". Keep everything from the first '{' through the
# last '}' instead of blindly dropping the final two characters, so that
# trailing whitespace or a missing ';' does not corrupt the JSON.
json_string = r.text
json_string = json_string[json_string.find('{'):json_string.rfind('}') + 1]

json_data = json.loads(json_string)
comment_list = json_data['results']['parents']

# Number the comments from 1 and print the text of each one.
for i, eachone in enumerate(comment_list, start=1):
    message = eachone['content']
    print('i=', i, '-----', message)
    print('\n')


运行结果(只有这一次请求返回的 10 条评论,不是全部评论):


i= 1 ----- 有点难哦


i= 2 ----- 试试解析真实地址抓取


i= 3 ----- 测试网络爬虫


i= 4 ----- test


i= 5 ----- 老师你好,我看的是您的第一版书,其中第64页上面您说(.*?)只匹配了smarter,但是63页印的结果是smarter than 
而且我自己测试代码是也是smarter than我把(.*?)改成(.*)后也是smarter than ,去官网查看了一下,好像是*?叫懒惰匹配
因为我测试了代码的确是书上结果,但是和后面您说的不一样,是一些其他的原因吗?


i= 6 ----- 123


i= 7 ----- 677


i= 8 ----- wyf的test


i= 9 ----- 4.3节用Selenium打开了网页,但是查看源代码的时候,评论那一块仍然是动态数据,有没有遇到一样问题的小伙伴交流一下啊 QQ424524128


i= 10 ----- 2019.7.31
如何找到这个真实地址:

http://www.santostang.com/2018/07/04/hello-world/

在这里插入图片描述

最后的代码:

import requests
import json


def single_page_comment(link):
    """Fetch one JSONP comment-list response and print every comment's text.

    Args:
        link: Full URL of the comment-list request. The response is expected
            to be JSONP, i.e. a JSON object wrapped in a callback call.

    Raises:
        json.JSONDecodeError: If no JSON object can be extracted from the
            response text.
        KeyError: If the decoded object has no ``results`` / ``parents``
            entry, or a comment has no ``content`` field.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    # requests never times out by default; set a timeout so a stalled server
    # cannot hang the script forever.
    r = requests.get(link, headers=headers, timeout=10)
    # Strip the JSONP wrapper: keep everything from the first '{' through the
    # last '}' rather than assuming the body ends with exactly ");".
    json_string = r.text
    json_string = json_string[json_string.find('{'):json_string.rfind('}') + 1]
    json_data = json.loads(json_string)
    comment_list = json_data['results']['parents']

    for eachone in comment_list:
        print(eachone['content'])


# The request URL is split around the value of the ``offset`` query
# parameter. Neither half changes between iterations, so build them once.
link1 = "https://api-zero.livere.com/v1/comments/list?callback=jQuery1124019104109778374867_1566439164308&limit=10&offset="
link2 = "&repSeq=4272904&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1566439164314"

# Request offsets 1, 2 and 3 in turn, printing each URL before its comments.
for page in range(1, 4):
    link = "{}{}{}".format(link1, page, link2)
    print(link)
    single_page_comment(link)

link1 和 link2 的由来:在网页中多次点击下图所示的位置,然后在浏览器开发者工具的 Network -> JS 中抓取到请求地址,再以 offset 参数的值为界,把地址拆成前后两段。
在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值