Python 抓取APP内所有的MV,并放入MySQL数据库中

 

# /bin/python3
# -*-coding:utf-8-*-
# __author_=lixj


import requests
import re
import time
import pymysql
import json
import jsonpath

# GET request
def get(i):
    """Fetch one page of the MV list endpoint and return the raw response body.

    Args:
        i: Page number to request. May be a string or an integer; it is
            converted with ``str()`` before being placed in the URL.

    Returns:
        The response body as text (``Response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not answer within the timeout.
    """
    # NOTE(review): the appkey is hard-coded in the URL; consider moving it
    # to configuration.
    url = (
        'https://www.buyao.tv/appapi/by_mvlist.php'
        '?appkey=BYMUSICOFFVN0DtKGcebowgEPLtASJfBBn6iOTQ'
        '&ac=list&ordering=1&cityid=0&page=' + str(i) + '&userid=1'
    )
    # Without a timeout requests waits indefinitely on a stalled server,
    # which would freeze the whole crawl loop.
    link = requests.get(url=url, timeout=30)
    return link.text


# Request pages in a loop
def url_while(cishu, i):
    """Crawl pages ``i`` .. ``cishu - 1`` and store each page's ids and titles.

    For every page the body is downloaded once with ``get``. If the body
    does not contain the text ``image`` (case-insensitive) the page is
    treated as empty and the crawl stops. Otherwise the ids and titles are
    extracted with ``jsonpath_id`` / ``jsonpath_title`` and handed to
    ``sql_insert``.

    Args:
        cishu: Exclusive upper bound of the page range (integer).
        i: First page number to fetch (integer).
    """
    for page in range(i, cishu):
        page_str = str(page)
        print('this is %s *******************************************************************************************************'%page)

        # Download the page exactly once and reuse the body for both the
        # emptiness probe and the parsing below (the server is slow).
        json_text = get(page_str)

        # Stop as soon as a page carries no content.
        if not re.search('image', json_text, re.M | re.I):
            print('no have')
            break

        print('this json_text type is %s '%type(json_text))
        text_id = jsonpath_id(json_text, page_str)
        print('this is id %s : ' %text_id)
        text_title = jsonpath_title(json_text, page_str)

        print('this is title %s ' %text_title)
        sql_insert(text_id, text_title)

        # Pause between pages to go easy on the server.
        time.sleep(3)

# Look up image addresses
def jsonpath_image(text, i):
    """Return the result of the JSONPath query ``$..image`` on ``text``.

    Args:
        text: JSON document as a string.
        i: Unused; kept so the signature matches the sibling extractors.

    Returns:
        Whatever ``jsonpath.jsonpath`` yields for the query (per the
        jsonpath library: a list of matches, or ``False`` when nothing
        matches -- confirm against the installed version).
    """
    document = json.loads(text)
    return jsonpath.jsonpath(document, expr='$..image')
	

# Look up ids
def jsonpath_id(text, i):
    """Return the result of the JSONPath query ``$..id`` on ``text``.

    Args:
        text: JSON document as a string.
        i: Unused; kept so the signature matches the sibling extractors.

    Returns:
        Whatever ``jsonpath.jsonpath`` yields for the query (per the
        jsonpath library: a list of matches, or ``False`` when nothing
        matches -- confirm against the installed version).
    """
    document = json.loads(text)
    return jsonpath.jsonpath(document, expr='$..id')


# Look up summaries
def jsonpath_summary(text, i):
    """Return the result of the JSONPath query ``$..summary`` on ``text``.

    Args:
        text: JSON document as a string.
        i: Unused; kept so the signature matches the sibling extractors.

    Returns:
        Whatever ``jsonpath.jsonpath`` yields for the query (per the
        jsonpath library: a list of matches, or ``False`` when nothing
        matches -- confirm against the installed version).
    """
    document = json.loads(text)
    return jsonpath.jsonpath(document, expr='$..summary')


# Look up titles
def jsonpath_title(text, i):
    """Return the result of the JSONPath query ``$.by_item..title`` on ``text``.

    Unlike the sibling extractors, the search is rooted at the top-level
    ``by_item`` member rather than the whole document.

    Args:
        text: JSON document as a string.
        i: Unused; kept so the signature matches the sibling extractors.

    Returns:
        Whatever ``jsonpath.jsonpath`` yields for the query (per the
        jsonpath library: a list of matches, or ``False`` when nothing
        matches -- confirm against the installed version).
    """
    document = json.loads(text)
    return jsonpath.jsonpath(document, expr='$.by_item..title')

# Convert the entered string to an int
def input_int(input_str):
    """Convert user-supplied text to an integer.

    Args:
        input_str: Value to convert; anything ``int()`` accepts (surrounding
            whitespace in strings is tolerated).

    Returns:
        The parsed integer.

    Raises:
        ValueError: If ``input_str`` cannot be interpreted as an integer.
            Previously the conversion error was swallowed and the function
            then failed with an unrelated UnboundLocalError.
    """
    try:
        return int(input_str)
    except (TypeError, ValueError) as exc:
        raise ValueError('not a valid integer: %r' % (input_str,)) from exc


# Connect to the database and run one statement
def sql_execute(sql, params=None):
    """Run a single SQL statement on the local ``test`` database.

    A new connection is opened for every call and is always closed again,
    whether the statement succeeds or fails.

    Args:
        sql: SQL statement text. May contain ``%s`` placeholders when
            ``params`` is given.
        params: Optional sequence or mapping of values bound to the
            placeholders by the driver. Defaults to ``None`` (no binding),
            which matches the previous single-argument behaviour.

    Returns:
        The rows returned by ``cursor.fetchall()`` on success, or ``None``
        when executing the statement failed (the failure is printed).
    """
    # NOTE(review): credentials are hard-coded; consider reading them from
    # configuration or the environment.
    # Keyword arguments are used because PyMySQL 1.0+ no longer accepts
    # positional connection parameters.
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='lxj1021521',
        database='test',
    )
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, params)
            rows = cursor.fetchall()
        db.commit()
        return rows
    except Exception as exc:
        # Best-effort: report the failing statement and carry on. Closing
        # the connection below discards any uncommitted work.
        print('this is erro:%s' %sql)
        print(exc)
        return None
    finally:
        db.close()


# Insert rows into the database
def sql_insert(list_execute, list_execute2):
    """Insert ``(id, name)`` pairs into table ``test``, one statement per pair.

    Values are paired positionally; if the two sequences differ in length
    the surplus items of the longer one are ignored. Each statement and the
    result of ``sql_execute`` are printed.

    Args:
        list_execute: Sequence of id values. A falsy value (empty list, or
            the ``False`` a JSONPath lookup may yield when nothing matched)
            results in no inserts.
        list_execute2: Sequence of name values, handled the same way.
    """
    if not list_execute or not list_execute2:
        return

    # The values come from a remote API, so escape them before embedding
    # them in the statement; a title containing a quote would otherwise
    # break (or hijack) the SQL.
    escape = pymysql.converters.escape_string

    for row_id, row_name in zip(list_execute, list_execute2):
        # str() lets numeric ids through; plain concatenation raised
        # TypeError for non-string values.
        sql_sentence = "insert into test (id,name) values('%s','%s')" % (
            escape(str(row_id)),
            escape(str(row_name)),
        )
        print(sql_sentence)
        jieguo = sql_execute(sql_sentence)
        print(jieguo)


if __name__ == '__main__':
    # Ask for the first page, then for the (exclusive) last page, and
    # start the crawl over that range.
    first_page = input_int(input('please input start page:'))
    page_limit = input_int(input('please input large page:'))

    url_while(page_limit, first_page)

	#sql_1 = 'select * from test'
	#sql = 'insert into test (id) values(\'20\');'
	#print(sql_execute(sql_1))
	#print(sql_execute(sql))

 

 

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值