python爬视频选择清晰度_Python爬取新浪微博博主高清视频

#! -*- coding:utf-8 -*-

import urllib2,urllib,json,datetime,datetime,time,requests,re

#获取uid,url

def getUrl(name):

#主页

Surl = 'https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D1%26q%3D'+urllib.quote(name)+'&page_type=searchall'

print '主页:',Surl

req = urllib2.Request(Surl,None,headers)

res = json.loads(urllib2.urlopen(req).read())

lfid = res['data']['scheme'].split('lfid=')[1].split('_-_')[0]

# print res['data']['scheme']

uid = res['data']['cards'][0]['card_group'][0]['user']['id']

profile_url = res['data']['cards'][0]['card_group'][0]['user']['profile_url']

back = profile_url.split('&luicode=')[1]

print '获取uid',uid,profile_url,back

url2 = 'https://m.weibo.cn/api/container/getIndex?uid={0}&luicode={1}&type=uid&value={2}'.format(uid,back,uid)

print url2

containerid = getContained(url2)

print '*'*100

print containerid

for i in range(100):

videoList = 'https://m.weibo.cn/api/container/getIndex?uid={0}&luicode={1}&type=uid&value={2}&containerid={3}_-_mcn_time'.format(uid, back, uid, containerid)

print '-' * 100

print videoList+'&page='+str(i)

bool = getInfo(videoList+'&page='+str(i))

if bool == True:

break

#获取containerid

def getContained(url, tab_index=2):
    """Return the containerid of one tab listed in a user's profile response.

    Args:
        url: Profile API URL (``.../api/container/getIndex?uid=...``).
        tab_index: Position of the wanted tab inside
            ``data.tabsInfo.tabs``. Defaults to 2, the index the crawler has
            always used (presumably the video tab -- TODO confirm against a
            live response).

    Returns:
        The ``containerid`` value of the selected tab.

    Raises:
        KeyError / IndexError: if the response lacks the expected structure.
        urllib2.URLError: on network failures.
    """
    # Request headers are defined locally: this file has no module-level
    # ``headers`` object, so relying on a global raised NameError.
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    }

    response = urllib2.urlopen(urllib2.Request(url, None, headers))
    try:
        res = json.loads(response.read())
    finally:
        # Always release the connection, even when decoding fails.
        response.close()

    return res['data']['tabsInfo']['tabs'][tab_index]['containerid']

def getInfo(url):

headers={

'User-Agent':'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',

'X-DevTools-Emulate-Network-Conditions-Client-Id': '81ED134A01BAFB218319255E8C4C958E',

'Referer': 'https://m.weibo.cn/u/1912713353?uid=1912713353&luicode=10000011&lfid=100103type%3D1%26q%3D6%E5%B2%9B%E5%B2%9B'

}

req = urllib2.Request(url,None,headers)

res = urllib2.urlopen(req).read()

if len(res) < 100:

return True

mediaList = json.loads(res)['data']['cards']

# print mediaList

for m in mediaList:

item = dict()

create_time = datetime.datetime.now()

source = m['mblog']['page_info']['page_url']

object_id = m['mblog']['page_info']['object_id']

title = m['mblog']['page_info']['content2']

# videosRoute = m['mblog']['page_info']['media_info']['stream_url']

img = m['mblog']['page_info']['page_pic']['url']

author = m['mblog']['user']['screen_name']

support = m['mblog']['attitudes_count']

info = m['mblog']['page_info']['content2']

#11111111111111111111111

try:

print source,object_id

url_source = 'http://video.weibo.com/show?fid='+object_id

html = requests.get(url_source, headers=headers1, allow_redirects=False)

url2 = html.headers['Location']

reqw = urllib2.Request(url2, None, headers2)

resw = urllib2.urlopen(reqw).read()

vs = re.compile(r'video-sources="(.*?)"', re.S)

v = re.findall(vs, resw)[0]

str1 = urllib.unquote(v)

str2 = str1.split('http://')[-1]

videosRoute = 'http://' + str2

print '^'*100

print videosRoute

except IndexError:

print 'eeeeeeeeeeeeeeee'

videosRoute = m['mblog']['page_info']['media_info']['stream_url']

#111111111111111111111111

item['title'] = title

item['author'] = author

item['video_url'] = videosRoute

item['img_route'] = img

item['support'] = support

item['description'] = info

item['create_time'] = create_time

try:

item['video_name'] = videosRoute.split('cn/')[1].split('?')[0]

item['img_name'] = img.split('cn/')[1].split('?')[0]

except IndexError:

item['video_name'] = videosRoute.split('stream/')[1].split('?')[0]

try:

item['img_name'] = img.split('images/')[1]

except IndexError:

item['img_name'] = img.split('stream/')[1]

print img

item['video_route'] = 'http://video.mpad365.com/' + item['video_name']

item['video_img'] = 'http://video.mpad365.com/' + item['img_name']

item['tag0'] = ""

print 'title',title

print '视频url',videosRoute

print '视频img',img

print '视频作者',author

print '点赞数',support

print '内容',info

print '-'*100

# Screen name of the Weibo user to search for; it is passed to getUrl below.
name = '李子柒'

# Runs the crawl as soon as this file is executed (there is no main guard).
getUrl(name)

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
首先,使用Python爬取新浪微博评论数据需要以下步骤: 1. 登录微博开放平台,创建应用并获取App Key和App Secret。 2. 使用App Key和App Secret获取access_token。 3. 使用access_token获取微博的API接口。 4. 使用API接口获取微博评论数据。 5. 将数据存入数据库或写入csv文件中。 下面是一个简单的示例代码,演示如何使用Python爬取新浪微博评论数据并存入数据库中: ```python import os import sys import time import json import pymysql import requests from urllib.parse import quote_plus from datetime import datetime from dotenv import load_dotenv load_dotenv() app_key = os.getenv("APP_KEY") app_secret = os.getenv("APP_SECRET") access_token = os.getenv("ACCESS_TOKEN") # 数据库配置 db_host = os.getenv("DB_HOST") db_port = os.getenv("DB_PORT") db_user = os.getenv("DB_USER") db_password = os.getenv("DB_PASSWORD") db_name = os.getenv("DB_NAME") # 连接数据库 db = pymysql.connect(host=db_host, port=int(db_port), user=db_user, password=db_password, db=db_name, charset="utf8mb4") cursor = db.cursor() # 微博接口配置 base_url = "https://api.weibo.com/2/comments/show.json" max_count = 200 since_id = None max_id = None while True: # 构造API请求参数 params = { "access_token": access_token, "source": app_key, "count": max_count, "since_id": since_id, "max_id": max_id, } # 发送API请求 response = requests.get(base_url, params=params) if response.status_code != 200: print("Failed to get comments data from Weibo API.") sys.exit(1) # 解析API响应数据 data = json.loads(response.text) comments = data["comments"] # 遍历评论数据并存入数据库 for comment in comments: created_at = datetime.strptime(comment["created_at"], "%a %b %d %H:%M:%S +0800 %Y") text = comment["text"] user_id = comment["user"]["id"] user_name = comment["user"]["name"] mid = comment["mid"] sql = "INSERT INTO comments (created_at, text, user_id, user_name, mid) VALUES (%s, %s, %s, %s, %s)" try: cursor.execute(sql, (created_at, text, user_id, user_name, mid)) db.commit() except: db.rollback() # 更新API请求参数 if len(comments) == 0: break else: since_id = comments[0]["id"] max_id = comments[-1]["id"] # 控制API请求频率 time.sleep(5) ``` 以上代码中使用了dotenv库来读取环境变量,因此需要在项目根目录下创建一个名为“.env”的文件,并在其中添加以下配置项: ```text 
APP_KEY=your_app_key
APP_SECRET=your_app_secret
ACCESS_TOKEN=your_access_token
DB_HOST=your_db_host
DB_PORT=your_db_port
DB_USER=your_db_user
DB_PASSWORD=your_db_password
DB_NAME=your_db_name
```
注意:上述代码中的“comments”为示例数据库表名,需要根据实际情况修改。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值