单独代码
```python
import re
import requests
import random
from zlib import crc32
from base64 import b64decode
# Headers sent with every page/API request in this script; the User-Agent
# identifies the client as desktop Chrome 66 on Windows 10.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/66.0.3359.181 Safari/537.36"
    ),
}
def get_video_url_api(video_id=''):
    """Build the checksummed video-info API URL for a video id.

    The request path carries a random numeric nonce ``r``; the URL is then
    terminated with ``s``, the CRC32 of that path (query string included).

    Args:
        video_id: Id of the video, inserted into the request path as-is.

    Returns:
        The complete ``https://ib.365yg.com/...`` URL as a string.
    """
    # Fixed-point formatting always yields exactly 16 digits. Plain str()
    # switches to scientific notation for values below 1e-4, which would
    # put text such as "e-05" into the nonce.
    r = "{:.16f}".format(random.random())[2:]
    url_part = "/video/urls/v/l/toutiao/mp4/{}?r={}".format(video_id, r)
    # Mask so the checksum is the unsigned 32-bit value on any Python version.
    s = crc32(url_part.encode()) & 0xFFFFFFFF
    return "https://ib.365yg.com{}&s={}".format(url_part, s)
def get_video_url(url):
    """Query the video-info API and return the decoded video URL.

    Args:
        url: API URL, e.g. the value returned by ``get_video_url_api``.

    Returns:
        The base64-decoded ``main_url`` of the ``video_1`` entry in the
        response's ``data.video_list``.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        KeyError: If the JSON response lacks the expected fields.
    """
    # Without a timeout, requests may block forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    payload = resp.json()
    encoded_url = payload['data']['video_list']['video_1']['main_url']
    # The API delivers main_url base64-encoded.
    return b64decode(encoded_url.encode()).decode()
def get_video_id(url):
    """Fetch a video page and extract the video id embedded in its HTML.

    Args:
        url: URL of the video page.

    Returns:
        The value of the first ``"vid":"...",`` field found in the page
        text, or ``None`` when the page contains no such field.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    # Without a timeout, requests may block forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    match = re.search(r'"vid":"([^"]+)",', resp.text)
    if match is None:
        # Callers check for None; calling .group() on a failed match would
        # raise AttributeError instead.
        return None
    video_id = match.group(1)
    print(video_id)
    return video_id
def down(name, url):
    """Download the video behind a video-page URL to ``<name>.mp4``.

    The file is written to the current working directory.

    Args:
        name: Base name of the output file (without extension).
        url: URL of the video page.

    Returns:
        None. Prints an error and returns early when no video id is found.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status while downloading.
    """
    video_id = get_video_id(url)
    if video_id is None:
        print("get video_id error")
        return
    video_url_api = get_video_url_api(video_id)
    print(video_url_api)
    video_url = get_video_url(video_url_api)
    print(video_url)
    # Stream the body in chunks so the whole video is never held in memory,
    # and only create the file once the server has answered successfully.
    with requests.get(video_url, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        with open('{}.mp4'.format(name), 'wb') as f:
            for chunk in resp.iter_content(chunk_size=65536):
                f.write(chunk)


if __name__ == "__main__":
    # Demo run: the sample video and output name that used to be hard-coded
    # inside down() are now passed in as arguments.
    down('姐姐舞', "https://www.ixigua.com/i6839662418029707784/?logTag=T_Zd3IZonXoUAas_fQKzx")
```

selenium 结合下载

down.py
```python
# coding:utf8
import re
import requests
import random
from zlib import crc32
from base64 import b64decode
# Headers sent with every page/API request in this module; the User-Agent
# identifies the client as desktop Chrome 66 on Windows 10.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/66.0.3359.181 Safari/537.36"
    ),
}
def get_video_url_api(video_id=''):
    """Build the checksummed video-info API URL for a video id.

    The request path carries a random numeric nonce ``r``; the URL is then
    terminated with ``s``, the CRC32 of that path (query string included).

    Args:
        video_id: Id of the video, inserted into the request path as-is.

    Returns:
        The complete ``https://ib.365yg.com/...`` URL as a string.
    """
    # Fixed-point formatting always yields exactly 16 digits. Plain str()
    # switches to scientific notation for values below 1e-4, which would
    # put text such as "e-05" into the nonce.
    r = "{:.16f}".format(random.random())[2:]
    url_part = "/video/urls/v/l/toutiao/mp4/{}?r={}".format(video_id, r)
    # Mask so the checksum is the unsigned 32-bit value on any Python version.
    s = crc32(url_part.encode()) & 0xFFFFFFFF
    return "https://ib.365yg.com{}&s={}".format(url_part, s)
def get_video_url(url):
    """Query the video-info API and return the decoded video URL.

    Args:
        url: API URL, e.g. the value returned by ``get_video_url_api``.

    Returns:
        The base64-decoded ``main_url`` of the ``video_1`` entry in the
        response's ``data.video_list``.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        KeyError: If the JSON response lacks the expected fields.
    """
    # Without a timeout, requests may block forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    payload = resp.json()
    encoded_url = payload['data']['video_list']['video_1']['main_url']
    # The API delivers main_url base64-encoded.
    return b64decode(encoded_url.encode()).decode()
def get_video_id(url):
    """Fetch a video page and extract the video id embedded in its HTML.

    Args:
        url: URL of the video page.

    Returns:
        The value of the first ``"vid":"...",`` field found in the page
        text, or ``None`` when the page contains no such field.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    # Without a timeout, requests may block forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    match = re.search(r'"vid":"([^"]+)",', resp.text)
    if match is None:
        # Callers check for None; calling .group() on a failed match would
        # raise AttributeError instead.
        return None
    video_id = match.group(1)
    print(video_id)
    return video_id
def down(name, url):
    """Download the video behind a video-page URL into ``H:\\xingzan\\``.

    Args:
        name: Base name of the output file (without extension). Characters
            that Windows does not allow in file names are replaced by ``_``.
        url: URL of the video page.

    Returns:
        None. Prints an error and returns early when no video id is found.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status while downloading.
    """
    # The name may come straight from scraped page text, so neutralise path
    # separators and other characters that are illegal in Windows file names.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', name).strip()
    filename = 'H:\\xingzan\\' + safe_name + '.mp4'
    video_id = get_video_id(url)
    if video_id is None:
        print("get video_id error")
        return
    video_url = get_video_url(get_video_url_api(video_id))
    print(video_url)
    # Stream the body in chunks so the whole video is never held in memory,
    # and only create the file once the server has answered successfully.
    with requests.get(video_url, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=65536):
                f.write(chunk)
```

获取url

```python
from selenium import webdriver
import time
from dist import down#自己写的模块
# Crawl the music channel feed and download every listed video via down.down().
#
# NOTE(review): find_element_by_css_selector and the positional driver path
# are Selenium 3-style calls (the find_element_by_* helpers were removed in
# Selenium 4.3) - confirm the installed Selenium version.
# NOTE(review): the driver is never quit in the visible part of this script.
x = 0  # not referenced in the visible part of the script
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
driver = webdriver.Chrome(r'C:\chromedriver.exe')
driver.get('https://www.ixigua.com/channel/yinyue/')
time.sleep(2)
div = driver.find_element_by_css_selector('#App > div > div.v3-app-layout__content > div > div > div > div > div.FeedContainer.load-more-blocklist > div > div > div')
time.sleep(2)
for i in range(1, 1000):
    print(i)
    # The i-th card of the feed; its <a> element carries title and link.
    title = div.find_element_by_css_selector(
        'div.FeedContainer.load-more-blocklist > div > div > div > div:nth-child(' + str(
            i) + ') > div > div.HorizontalFeedCard__contentWrapper.withAuthorInfo > a')
    video_title = title.text
    video_href = title.get_attribute('href')
    print(video_title)
    print(video_href)
    down.down(video_title, video_href)
    if i % 10 == 0:
        # Every 10 videos, scroll to the bottom of the page, then wait.
        # A fixed scrollTop of 10000 stops reaching the bottom once the
        # page grows taller, so use the current document height instead.
        jsCode = "window.scrollTo(0, document.documentElement.scrollHeight)"
        driver.execute_script(jsCode)
        time.sleep(3)