import requests
import re
from urllib.parse import unquote
import json
from pprint import pprint
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import os
def get_video_id(url):
    """Open *url* in headless Chrome and return the video id from the final URL.

    The page is loaded in a browser so that redirects are followed; the id is
    the path segment that comes right after ``/video/`` in the browser's
    resulting URL.

    Args:
        url: Address to open, e.g. a ``https://v.douyin.com/...`` share link.

    Returns:
        The video id as a string, with any trailing path, query string or
        fragment removed.

    Raises:
        IndexError: If the final URL contains no ``/video/`` segment.
    """
    chrome_options = Options()
    # Run Chrome without opening a visible window.
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-logging")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        # current_url is the address the browser ended up on after loading.
        final_url = driver.current_url
    finally:
        # Always shut the browser down; without this the Chrome/chromedriver
        # processes started above may be left running after the call.
        driver.quit()

    print("跳转后的网址:", final_url)

    _, marker, tail = final_url.partition("/video/")
    if not marker:
        raise IndexError(f"no '/video/' segment in final URL: {final_url!r}")
    # Keep only the id itself: anything from the next '/', '?' or '#' onward
    # is not part of it and would corrupt URLs and file names built from it.
    return re.split(r"[/?#]", tail, maxsplit=1)[0]
def download_byid(video_id, download_dir="download", timeout=30):
    """Download a Douyin video and its description given the video id.

    Requests ``https://www.douyin.com/discover?modal_id=<video_id>``, decodes
    the URL-encoded JSON held in the page's ``RENDER_DATA`` script tag, takes
    the first ``playAddr`` of the first ``bitRateList`` entry as the video
    address, and writes ``<video_id>.mp4`` (video bytes) and
    ``<video_id>.txt`` (description, UTF-8) into *download_dir*.

    Args:
        video_id: Id of the video; used in the page URL and the file names.
        download_dir: Directory the files are written to. It is created when
            it does not exist. Defaults to ``"download"``.
        timeout: Timeout in seconds passed to each ``requests.get`` call.

    Returns:
        A tuple ``(video_path, video_title, video_url)``.

    Raises:
        IndexError: If the page contains no ``RENDER_DATA`` script tag.
        requests.HTTPError: If the video request returns an error status.
    """
    page_url = "https://www.douyin.com/discover?modal_id=" + str(video_id)
    print(page_url)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        # NOTE(review): the original carried a commented-out, empty 'Cookie'
        # entry here; presumably one is needed in some cases - confirm.
    }
    # A timeout keeps a stalled connection from blocking forever.
    response = requests.get(url=page_url, headers=headers, timeout=timeout)
    print(response)

    match = re.search(
        r'<script id="RENDER_DATA" type="application/json">(.*?)</script>',
        response.text,
    )
    if match is None:
        # Same exception type the original produced via info[0], but with a
        # message that says what actually went wrong.
        raise IndexError(
            f"RENDER_DATA script tag not found in {page_url} "
            f"(HTTP status {response.status_code})"
        )

    # The tag content is URL-encoded JSON.
    json_data = json.loads(unquote(match.group(1)))
    detail = json_data['app']['videoDetail']
    src = detail['video']['bitRateList'][0]['playAddr'][0]['src']
    # Only prepend the scheme when src does not already carry one.
    if src.startswith(('http://', 'https://')):
        video_url = src
    else:
        video_url = 'https:' + src
    video_title = detail['desc']
    pprint(video_url)
    pprint(video_title)

    video_response = requests.get(url=video_url, headers=headers, timeout=timeout)
    # Fail loudly instead of saving an error page under a .mp4 name.
    video_response.raise_for_status()

    # Writing would raise FileNotFoundError if the directory were missing.
    os.makedirs(download_dir, exist_ok=True)
    video_path = f"{download_dir}/{video_id}.mp4"
    title_path = f"{download_dir}/{video_id}.txt"
    with open(video_path, mode='wb') as f:
        f.write(video_response.content)
    with open(title_path, mode='w', encoding='utf-8') as f:
        f.write(video_title)
    return video_path, video_title, video_url
if __name__ == "__main__":
    # Example run: resolve a share link to its video id, then download it.
    # The original called get_url() and download(), which are not defined in
    # this file; the functions defined above are used instead.
    url2 = 'https://v.douyin.com/i2G6aAcM/'
    video_id = get_video_id(url2)
    print(video_id)
    download_byid(video_id)