import requests
from bs4 import BeautifulSoup
def get_episode_links(url):
    """Fetch *url* and collect every episode link on the page.

    Looks for ``<li class="episode-item">`` elements, takes the ``href`` of
    the first ``<a>`` inside each, prefixes it with ``https:`` (the site uses
    protocol-relative hrefs) and prints the joined list.

    Args:
        url: Episode-directory page URL (the resolved mgtv detail page).

    Returns:
        list[str]: the collected full episode URLs; empty when the page
        could not be fetched or contains no episode links.
    """
    links = []
    response = requests.get(url)
    if response.status_code != 200:
        print(f"无法获取目录地址内容。状态码: {response.status_code}")
        return links

    soup = BeautifulSoup(response.text, 'html.parser')
    for episode_element in soup.find_all('li', class_='episode-item'):
        link_element = episode_element.find('a')
        # Skip anchors without an href: the original code concatenated
        # "https:" + None and crashed with a TypeError on such elements.
        if link_element and link_element.get('href'):
            links.append("https:" + link_element.get('href'))

    print("目录地址: \n" + "\n".join(links))
    return links
def get_names_and_links(url):
    """Scrape an mgtv listing page: for every show found, print its name and
    detail URLs, list its episodes, print its metadata (description, image,
    director, lead actors) and push one record to the CMS receive API.

    Args:
        url: The mgtv library listing page URL.

    Returns:
        None. All results and errors are reported via ``print``.
    """
    response = requests.get(url)
    if response.status_code != 200:
        print(f"无法获取URL。状态码: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    for div_element in soup.find_all('div', class_='hitv_vertical-txtbox'):
        name_element = div_element.find('p', class_='hitv_vertical-title')
        link_element = div_element.find('a')
        if not (name_element and link_element):
            continue
        link = link_element.get('href')
        if not link:
            # Anchor without an href: nothing to follow.
            continue
        name = name_element.get('title')
        full_link = "https://www.mgtv.com" + link
        print(f"名称: {name}, 分地址: {full_link}")
        _process_show(name, full_link)


def _process_show(name, full_link):
    """Resolve one show's redirect page to its real detail URL, then scrape
    the detail page and submit the record to the CMS."""
    response_sub = requests.get(full_link)
    if response_sub.status_code != 200:
        print(f"无法获取分地址内容。状态码: {response_sub.status_code}")
        return

    soup_sub = BeautifulSoup(response_sub.text, 'html.parser')
    # bs4 calls the string filter with None for tags that have no single
    # string child; the original lambda then raised
    # TypeError: argument of type 'NoneType' is not iterable.
    script_element = soup_sub.find(
        'script', string=lambda t: t is not None and "window.location" in t)
    if not script_element:
        return

    # The redirect target is the double-quoted string inside the
    # window.location script: take everything between the first and
    # last double quote.
    script_content = script_element.text.strip()
    second_url = script_content[script_content.find('"') + 1:
                                script_content.rfind('"')]
    full_second_url = "https://www.mgtv.com" + second_url
    print(f"二次地址: {full_second_url}")

    get_episode_links(full_second_url)
    description, image_src = _scrape_detail(full_second_url)
    _post_to_cms(name, description, image_src, full_second_url)


def _scrape_detail(detail_url):
    """Print the detail page's description, image, director and lead actors.

    Returns:
        tuple[str, str]: ``(description, image_src)``. Both default to ``""``
        when unavailable, so the caller can always build the CMS payload —
        the original code hit a NameError here when the fetch failed or an
        element was missing.
    """
    description = ""
    image_src = ""

    response_desc = requests.get(detail_url)
    if response_desc.status_code != 200:
        print(f"无法获取二次地址内容。状态码: {response_desc.status_code}")
        return description, image_src

    soup_desc = BeautifulSoup(response_desc.text, 'html.parser')

    description_element = soup_desc.find('meta', {'name': 'description'})
    if description_element:
        description = description_element.get('content') or ""
        print(f"简介: {description}")
    else:
        print("无法找到简介")

    image_element = soup_desc.find('img', src=True)
    if image_element:
        image_src = image_element['src']
        print(f"图片: {image_src}")

    # Guard the inner find('a') too: the original dereferenced .text on a
    # possible None and could raise AttributeError.
    director_element = soup_desc.find('p', class_='introduce-item')
    director_link = director_element.find('a') if director_element else None
    if director_link:
        print(f"导演:{director_link.text.strip()}")
    else:
        print("无法找到导演名称")

    actor_element = soup_desc.find('p', class_='introduce-item leader')
    actor_link = actor_element.find('a') if actor_element else None
    if actor_link:
        print(f"主演:{actor_link.text.strip()}")
    else:
        print("无法找到主演信息")

    return description, image_src


def _post_to_cms(name, description, image_src, play_url):
    """POST one vod record to the CMS receive endpoint and report the result."""
    data = {
        "pass": "入库地址",
        "vod_name": name,
        "type_id": 2,
        "type_name": "分类名称",
        "vod_blurb": description,
        "vod_pic": image_src,
        "vod_content": description,
        "vod_play_from": "mgtv",
        "vod_play_url": play_url,
    }
    post_response = requests.post("https://cms地址/api.php/receive/vod", data=data)
    if post_response.status_code != 200:
        print(f"无法发送请求: {post_response.status_code}")
        return

    response_data = post_response.json()
    if response_data["code"] == 1:
        print("成功")
    else:
        print("错误:", response_data["err"])
if __name__ == "__main__":
    # Entry point: scrape the mgtv TV-series listing page and ingest
    # every show found into the CMS.
    url = "https://www.mgtv.com/lib/2?lastp=list_index&lastp=ch_tv&kind=a1&area=a1&year=all&sort=c1&chargeInfo=a1&fpa=2912&fpos="
    get_names_and_links(url)