"""Scrape magnet and download links from dytt8.net movie listing pages."""
import os
import re
import time

import requests

_SITE = 'http://www.dytt8.net'
_LIST_URL = _SITE + '/html/gndy/dyzz/list_23_%d.html'
_OUTPUT_DIR = 'dytt_download'
_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever
_DELAY = 2     # seconds to pause between detail-page requests

# Compiled once at import time instead of on every page/item.
# [^"]+ (rather than a greedy .*) keeps the capture inside a single href even
# when several tags share one line.
_DETAIL_LINK_RE = re.compile(r'<a href="([^"]+)" class="ulink"')
# No capture group: the match deliberately keeps its surrounding double
# quotes so magnet.txt has the same content format as before.
_MAGNET_RE = re.compile(r'"magnet.*?"')
_DOWNLOAD_RE = re.compile(r'style="WORD-WRAP: break-word".*?href="(.*?)">')
# Non-greedy so the capture stops at the first closing tag after <title>.
_TITLE_RE = re.compile(r'<title>(.*?)</')


def _first_match(pattern, text, group=0):
    """Return ``group`` of the first match of ``pattern`` in ``text``.

    Raises:
        ValueError: if ``pattern`` does not occur in ``text``.
    """
    match = pattern.search(text)
    if match is None:
        raise ValueError('pattern not found: %s' % pattern.pattern)
    return match.group(group)


def _save_item(url, name):
    """Fetch one detail page and write its links under ``_OUTPUT_DIR/name``.

    Prints the magnet link, the download link and the page title, then writes
    ``magnet.txt`` and ``<name>.torrent`` (the latter holds the download URL
    as text, not torrent data).

    Raises:
        requests.RequestException: if the page cannot be fetched.
        ValueError: if an expected pattern is missing from the page.
        OSError: if the output directory or files cannot be written.
    """
    response = requests.get(url, timeout=_TIMEOUT)
    # Decode once, as GBK, and use the same text for every pattern so that
    # non-ASCII characters in the links are not mangled by a guessed encoding.
    html = response.content.decode('gbk', errors='replace')

    magnet = _first_match(_MAGNET_RE, html)
    print(magnet)
    th_url = _first_match(_DOWNLOAD_RE, html, 1)
    print(th_url)
    title = _first_match(_TITLE_RE, html, 1)
    print(title)

    doc_path = os.path.join(_OUTPUT_DIR, name)
    # makedirs also creates the parent output directory on the first run;
    # os.mkdir would fail when 'dytt_download' does not exist yet.
    os.makedirs(doc_path, exist_ok=True)
    with open(os.path.join(doc_path, 'magnet.txt'), 'w',
              encoding='utf-8') as f:
        f.write(magnet)
    with open(os.path.join(doc_path, name + '.torrent'), 'w',
              encoding='utf-8') as f:
        f.write(th_url)


def dytt(num):
    """Crawl listing pages 1..``num`` and save the links of every entry.

    For each listing page the URL is printed, every ``class="ulink"`` link is
    followed, and the detail page's magnet link and download link are stored
    in ``dytt_download/<last path segment of the link>/``.

    Args:
        num: Number of listing pages to crawl, starting from page 1.

    Raises:
        requests.RequestException: if a listing page itself cannot be
            fetched. Failures on individual detail pages are reported on
            stdout and skipped.
    """
    for page in range(1, num + 1):
        url = _LIST_URL % page
        print(url)
        response = requests.get(url, timeout=_TIMEOUT)
        for item in _DETAIL_LINK_RE.findall(response.text):
            url = _SITE + item
            try:
                _save_item(url, item.split('/')[-1])
            except (requests.RequestException, ValueError, OSError) as err:
                # Best effort: report the failing page with the reason and
                # carry on. KeyboardInterrupt/SystemExit are not intercepted.
                print('%s: %s' % (url, err))
            # Pause after every attempt, not only successful ones, so failing
            # pages do not turn into back-to-back requests.
            time.sleep(_DELAY)


if __name__ == '__main__':
    num = 3
    dytt(num)
电影天堂信息爬取
最新推荐文章于 2023-06-23 05:07:36 发布