# -*- coding:utf-8 -*-
"""Crawl the video listing pages of www.budejie.com.

For every listing page the script extracts (title, download link) pairs,
stores each pair in the MySQL table ``video_budejie`` and downloads the
video file to the location described by ``_DOWNLOAD_PATH``.
"""
import urllib
import re
import MySQLdb
import sys
import os

try:  # Python 2 module layout
    from urllib import urlopen, urlretrieve
except ImportError:  # Python 3 moved both helpers into urllib.request
    from urllib.request import urlopen, urlretrieve

# First listing page; the following pages are urlz + '/<n>'.
urlz = 'http://www.budejie.com/video'

# Captures the value of every data-text="..." attribute on a page.
_TITLE_RE = re.compile(r'data-text="(.*?)">')
# Captures the href of every download anchor on a page.
_LINK_RE = re.compile(
    r'<a href="(.*?)" target="_blank" download="" class="ipad-down-href" '
    r'style="border:0;outline: none;">'
)
# Destination of a downloaded video; %s receives the (sanitised) title.
_DOWNLOAD_PATH = r"D:\budejie\%s.mp4"
# Characters Windows does not accept inside a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(title):
    """Return *title* made usable as a Windows file name.

    Forbidden characters (including path separators, so a scraped title
    cannot point outside the download directory) become underscores, and
    trailing dots/spaces, which Windows drops silently, are removed.
    """
    return _UNSAFE_FILENAME_CHARS.sub('_', title).rstrip(' .')


def video(i):
    """Process one listing page: store and download every video on it.

    Args:
        i: URL of the listing page to fetch.

    Download errors are printed and do not stop the page; errors while
    fetching the page itself or while connecting to MySQL propagate.
    """
    response = urlopen(i)
    try:
        html = response.read().decode('utf-8')
    finally:
        # Always release the HTTP connection, even if reading/decoding fails.
        response.close()

    # Only every second data-text match (odd indices) is used as a title,
    # exactly as the original result[2 * k + 1] indexing did.
    # NOTE(review): presumably each title appears twice per video in the
    # markup -- confirm against the live page.
    titles = _TITLE_RE.findall(html)[1::2]
    links = _LINK_RE.findall(html)

    # zip() stops at the shorter list, so a page with fewer titles than
    # links no longer raises IndexError in the middle of the crawl.
    for title, link in zip(titles, links):
        Insert(title, link)
        path = _DOWNLOAD_PATH % _safe_filename(title)
        print(path)
        try:
            urlretrieve(link, path)
        except Exception as e:
            # Best effort: report the failed download and keep going.
            print(e)
        print(title + '\t' + link)


def Insert(name, url):
    """Insert one (name, url) row into the ``video_budejie`` table.

    Args:
        name: Video title.
        url: Download URL of the video.

    A failing INSERT is printed and swallowed; a failure to connect or to
    configure the connection propagates to the caller. The cursor and the
    connection are always closed before returning.
    """
    # NOTE(review): host and credentials are hard-coded; consider moving
    # them to configuration.
    conn = MySQLdb.connect('127.0.0.1', 'root', '123', 'game')
    try:
        cursor = conn.cursor()
        try:
            # Force UTF-8 in both directions so non-ASCII titles survive.
            conn.set_character_set('utf8')
            cursor.execute('SET NAMES utf8;')
            cursor.execute('SET CHARACTER SET utf8;')
            cursor.execute('SET character_set_connection=utf8;')
            try:
                sql = 'insert into video_budejie values(%s,%s)'
                # Parameterised query: the driver escapes both values.
                cursor.execute(sql, (name, url))
                conn.commit()
            except Exception as e:
                print(e)
        finally:
            cursor.close()
    finally:
        conn.close()


def main():
    """Crawl the first listing page followed by pages /1 .. /50."""
    pages = [urlz] + [urlz + '/%d' % page for page in range(1, 51)]
    for each in pages:
        print(each)
        video(each)


if __name__ == '__main__':
    main()
Python 爬虫：爬取“百思不得姐”搞笑视频
最新推荐文章于 2024-04-07 08:10:55 发布