Python 最简单的爬取邮箱地址

http://www.jb51.net/article/57161.htm

#!/usr/bin/env python

#-*- coding:utf-8 -*-

import re

import sys

def getIPAddFromFile(fobj):
    """Find every dotted-quad IPv4 address in a block of text.

    Args:
        fobj: The content to search. Despite the name this is the data
            already read from a file (text, or raw bytes), not a file object.

    Returns:
        A list of the matched addresses in order of appearance. The list is
        also printed to stdout.
    """
    # On Python 3 a str pattern cannot search bytes. latin-1 maps every byte
    # to exactly one character, so decoding never fails and the ASCII-only
    # matches are unaffected by the data's real encoding. On Python 2 bytes
    # is str, so the input is left untouched.
    if isinstance(fobj, bytes) and not isinstance(fobj, str):
        fobj = fobj.decode('latin-1')
    # re.ASCII (missing on Python 2, hence getattr) keeps \b ASCII-only, so an
    # address directly adjacent to non-ASCII letters is still delimited.
    regex = re.compile(
        r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
        r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
        getattr(re, 'ASCII', 0))
    ipadds = regex.findall(fobj)
    print(ipadds)
    return ipadds

def getPhoneNumFromFile(fobj):
    """Find every 11-digit phone number starting with 1 in a block of text.

    Args:
        fobj: The content to search. Despite the name this is the data
            already read from a file (text, or raw bytes), not a file object.

    Returns:
        A list of the matched numbers in order of appearance. The list is
        also printed to stdout.
    """
    # On Python 3 a str pattern cannot search bytes. latin-1 maps every byte
    # to exactly one character, so decoding never fails and the ASCII-only
    # matches are unaffected by the data's real encoding. On Python 2 bytes
    # is str, so the input is left untouched.
    if isinstance(fobj, bytes) and not isinstance(fobj, str):
        fobj = fobj.decode('latin-1')
    # The look-arounds reject 11-digit slices cut out of a longer digit run
    # (order ids, timestamps, ...). re.ASCII (missing on Python 2, hence
    # getattr) restricts \d to 0-9.
    regex = re.compile(r'(?<!\d)1\d{10}(?!\d)', getattr(re, 'ASCII', 0))
    phonenums = regex.findall(fobj)
    print(phonenums)
    return phonenums

def getMailAddFromFile(fobj):
    """Find every e-mail address in a block of text.

    Args:
        fobj: The content to search. Despite the name this is the data
            already read from a file (text, or raw bytes), not a file object.

    Returns:
        A list of the matched addresses in order of appearance. The list is
        also printed to stdout.
    """
    # On Python 3 a str pattern cannot search bytes. latin-1 maps every byte
    # to exactly one character, so decoding never fails and the ASCII-only
    # matches are unaffected by the data's real encoding. On Python 2 bytes
    # is str, so the input is left untouched.
    if isinstance(fobj, bytes) and not isinstance(fobj, str):
        fobj = fobj.decode('latin-1')
    # The top-level domain is "{2,}" rather than "{2,4}" so that longer TLDs
    # are not dropped. re.ASCII (missing on Python 2, hence getattr) keeps \b
    # ASCII-only, so an address written directly next to CJK text still has a
    # word boundary in front of it.
    regex = re.compile(
        r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
        re.IGNORECASE | getattr(re, 'ASCII', 0))
    mails = regex.findall(fobj)
    print(mails)
    return mails

def getUrlFromFile(fobj):
    """Find every http:// or https:// URL in a block of text.

    Args:
        fobj: The content to search. Despite the name this is the data
            already read from a file (text, or raw bytes), not a file object.

    Returns:
        A list of the matched URLs in order of appearance. The list is also
        printed to stdout.
    """
    # On Python 3 a str pattern cannot search bytes. latin-1 maps every byte
    # to exactly one character, so decoding never fails and the ASCII-only
    # matches are unaffected by the data's real encoding. On Python 2 bytes
    # is str, so the input is left untouched.
    if isinstance(fobj, bytes) and not isinstance(fobj, str):
        fobj = fobj.decode('latin-1')
    # NOTE: "[$-_@.&+]" contains the character *range* "$" through "_", which
    # is why "/", ":", "?" and "=" are accepted inside a URL.
    # re.ASCII (missing on Python 2, hence getattr) stops IGNORECASE from
    # matching non-ASCII case variants of letters on Python 3.
    regex = re.compile(
        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
        re.IGNORECASE | getattr(re, 'ASCII', 0))
    urls = regex.findall(fobj)
    print(urls)
    return urls

def main(FilefilePath):
    """Run all four extractors over the contents of one file.

    Args:
        FilefilePath: Path of the file to scan.

    Returns:
        A tuple ``(urls, mails, phones, ips)`` with the list each extractor
        returned.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if reading fails. The file
    # is still read in binary mode, so no text encoding is assumed here.
    with open(FilefilePath, 'rb') as handle:
        fobj = handle.read()
    urllist = getUrlFromFile(fobj)
    mailList = getMailAddFromFile(fobj)
    phoneNum = getPhoneNumFromFile(fobj)
    ipaddlist = getIPAddFromFile(fobj)
    # Hand the results back instead of discarding them.
    return urllist, mailList, phoneNum, ipaddlist

if __name__ == '__main__':
    # Fail with a usage message rather than an IndexError traceback when the
    # file argument is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: %s FILE' % sys.argv[0])
    main(sys.argv[1])

 

# -*- coding: utf-8 -*-

import re

import urllib

def getHtml(url):
    """Download *url* and return the response body.

    Args:
        url: The address to fetch.

    Returns:
        Whatever the response's ``read()`` yields: raw bytes on Python 3,
        a byte string (``str``) on Python 2. No decoding is done here.
    """
    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2;
    # importing locally keeps the function working on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    page = urlopen(url)
    # try/finally rather than "with": the Python 2 response object is not a
    # context manager, and the connection should be released either way.
    try:
        return page.read()
    finally:
        page.close()

def getImg(html):
    """Find every e-mail address in a page's HTML.

    NOTE: despite the name, this function extracts e-mail addresses, not
    image URLs; the name is kept so existing callers keep working.

    Args:
        html: The page content, as text or as raw bytes.

    Returns:
        A list of the matched addresses in order of appearance. The list is
        also printed to stdout.
    """
    # On Python 3 a str pattern cannot search bytes. latin-1 maps every byte
    # to exactly one character, so decoding never fails and the ASCII-only
    # matches are unaffected by the page's real encoding. On Python 2 bytes
    # is str, so the input is left untouched.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('latin-1')
    # The top-level domain is "{2,}" rather than "{2,4}" so that longer TLDs
    # are not dropped. re.ASCII (missing on Python 2, hence getattr) keeps \b
    # ASCII-only, so an address written directly next to CJK text still has a
    # word boundary in front of it.
    regex = re.compile(
        r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
        re.IGNORECASE | getattr(re, 'ASCII', 0))
    imglist = regex.findall(html)
    print(imglist)
    return imglist

#x=0

#for imgurl in imglist:

#urllib.urlretrieve(imgurl,'%s.jpg' % x)

#x=x+1

# Fetch one Baidu Tieba thread and list the e-mail addresses found in it.
html = getHtml("http://tieba.baidu.com/p/3827945043")

# print() with a single argument behaves the same on Python 2 and 3.
# NOTE(review): getImg already prints its result, so the list appears twice
# in the output; kept as-is to preserve the script's existing output.
print(getImg(html))

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值