导出邮箱里的联系人:支持Gmail,126,网易,搜狐,Hotmail,新浪,雅虎,MSN

 Python代码: 导出邮箱里的联系人:支持Gmail,126,网易,搜狐,Hotmail,新浪,雅虎,MSN

#!/usr/bin/env python
#coding=utf-8
from BeautifulSoup import BeautifulSoup
import os , urllib , urllib2 , pdb
import cookielib
import httplib
import csv , re

GDATA_URL = '/accounts/ClientLogin'

class MailContactError( Exception ):
    pass

class MailContact :
    def __init__( self , username , password ):
        pass
    def login( self ):
        pass
    def get_contacts( self ):
        pass
    def get_contact_page( self ):
        pass
   
class GMailContact( MailContact ):
    """
    A class to retrieve a users contacts from their Google Account.
   
    Dependencies:
    -------------
    * BeautifulSoup.
    * That's it. :-)

    Usage:
    ------
    >>> g = GMailContact('email@example.org', 'password')
    >>> g.login()
    (200, 'OK')
    >>> g.get_contacts()
    >>> g.contacts
    [(u'Persons Name', 'name@person.com'), ...]


    """
    def __init__( self , username = 'test@gmail.com' , password = 'test' , service = 'cp' ):
        self . mail_type = "@gmail.com"
        self . username = username + self . mail_type
        self . password = password
        self . account_type = 'HOSTED_OR_GOOGLE'  # Allow both Google Domain and Gmail accounts
        self . service = service                  # Defaults to cp (contacts)
        self . source = 'google-data-import'      # Our application name
        self . code = ''                          # Empty by default, populated by self.login()
        self . contacts = []                      # Empty list by default, populated by self.get_contacts()
   
    def login( self ):
        """
        Login to Google. No arguments.
        """
        data = urllib . urlencode ({
            'accountType' : self . account_type ,
            'Email' : self . username ,
            'Passwd' : self . password ,
            'service' : self . service ,
            'source' : self . source
        })
        headers = {
            'Content-type' : 'application/x-www-form-urlencoded' ,
            'Accept' : 'text/plain'
        }
       
        conn = httplib . HTTPSConnection( 'google.com')
        conn . request( 'POST' , GDATA_URL , data , headers)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise GdataError( "Couldn't log in. HTTP Code: %s , %s " % ( response . status , response . reason))
           
        d = response . read()
       
        self . code = d . split( " /n " )[ 2 ] . replace( 'Auth=' , '')
        conn . close()
        return response . status , response . reason
   
    def _request( self , max_results = 200 ):
        """
        Base function for requesting the contacts. We'll allow other methods eventually
        """
        url = '/m8/feeds/contacts/ %s /base/?max-results= %d ' % ( self . username , max_results)
       
        headers = { 'Authorization' : 'GoogleLogin auth= %s ' % self . code }
       
        conn = httplib . HTTPConnection( 'www.google.com')
        conn . request( 'GET' , url , headers = headers)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise MailContactError( "Couldn't log in. HTTP Code: %s , %s " % ( response . status , response . reason))
       
        page = response . read()
        conn . close()
        return page
   
    def get_contacts( self , max_results = 200 ):
        """ Parses the contacts (using BeautifulSoup) from self._request, and then populates self.contacts
        """
        soup = BeautifulSoup( self . _request( max_results))
        self . contacts = []
        for entry in soup . findAll( 'title' ):
            if len( entry . parent . findAll ([ 'gd:email' , 'title' ])) == 2 :
                s = entry . parent . findAll ([ 'gd:email' , 'title' ])
                self . contacts . append((s [ 0 ] . string , s [ 1 ] . get( 'address')))
       
        return

class M126Contact( MailContact ):
    def __init__( self , username , password ):
        self . mail_type = "@126.com"
        self . username = username
        self . password = password       
        self . login_host = 'entry.mail.126.com'
        self . login_url = '/cgi/login?redirTempName=https.htm&hid=10010102&lightweight=1&verifycookie=1&language=0&style=-1'
        self . login_data = urllib . urlencode ({
            'domain' : '126.com' ,
            'language' : 0 ,
            'bCookie' : '' ,
            'user' : self . username ,
            'pass' : self . password ,
            'style' : - 1 ,
            'remUser' : '' ,
            'secure' : '' ,
            'enter.x' : '%B5%C7+%C2%BC'
        })
        self . login_headers = {
            'Content-type' : 'application/x-www-form-urlencoded' ,
            'Accept' : 'text/xml,text/plain' ,
            'Refer' : 'http://www.126.com/'
        }
        self . contact_host = 'g2a10.mail.126.com'
        self . contact_url = '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid= %(sid)s &listnum=200&tempname=address %% 2faddress.htm'
       

    def login( self ):
        conn = httplib . HTTPSConnection( self . login_host)
        conn . request( 'POST' , self . login_url , self . login_data , self . login_headers)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise MailContactError( "Couldn't log in. HTTP Code: %s , %s " % ( response . status , response . reason))
        #sc="Coremail=aaYgsaQsvSmKa%MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd; path=/; domain=.126.com"
        #sid="MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd"
        sc = response . getheader( 'Set-Cookie')
        if not sc or sc . find( "Coremail") == - 1 :
            #用户密码不正确
            raise MailContactError( "Email user %s%s password %s not correct!" % ( self . username , self . mail_type , self . password))
        cookie = sc . split ()[ 0 ]
        coremail = cookie [ cookie . find( '=') + 1 : cookie . find( ';' )]
        sid = coremail [ coremail . find( '%') + 1 :]
        self . contact_url = self . contact_url % { 'sid' : sid }
        self . contact_headers = {
        'Cookie' : 'MAIL126_SSN= %(user)s ; NETEASE_SSN= %(user)s ; nts_mail_user= %(user)s ; logType=df; ntes_mail_firstpage=normal; /
        Coremail= %(coremail)s ;mail_host=g2a14.mail.126.com; mail_sid= %(sid)s ; mail_uid= %(user)s @126.com; /
        mail_style=dm3; oulink_h=520; ntes_mail_noremember=true' % { 'user' : self . username , 'coremail' : coremail , 'sid' : sid }
        }
        conn . close()
       
    def get_contact_page( self ):
        conn = httplib . HTTPConnection( self . contact_host)
        conn . request( 'GET' , self . contact_url , headers = self . contact_headers)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise MailContactError( "Couldn't getc contact page. HTTP Code: %s , %s " % ( response . status , response . reason))
        page = response . read()
        conn . close()
        return page
       
    def get_contacts( self ):
        page = self . get_contact_page()
        self . contacts = []
        soup = BeautifulSoup( page)
        xmps = soup . findAll( 'xmp')
        for x in xmps :
            if x [ 'id' ] . startswith( 't' ):
                self . contacts . append(( x . contents [ 0 ], x . space . string))

class M163Contact( MailContact ):
    def __init__( self , username , password ):
        self . mail_type = "@163.com"
        self . username = username
        self . password = password     
        self . contacts = [] 
        self . login_host = 'reg.163.com'       
        self . login_url = '/logins.jsp?type=1&url=http://fm163.163.com/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=-1'
       
        self . login_data = urllib . urlencode ({
            'verifycookie' : 1 ,
            'style' : - 1 ,
            'product' : 'mail163' ,
            'username' : self . username ,
            'password' : self . password ,
            'selType' : - 1 ,
            'remUser' : '' ,
            'secure' : 'on'
        })
        self . login_headers = {
            'Content-type' : 'application/x-www-form-urlencoded' ,
            'Accept' : 'text/xml,text/plain' ,
            'Refer' : 'http://mail.163.com/'
        }
        self . contact_host = 'g2a10.mail.163.com'
       

    def login( self ):
        conn = httplib . HTTPSConnection( self . login_host)
        conn . request( 'POST' , self . login_url , self . login_data , self . login_headers)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise MailContactError( "Couldn't log in. HTTP Code: %s , %s " % ( response . status , response . reason))
       
        sc1 = response . getheader( 'Set-Cookie')
        '''
            Set-Cookie: NTES_SESS=ohAWkiyj.OCjHdh1BK4ToxPcUvFX2fSLaN3FaU0cRInzLoieELdifjyqnBdk4C8qWIZkirZ7.JF.IPFDuR7BcAtKL; domain=.163.com; path=/
            Set-Cookie: NETEASE_SSN=weafriend; domain=.163.com; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
            Set-Cookie: NETEASE_ADV=11&24&1212921746999; domain=.163.com; path=/; expires=Mon, 08-Jun-2009 10:42:26 GMT
        '''
        ntes_sess , ntes_adv = None , None
        for s in sc1 . split ():
            if s . startswith( 'NTES_SESS' ):
                ntes_sess =s [s . find( '=') + 1 :s . find( ';' )]
            elif s . startswith( 'NETEASE_ADV' ):
                ntes_adv =s [s . find( '=') + 1 :s . find( ';' )]
        if not ntes_sess or not ntes_adv :
            #用户密码不正确
            raise MailContactError( "Email user %s%s password %s not correct!" % ( self . username , self . mail_type , self . password))
       
        url = '/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1&language=-1&style=-1&username=weafriend'
        headers = { 'cookie' : sc1 }
        conn = httplib . HTTPConnection( 'fm163.163.com')
        conn . request( 'GET' , url ,{}, headers)
        response = conn . getresponse()
        sc2 = response . getheader( 'Set-Cookie')
        coremail = sc2 [ sc2 . find( '=') + 1 : sc2 . find( ';' )]
        sid = coremail [ coremail . find( '%') + 1 :]
        self . contact_url = '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=' + sid + '&listnum=200&tempname=address %2f address.htm'
       
       
        self . contact_headers = {
        'Cookie' : 'MAIL163_SSN= %(user)s ; vjlast=1212911118; vjuids=-99d7a91f6.1156a6ea3cd.0.9e6d0e6f029e78; /
        _ntes_nuid=7118c6a1c9d16ee59a045a2e66186af8;  NTES_adMenuNum=3; /
        _ntes_nnid=7118c6a1c9d16ee59a045a2e66186af8,0|www|urs|163mail|news|ent|sports|digi|lady|tech|stock|travel|music|2008|; /
        NTES_UFC=9110001100010000000000000000000000100000000000000002331026300000; logType=-1; nts_mail_user=weafriend:-1:1; /
        Province=010; _ntes_nvst=1212911122953,|www|urs|; Coremail= %(coremail)s; /
        wmsvr_domain=g1a109.mail.163.com; ntes_mail_truename=; ntes_mail_province=; ntes_mail_sex=; mail_style=js3; /
        mail_host=g1a109.mail.163.com; mail_sid= %(sid)s ; USERTRACK=58.31.69.214.1212911333143304; /
        ntes_mail_firstpage=normal; NTES_SESS=%(ntes_sess)s; /
        NETEASE_SSN= %(user)s ; NETEASE_ADV=%(ntes_adv)s' % { 'user' : self . username , 'coremail' : coremail , 'sid' : sid , 'ntes_sess' : ntes_sess , 'ntes_adv' : ntes_adv }
        }
        return True
       
       
       
    def get_contact_page( self ):
        conn = httplib . HTTPConnection( self . contact_host)
        conn . request( 'GET' , self . contact_url , headers = self . contact_headers)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise MailContactError( "Couldn't getc contact page. HTTP Code: %s , %s " % ( response . status , response . reason))
        page = response . read()
        conn . close()
        return page
       
    def get_contacts( self ):
        page = self . get_contact_page()
        soup = BeautifulSoup( page)
        xmps = soup . findAll( 'xmp')
        for x in xmps :
            if x [ 'id' ] . startswith( 't' ):
                self . contacts . append(( x . contents [ 0 ], x . space . string))




class SohuContact( MailContact ):
    def __init__( self , username , password ):
        self . mail_type = "@sohu.com"
        self . username = username
        self . password = password     
        self . contacts = [] 
        self . login_host = 'passport.sohu.com'       
        self . login_url = 'http://passport.sohu.com/login.jsp'
        self . login_data = urllib . urlencode ({
            'loginid' : self . username + self . mail_type ,
            'passwd' : self . password ,
            'sg' : '5175b065623bb194e85903f5e8c43386' ,
            'eru' : 'http://login.mail.sohu.com/login.php' ,
            'ru' : 'http://login.mail.sohu.com/login_comm.php' ,   
            'appid' : 1000 ,
            'fl' : '1' ,
            'ct' : 1126084880 ,
            'vr' : '1|1'       
        })
        self . login_headers = {
            'User-agent' : 'Opera/9.23' ,
            'Content-type' : 'application/x-www-form-urlencoded' ,
            'Accept' : 'text/xml,text/plain'           
        }
        opener = urllib2 . build_opener( urllib2 . HTTPCookieProcessor( cookielib . CookieJar()))
        urllib2 . install_opener( opener)
        self . contact_host = 'www50.mail.sohu.com'
        self . contact_url = '/webapp/contact'

    def login( self ):
        req = urllib2 . Request( self . login_url , self . login_data)
        conn = urllib2 . urlopen( req)
        self . contact_url = os . path . dirname( conn . geturl()) + '/contact'
       
    def get_contacts( self ):
        req = urllib2 . Request( self . contact_url)
        conn = urllib2 . urlopen( req)
        buf = conn . readlines()
        import simplejson
        info = simplejson . loads( buf [ 0 ])
        for i in info [ 'listString' ]:
            self . contacts . append(( i [ 'name' ], i [ 'email' ]))

class HotmailContact( MailContact ):
    def __init__( self , username , password ):
        self . mail_type = "@hotmail.com"
        self . username = username
        self . password = password     
        self . contacts = [] 
        self . login_host = 'login.live.com'       
        self . login_url = '/ppsecure/post.srf?id=2'
        self . login_data = urllib . urlencode ({
            'login' : self . username + self . mail_type ,
            'passwd' : self . password ,
            'PPSX' : 'Pass' ,
            'LoginOption' : 2 ,
            'PwdPad' : 'IfYouAreReadingThisYouHaveTooMuchFreeTime' [ 0 : - len( self . password )],
            'PPFT' : 'B1S2dWnsGTFLpX9h8fxfE*ym5OABStpt0fjo%21YICXQOy1b %21x P4dRx8F1h1w6tR8ZyLP4h3TYGS8gSZGku3j7CxQ4poqr'
        })
        self . login_headers = {
            'Content-type' : 'application/x-www-form-urlencoded' ,
            'Accept' : 'text/xml,text/plain' ,
            'Cookie' : 'CkTst=G1213457870062; MobileProf=2AV3mTOwJEE8smIfIyq69wbCn08y6UX7910BtLhqTto2MYrNSBW5hhlEuGlMJdMwwGq1WcxtENCAI1JSyTNfrS23ArFLxDjBNk!xtbIj0iglbu8DQVg9TnSTPtHj975deR; MUID=C2DC0F9324AA47DCB05CE14B989D89C2; ANON=A=E81AEA51F927860B07BBA712FFFFFFFF&E=69f&W=2; s_lastvisit=1213455335875; MH=MSFT; wlidperf=throughput=2087.201125175809&latency=1.422; MSPRequ=lt=1213455763&co=1&id=2; MSPOK=uuid-d75c4c53-1b6e-433c-af95-c3c0175a48cd; CkTst=G1213455761093; MSPPre=fenyon@hotmail.com; MSPCID=0f45e10de2ad38c9; NAP=V=1.7&E=6b4&C=bKkGf4IbC96JLFhsoKyccKm1Kf7jjhX5I3C1ofjvyMoY3iI9j0b6gg&W=2; MSPSoftVis=@:@; BrowserSense=Win=1&Downlevel=0&WinIEOnly=0&Firefox=1&FirefoxVersion=2.0; mktstate=U=&E=en-us; mkt1=norm=en-us; s_cc=true; s_sq=%5B%5BB%5D%5D; MSPP3RD=3688532421' ,
            'Referer' : 'https://login.live.com/ppsecure/post.srf?id=2&bk=1213455763'
        }

        self . contact_host = 'by120w.bay120.mail.live.com'
        self . contact_url = '/mail/GetContacts.aspx'
   
    def getInputValue( self , name , content ):
        pass
    def login( self ):
        #登录过程见http://blog.jiexoo.com/2008/05/21/%e7%94%a8httpclient%e8%8e%b7%e5%8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/
        conn = httplib . HTTPSConnection( self . login_host)
        conn . request( 'GET' , 'login.srf?id=2')
        response = conn . getresponse()
       
        conn = httplib . HTTPSConnection( self . login_host)
        conn . request( 'POST' , self . login_url , self . login_data , self . login_headers)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise MailContactError( "Couldn't getc contact page. HTTP Code: %s , %s " % ( response . status , response . reason))
        page = response . read()
        print page
       
       
    def get_contacts( self ):
        conn = httplib . HTTPConnection( self . contact_host)
        conn . request( 'GET' , self . contact_url)
        response = conn . getresponse()
        if not str( response . status) == '200' :
            raise MailContactError( "Couldn't getc contact page. HTTP Code: %s , %s " % ( response . status , response . reason))
        page = response . read()
        conn . close()
        print page

class SinaContact( MailContact ):
    pass



class YahooContact( MailContact ):
    pass

class MsnContact( MailContact ):
    pass

def get_mailcontact( user , password , mailtype ):
    if mailtype == "126.com" :
        g = M126Contact( user , password)
    elif mailtype == "163.com" :
            g = M163Contact( user , password)
    elif mailtype == "sohu.com" :
            g = SohuContact( user , password)
    elif mailtype == "hotmail.com" :
            g = HotmailContact( user , password)
    elif mailtype == "sina.com" :
            g = SinaContact( user , password)   
    elif mailtype == "gmail.com" :
        g = GMailContact( user , password)
    try :
        g . login()
        g . get_contacts()
        return g . contacts
    except :
        return []
       
       



def get_csvcontact( iter ):
    contact , name = [], None
    reader = csv . reader( iter)
    for r in reader :
        for c in r :
            if not c or not len( c . strip ()):
                continue
            m = re . search( '/w+@/w+(?:/./w+)+' , c)
            if m :
                print name , m . group( 0)
                contact . append(( name , m . group( 0)))
                break
            else :
                name = c
    return contact

def get_imcontact( iter ):
    contact = []
    reader = csv . reader( iter)
    for r in reader :
        for c in r :
            m = re . search( '/w+@/w+(?:/./w+)+' , c)
            if m :
                print m
                contact . append(( m))
    return contact

if __name__ == '__main__' :
    pdb . set_trace()
    httplib . HTTPSConnection . debuglevel = 1
    httplib . HTTPConnection . debuglevel = 1   
    g = GMailContact( '***' , '***')
    g . login()
    g . get_contacts()
    print g . contacts
   
    g = M163ContactContact( '***' , '***')
    g . login()
    g . get_contacts()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值