抓取网页中的手机号码

telephone.py

# coding:utf-8
import re
import requests
import os
import sys
from requests.exceptions import ReadTimeout,HTTPError,RequestException

# Input file with one seed URL per line, and output file for the numbers found.
URLS_PATH = "urls.txt"
OUTPUT_PATH = "telphone.txt"

# Seconds to wait for a server before giving up on a request; without a
# timeout requests.get() may block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# A maximal run of exactly 11 digits that starts with 1 followed by one of
# 3/4/5/7/8.  The look-arounds reject longer digit runs, so this matches the
# same strings as "find every \d+ run, keep those of length 11 with an
# accepted prefix".
PHONE_PATTERN = re.compile(r"(?<!\d)1[34578]\d{9}(?!\d)")

# The text between href=" and the next double quote.
HREF_PATTERN = re.compile(r"(?<=href=\").*?(?=\")")


def extract_phone_numbers(text):
    """Return every 11-digit mobile number found in *text*, in order.

    A number is an isolated run of 11 digits whose first digit is 1 and
    whose second digit is 3, 4, 5, 7 or 8.  Duplicates are kept.
    """
    return PHONE_PATTERN.findall(text)


def fetch_text(url):
    """Download *url* and return the response body as text.

    Returns None (after reporting the problem on stderr) when the request
    fails for any reason covered by requests' RequestException, which
    includes timeouts, connection errors and malformed URLs.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except RequestException as exc:
        sys.stderr.write("skipping %s: %s\n" % (url, exc))
        return None
    return response.text


def write_phone_numbers(text, out):
    """Echo each phone number in *text* to stdout and write it to *out*."""
    for number in extract_phone_numbers(text):
        print(number + "\n")
        out.write(number + "\n")


def main():
    """Scan every URL in URLS_PATH, plus the pages it links to, for numbers.

    For each seed URL the numbers on the page itself are written first,
    followed by a blank line, then the numbers on every page referenced by
    an href attribute, followed by two blank lines.  URLs that cannot be
    fetched are skipped.
    """
    with open(URLS_PATH, 'r') as url_file, open(OUTPUT_PATH, 'w') as out:
        for line in url_file:
            # Drop the trailing newline; it is not part of the URL.
            url = line.strip()
            if not url:
                continue
            print(url)

            page = fetch_text(url)
            if page is None:
                # Nothing was downloaded, so there are no links to follow.
                continue

            write_phone_numbers(page, out)
            out.write("\n")

            # href values are requested as-is; ones that are not absolute
            # URLs fail inside fetch_text and are skipped.
            for link in HREF_PATTERN.findall(page):
                linked_page = fetch_text(link)
                if linked_page is not None:
                    write_phone_numbers(linked_page, out)
            out.write("\n\n")

    # "pause" is a Windows shell built-in that keeps the console window open.
    if os.name == "nt":
        os.system("pause")


if __name__ == "__main__":
    main()

  • 1
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值