# -*- coding: GBK -*-
"""
爬取手机号段归属地
"""
import time
import requests
from lxml import etree
# Lookup endpoint queried for every candidate number.
SEARCH_URL = 'http://www.ip138.com:8080/search.asp?'
# Results are appended to this file, one "<number> <location>" line each.
OUTPUT_PATH = "C:\\Users\\yang\\Desktop\\phoneNumber.txt"
# Fixed leading digits; the four trailing digits are enumerated 0000-9999.
NUMBER_PREFIX = '199'
# Cells of the result table; the script reads the second match.
RESULT_XPATH = '/html/body/table/tr/td[@class="tdc2"]'
# Extra attempts made after the first request yields no usable cell text.
MAX_RETRIES = 20
# Seconds to wait for the server before treating a request as failed.
REQUEST_TIMEOUT = 10
# Text the script treats as "no location available" and does not record.
UNKNOWN_LOCATION = '未知'


def normalize_location(text):
    """Return the stripped location text, or None when it is unusable.

    A value is unusable when it is None, empty or whitespace-only, or equal
    to the UNKNOWN_LOCATION marker. Comparison is by value (``==``), not by
    identity, so the marker and the empty string are actually filtered out.
    """
    if text is None:
        return None
    location = text.strip()
    if not location or location == UNKNOWN_LOCATION:
        return None
    return location


def _fetch_cell_text(mobile):
    """Request the lookup page for *mobile* once and return the raw cell text.

    Returns None when the request fails, the body cannot be parsed, the page
    has fewer than two matching cells, or the second cell has no text.
    """
    try:
        response = requests.get(
            SEARCH_URL,
            params={'action': 'mobile', 'mobile': mobile},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException:
        # A network failure is treated like an empty page so the caller
        # retries instead of aborting the whole crawl.
        return None
    # The response is decoded as GBK before parsing.
    response.encoding = 'GBK'
    page = etree.HTML(response.text)
    if page is None:
        return None
    cells = page.xpath(RESULT_XPATH)
    if len(cells) < 2:
        return None
    return cells[1].text


def _lookup_location(mobile):
    """Return the location recorded for *mobile*, or None if there is none.

    The page is requested once and then re-requested up to MAX_RETRIES times
    while no cell text comes back.
    """
    for _ in range(MAX_RETRIES + 1):
        text = _fetch_cell_text(mobile)
        if text is not None:
            return normalize_location(text)
    return None


def main():
    """Look up every number NUMBER_PREFIX + 0000..9999 and append the hits.

    Each number with a usable location is written to OUTPUT_PATH as
    "<number> <location>"; the total run time is printed at the end.
    """
    time_start = time.time()  # program start time
    # The context manager closes the file even if a lookup raises.
    with open(OUTPUT_PATH, "a+", encoding='utf-8') as out:
        for suffix in range(10000):
            digits = '%04d' % suffix
            print("!!!n1: " + digits[0] + " n2: " + digits[1]
                  + " n3: " + digits[2] + " n4: " + digits[3])
            mobile = NUMBER_PREFIX + digits
            location = _lookup_location(mobile)
            if location is not None:
                out.write(mobile + " " + location + "\n")
    time_end = time.time()  # program end time
    print('\r程序运行时间:', time_end - time_start)


if __name__ == "__main__":
    main()
# Data download link:
# https://download.csdn.net/download/qq_41228463/10470817