# 写的比较简单,还需要进一步优化,比如采用模块的调用。
import requests
import re
import sys
import io
# Crawler for the nested place listings on www.tcmap.com.cn.
#
# Starting from the index page in ``url`` it walks three levels of index
# pages, printing every entry name it finds, and on the fourth-level pages it
# prints the detail text.  The original variable names (shi, qu, jiedao, quyu)
# suggest the levels are city (e.g. Kunming), district, street and area --
# NOTE(review): confirm against the live site.

# Entry page of the crawl.
url = 'http://www.tcmap.com.cn/yunnan/'

# Site root that every extracted href is appended to.
BASE_URL = 'http://www.tcmap.com.cn'

# Seconds to wait for the server.  Without a timeout requests.get() can block
# forever on a stalled connection and freeze the whole crawl.
REQUEST_TIMEOUT = 30

# Number of index-page levels that are followed before a page is treated as a
# detail page.
INDEX_LEVELS = 3

# One match per "<strong><a href ... </strong></td>" cell; the captured group
# holds both the link target and the visible name.
ENTRY_RE = re.compile(r'<strong><a href(.*?)</strong></td>', re.S)
# Visible name inside a cell.
NAME_RE = re.compile(r'class=blue>(.*?)</a>', re.S)
# Link target inside a cell (the text between "=" and " class").
HREF_RE = re.compile(r'=(.*?) class', re.S)
# Detail text printed from the last-level pages.
DETAIL_RE = re.compile(
    r'<div class=f12 style="padding:0 0 0 10px;">(.*?)<a></a>', re.S)


def fetch_page(page_url, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return its body decoded as GBK.

    Any exception raised by ``requests.get`` (including a timeout) propagates
    to the caller.
    """
    response = requests.get(page_url, timeout=timeout)
    response.encoding = 'gbk'
    return response.text


def parse_entries(page_text):
    """Split an index page into its table cells.

    Returns a list with one ``(names, hrefs)`` tuple per cell matched by
    ``ENTRY_RE``, where ``names`` and ``hrefs`` are the lists of strings that
    ``NAME_RE`` and ``HREF_RE`` find inside that cell.  An empty or
    non-matching page yields an empty list.
    """
    return [
        (NAME_RE.findall(cell), HREF_RE.findall(cell))
        for cell in ENTRY_RE.findall(page_text)
    ]


def crawl(page_url, level=0, fetch=fetch_page, emit=print):
    """Print the listing rooted at *page_url*, following links depth-first.

    page_url -- address of the page to process.
    level    -- how many index levels lie above this page; 0 is the entry
                page.  Pages at ``INDEX_LEVELS`` are treated as detail pages.
    fetch    -- callable mapping a URL to the page text (defaults to an HTTP
                download); replaceable so the crawl can run on canned pages.
    emit     -- callable receiving each output line (defaults to ``print``).
    """
    page_text = fetch(page_url)

    if level >= INDEX_LEVELS:
        # Detail page: no further links are followed.
        for detail in DETAIL_RE.findall(page_text):
            emit(' ' + detail)
        return

    # Only the entry page prints its names without a leading space.
    prefix = '' if level == 0 else ' '
    for names, hrefs in parse_entries(page_text):
        # All names of a cell are printed before any of its links is visited,
        # so a parent line always precedes the lines of its children.
        for name in names:
            emit(prefix + name)
        for href in hrefs:
            crawl(BASE_URL + href, level + 1, fetch, emit)


def main():
    """Run the crawl from the entry page and print the result to stdout."""
    crawl(url)


if __name__ == '__main__':
    main()