import requests
import chardet
import re
import logging
# Configure the root logger: emit WARNING and above, formatted as
# "<timestamp>-<level>:<message>".
logging.basicConfig(level=logging.WARNING,format='%(asctime)s-%(levelname)s:%(message)s')
# Compiled once at import time instead of on every loop iteration. Raw strings
# keep "\w" / "\W" from being read as (invalid) string escape sequences.
_TIMU_RE = re.compile(r'题目:</strong>([\w\W]*?)</p>')
# Tried in order: the analysis text normally ends at the example <div>; the
# second pattern is the fallback that stops at the "source code" heading.
_FENXI_RES = (
    re.compile(r'程序分析:</strong>([\w\W]*?)<div class="example">'),
    re.compile(r'程序分析:</strong>([\w\W]*?)程序源代码:'),
)


def _parse_exercise(html, number):
    """Extract the (timu, fenxi) text pair from one exercise page.

    Args:
        html: Decoded HTML of the exercise page.
        number: 1-based exercise number, used only in warning messages.

    Returns:
        A ``(timu, fenxi)`` tuple of strings. When the question cannot be
        found ``timu`` is the placeholder ``'dss'``; when neither analysis
        pattern matches ``fenxi`` is the placeholder ``'syy'``. A warning is
        logged in both cases.
    """
    found = _TIMU_RE.search(html)
    if found is None:
        logging.warning('timu %s', number)
        timu = 'dss'
    else:
        timu = found.group(1)

    for pattern in _FENXI_RES:
        found = pattern.search(html)
        if found is not None:
            fenxi = found.group(1)
            break
    else:
        # Neither pattern matched.
        logging.warning('fenxi %s', number)
        fenxi = 'syy'

    # Blank out the trailing heading and paragraph tags left in the capture.
    for leftover in ('程序源代码:', '</p>', '<p>'):
        fenxi = fenxi.replace(leftover, ' ')
    return timu, fenxi


def get_timu():
    """Scrape exercises 1-100 from runoob.com and append them to a text file.

    For each page ``python-exercise-example<N>.html`` (N = 1..100) the
    question ("timu") and analysis ("fenxi") sections are extracted and
    appended to ``百例题目.txt`` (UTF-8) as an "实例N / 题目 / 分析" entry.

    Pages whose sections cannot be located still produce an entry, using the
    placeholders ``'dss'`` / ``'syy'``, and a warning is logged.

    Errors raised by ``requests.get`` or by UTF-8 decoding are not caught and
    abort the run; entries written before the failure are kept.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Host': 'www.runoob.com',
    }
    # Open the output once for the whole run rather than once per exercise.
    with open(r"百例题目.txt", "a", encoding='utf-8') as f:
        for number in range(1, 101):
            link = 'https://www.runoob.com/python/python-exercise-example{}.html'.format(number)
            r = requests.get(link, headers=headers, timeout=10)
            # Decode the raw bytes as UTF-8 ourselves to avoid garbled Chinese
            # text (see p.149 of "Python Web Scraping: From Beginner to
            # Practice").
            html = r.content.decode('utf-8')
            timu, fenxi = _parse_exercise(html, number)
            f.write('实例{}\n'.format(number))
            f.write('题目: ' + timu + '\n')
            f.write('分析: ' + fenxi + '\n\n')
# Reset the output file to a single blank line before scraping, so the
# entries appended by get_timu() are not mixed with an earlier run's output.
with open(r"百例题目.txt", mode="w", encoding='utf-8') as output_file:
    output_file.write('\n')

# Fetch every exercise and append it to the freshly reset file.
get_timu()