分析
使用 requests 和 re 模块爬取慕课网“免费课程/数据库/”分类下的课程信息。
代码实现
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
import requests
import os
# Module-level counter, starting at zero; crawl() rebinds it via `global num`.
# NOTE(review): presumably counts the courses scraped so far -- confirm against
# the rest of crawl(), which is not fully visible here.
num = 0
def crawl(url):
global num
base_url='https://www.imooc.com'
req_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
path = '../../data'
if not os.path.exists(path):
os.makedirs(path)
file_path=os.path.join(path,'慕课网课程信息.txt')
resp=requests.get(url=url,headers=req_headers)
with open(file_path,'w')