先定一个小目标:爬取所有文章的标题,其余属性留待以后再处理。
- 代码
import requests
from bs4 import BeautifulSoup
# URL of the blog's default (index) page on cnblogs.
link = "http://www.cnblogs.com/planche/default.html"

# HTTP header values: a desktop Chrome-on-Windows user-agent string and the
# host name matching `link`.
# NOTE(review): presumably passed to requests when fetching pages -- the
# call site is outside this excerpt; confirm.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/52.0.2743.82 Safari/537.36"
    ),
    "Host": "www.cnblogs.com",
}
if __name__ == '__main__':
pos=1
while(True):
key_dict = {
'page'