Python 网络爬虫第二讲

# 获取响应内容
import requests
# Fetch the page. An explicit timeout keeps the script from waiting forever
# on an unresponsive server.
r = requests.get('http://www.santostang.com/', timeout=10)
# Encoding that requests will use when decoding the body into r.text.
print("文本编码:",r.encoding)
# HTTP status code returned by the server.
print("相应状态码:",r.status_code)

# Response body decoded to a str.
print("字符串方式的响应体:",r.text)
# 传递URL参数
import requests
# Query-string parameters; passed via `params` so requests builds the URL.
key_dict = {'key1': 'value1', 'key2': 'value2'}
# An explicit timeout keeps the script from waiting forever on a stalled server.
r = requests.get('http://httpbin.org/get', params=key_dict, timeout=10)
# r.url is the final URL including the encoded query string.
print("URL 已经正确编码:",r.url)
print("字符串方式的响应体:\n",r.text)
# 定制请求头
import requests
# Custom request headers: a browser-like User-Agent plus an explicit Host.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 '
        '2345Explorer/9.3.3.17611'
    ),
    'Host': 'www.santostang.com',
}

# An explicit timeout keeps the script from waiting forever on a stalled server.
r = requests.get('http://www.santostang.com/', headers=headers, timeout=10)

print("相应状态码:",r.status_code)
# 发送post请求
import requests
# Form fields to send in the request body.
key_dict = {'key1': 'value1', 'key2': 'value2'}
# This section demonstrates a POST request against the /post endpoint, so the
# call must be requests.post (the original used requests.get). An explicit
# timeout keeps the script from waiting forever on a stalled server.
r = requests.post('http://httpbin.org/post', data=key_dict, timeout=10)
print(r.text)
# 获取豆瓣前250个排名的电影
import requests
from bs4 import BeautifulSoup
def get_movies():
    """Collect movie titles from the ten pages of the Douban Top 250 list.

    Requests ``https://movie.douban.com/top250?start=N`` for N = 0, 25, ...,
    225 with a browser-like User-Agent and a 10-second timeout, prints each
    page's status code, and parses the HTML with BeautifulSoup's lxml parser.

    Returns:
        A list of whitespace-stripped title strings, one per
        ``<div class="hd">`` element found, in page order.
    """
    request_headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 '
            '2345Explorer/9.3.3.17611'
        ),
        'Host': 'movie.douban.com',
    }
    base_url = 'https://movie.douban.com/top250?start='
    titles = []
    # Ten pages, each addressed by the offset of its first entry.
    for page_no, offset in enumerate(range(0, 250, 25), start=1):
        response = requests.get(
            base_url + str(offset), headers=request_headers, timeout=10
        )
        print(page_no, "网页相应状态码:", response.status_code)

        parsed = BeautifulSoup(response.text, 'lxml')
        # The title is read from the first <span> of the first <a> in each div.hd.
        titles.extend(
            block.a.span.text.strip()
            for block in parsed.find_all('div', class_='hd')
        )
    return titles

# Run the scraper once and print the collected list of titles.
movies = get_movies()
print(movies)

输出结果为:
1 网页相应状态码: 200
2 网页相应状态码: 200
3 网页相应状态码: 200
4 网页相应状态码: 200
5 网页相应状态码: 200
6 网页相应状态码: 200
7 网页相应状态码: 200
8 网页相应状态码: 200
9 网页相应状态码: 200
10 网页相应状态码: 200
['肖申克的救赎', '霸王别姬', '这个杀手不太冷', '阿甘正传', '美丽人生', '泰坦尼克号', '千与千寻', '辛德勒的名单', '盗梦空间', '机器人总动员', '忠犬八公的故事', '三傻大闹宝莱坞', '海上钢琴师', '放牛班的春天', '大话西游之大圣娶亲', '楚门的世界', '教父', '龙猫', '星际穿越', '熔炉', '无间道', '触不可及', '乱世佳人', '当幸福来敲门', '怦然心动', '天堂电影院', '疯狂动物城', '蝙蝠侠:黑暗骑士', '十二怒汉', '鬼子来了', '活着', '少年派的奇幻漂流', '指环王3:王者无敌', '搏击俱乐部', '天空之城', '控方证人', '大话西游之月光宝盒', '飞屋环游记', '罗马假日', '窃听风暴', '两杆大烟枪', '飞越疯人院', '哈尔的移动城堡', '闻香识女人', '辩护人', '摔跤吧!爸爸', 'V字仇杀队', '死亡诗社', '海豚湾', '教父2', '指环王2:双塔奇兵', '美丽心灵', '指环王1:魔戒再现', '饮食男女', '情书', '美国往事', '狮子王', '素媛', '钢琴家', '小鞋子', '七宗罪', '被嫌弃的松子的一生', '致命魔术', '天使爱美丽', '本杰明·巴顿奇事', '西西里的美丽传说', '音乐之声', '拯救大兵瑞恩', '勇敢的心', '黑客帝国', '让子弹飞', '低俗小说', '看不见的客人', '剪刀手爱德华', '沉默的羔羊', '蝴蝶效应', '大闹天宫', '末代皇帝', '入殓师', '春光乍泄', '心灵捕手', '玛丽和马克思', '哈利·波特与魔法石', '阳光灿烂的日子', '布达佩斯大饭店', '幽灵公主', '禁闭岛', '第六感', '重庆森林', '猫鼠游戏', '狩猎', '致命ID', '断背山', '大鱼', '穿条纹睡衣的男孩', '加勒比海盗', '射雕英雄传之东成西就', '告白', '甜蜜蜜', '一一', '阳光姐妹淘', '摩登时代', '阿凡达', '上帝之城', '爱在黎明破晓前', '消失的爱人', '风之谷', '侧耳倾听', '爱在日落黄昏时', '超脱', '倩女幽魂', '恐怖直播', '红辣椒', '小森林 夏秋篇', '喜剧之王', '菊次郎的夏天', '驯龙高手', '幸福终点站', '萤火虫之墓', '借东西的小人阿莉埃蒂', '神偷奶爸', '岁月神偷', '七武士', '杀人回忆', '怪兽电力公司', '电锯惊魂', '贫民窟的百万富翁', '谍影重重3', '喜宴', '真爱至上', '东邪西毒', '记忆碎片', '黑天鹅', '疯狂原始人', '海洋', '雨人', '小森林 冬春篇', '卢旺达饭店', '哈利·波特与死亡圣器(下)', '7号房的礼物', '英雄本色', '萤火之森', '请以你的名字呼唤我', '心迷宫', '虎口脱险', '燃情岁月', '傲慢与偏见', '荒蛮故事', '超能陆战队', '海边的曼彻斯特', '教父3', '唐伯虎点秋香', '蝙蝠侠:黑暗骑士崛起', '恋恋笔记本', '纵横四海', '完美的世界', '花样年华', '时空恋旅人', '玩具总动员3', '魂断蓝桥', '猜火车', '穿越时空的少女', '达拉斯买家俱乐部', '雨中曲', '二十二', '我是山姆', '无人知晓', '人工智能', '冰川时代', '血战钢锯岭', '浪潮', '爆裂鼓手', '香水', '朗读者', '罗生门', '未麻的部屋', '阿飞正传', '被解救的姜戈', '头脑特工队', '可可西里', '恐怖游轮', '一次别离', '追随', '撞车', '战争之王', '房间', '地球上的星星', '你的名字。', '魔女宅急便', '梦之安魂曲', '模仿游戏', '谍影重重', '牯岭街少年杀人事件', '忠犬八公物语', '谍影重重2', '一个叫欧维的男人决定去死', '惊魂记', '完美陌生人', '青蛇', '哪吒闹海', '再次出发之纽约遇见你', '小萝莉的猴神大叔', '黑客帝国3:矩阵革命', '东京物语', '源代码', '新龙门客栈', '终结者2:审判日', '末路狂花', '秒速5厘米', '绿里奇迹', '碧海蓝天', '步履不停', '勇闯夺命岛', '城市之光', '这个男人来自地球', '初恋这件小事', '海街日记', '无耻混蛋', '海盗电台', '无敌破坏王', '卡萨布兰卡', '变脸', '爱在午夜降临前', 'E.T. 外星人', '疯狂的石头', '发条橙', '黄金三镖客', '美国丽人', '荒野生存', '英国病人', '彗星来的那一夜', '迁徙的鸟', '血钻', '国王的演讲', '聚焦', '非常嫌疑犯', '燕尾蝶', '勇士', '叫我第一名', '穆赫兰道', '遗愿清单', '黑鹰坠落', '我爱你', '枪火', '荒岛余生', '上帝也疯狂', '大卫·戈尔的一生', '千钧一发', '2001太空漫游', '蓝色大门']

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值