Day3(2)

6.百度

import requests

# Page to download.
url = 'http://www.baidu.com'

# requests applies no timeout by default, so an unresponsive server would
# block this script indefinitely; bound the wait explicitly.
response = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of saving an error page to disk.
response.raise_for_status()

with open('baidu.html', 'wb') as f:
    # response.content is the raw body as bytes (the requests counterpart of
    # urllib.request.urlopen(url).read()), hence the binary file mode.
    f.write(response.content)

7.西刺代理

import requests

# Page to fetch.
url = 'http://www.xicidaili.com'
# Proxy used for plain-http requests.
# NOTE(review): the proxy username and password are embedded in this URL;
# move them to an environment variable or config file before sharing the script.
proxy = {
    'http': 'http://root:Yao+ql2011@101.200.50.18:8118'
}
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
# requests applies no timeout by default; bound the wait so a dead proxy or
# server cannot hang the script.
response = requests.get(url, headers=headers, proxies=proxy, timeout=10)
# Stop on a 4xx/5xx answer rather than printing an error page as if it were data.
response.raise_for_status()
# response.text is a str, decoded by requests using response.encoding;
# response.content is the undecoded body as bytes.
print(response.text)
# To save the page instead of printing it, write response.content to a file
# opened in 'wb' mode, e.g. 'xicidaili.html'.

8.雪球网房产前7页

from urllib import request
from http import cookiejar
import json
# Timeline endpoint template; the two placeholders are max_id and count.
url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={}&count={}&category=111'


def xueqiu(number=1,max_id=None,count=None):
    """Print the id and title of every post on timeline pages ``number`` to 7.

    Pages are fetched one after another from the module-level ``url``
    template. The id of the last post on a page is used as ``max_id`` for the
    next request. Paging stops after page 7 or as soon as a response contains
    no posts.

    Args:
        number: Page number to start from. Nothing is fetched when it is
            greater than 7.
        max_id: Value for the ``max_id`` query parameter of the first
            request. ``None`` requests ``max_id=-1`` with ``count=10``.
        count: Value for the ``count`` query parameter of the first request
            when ``max_id`` is given; ``None`` falls back to 10. Every
            following request uses 15.

    Returns:
        None. Results are written to standard output.

    Raises:
        urllib.error.URLError: If a request fails.
        KeyError: If a post lacks the ``id``/``data`` fields or its decoded
            ``data`` lacks ``title``.
    """
    last_page = 7
    first_page_count = 10
    next_page_count = 15
    headers = {
        'Accept': '*/*',
        # Accept-Encoding / Accept-Language are deliberately not sent: the body
        # is decoded as UTF-8 below without any decompression step.
        'Connection': 'keep-alive',
        # NOTE(review): hard-coded browser session cookie; presumably it expires
        # and must be refreshed for the request to keep working -- confirm.
        'Cookie': 'device_id=3049fba19293376977728a287084d21f; _ga=GA1.2.780783310.1531212991; s=e212ctwtfc; __utmz=1.1531213044.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utma=1.780783310.1531212991.1531213044.1531220599.2; aliyungf_tc=AQAAADtGMFhh1gsAUhVFeSfUZkqI1Vuj; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _gid=GA1.2.1152894742.1534296305; _gat_gtag_UA_16079156_4=1; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534296305; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534296305; u=211534296306130',
        'Host': 'xueqiu.com',
        'Referer': 'https://xueqiu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # A loop instead of self-recursion: same request sequence, no stack growth.
    while number <= last_page:
        if max_id is None:
            full_url = url.format(-1, first_page_count)
        else:
            # Guard against count=None leaking into the query string as "None".
            page_count = first_page_count if count is None else count
            full_url = url.format(max_id, page_count)
        print('第%d页:'%number)
        req = request.Request(full_url, headers=headers)
        # The context manager closes the HTTP response once the body is read.
        with request.urlopen(req) as response:
            result = response.read().decode('utf-8')
        posts = json.loads(result).get('list')
        if not posts:
            # No posts on this page: there is no last id to continue from.
            break
        for post in posts:
            # Each post's 'data' field is itself a JSON-encoded string.
            detail = json.loads(post['data'])
            print(post['id'], detail['title'])
        number += 1
        max_id = posts[-1]['id']
        count = next_page_count

# Script entry point: start at page 1 with max_id=-1 and count=10.
if __name__ == '__main__':
    xueqiu(1,-1,10)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值