Python crawler: scraping Baidu search result titles

from urllib import request, parse
import http.cookiejar
from bs4 import BeautifulSoup

word = input("Enter a search keyword: ")
# Percent-encode the keyword so non-ASCII characters survive in the URL.
url = "http://www.baidu.com/s?wd=" + parse.quote(word)

headers={"Accept": "text/html, application/xhtml+xml, image/jxr, */*",

         "Accept - Encoding": "gzip, deflate, br",

         "Accept - Language": "zh - CN",

         "Connection": "Keep - Alive",
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
         "referer":"baidu.com"};
# Carry cookies across requests with a CookieJar-backed opener.
cjar = http.cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cjar))
opener.addheaders = list(headers.items())
request.install_opener(opener)

data = request.urlopen(url).read().decode('utf-8')
soup = BeautifulSoup(data, 'html.parser')
# Uncomment to print the HTML pretty-printed:
# print(soup.prettify())
# Each organic result title sits in an <h3 class="t"> wrapping the link.
for result_table in soup.find_all('h3', class_='t'):
    a_click = result_table.find("a")
    if a_click is None:
        continue
    print("-----Title-----\n" + a_click.get_text())
    print("-----Link-----\n" + str(a_click.get("href")))