Scraping Google search results with the requests library

import requests
from bs4 import BeautifulSoup
import json
import time
import csv

# Google Search endpoint and the query-string parameters captured from a real
# browser session via DevTools. Only 'q' (the search query) and 'start' (the
# result offset, multiples of 10) are filled in programmatically below; the
# remaining entries are opaque session/tracking tokens copied verbatim.
# NOTE(review): tokens such as 'sxsrf', 'ei', 'gs_ssp' and 'ved' look
# session-bound and are presumably stale by now — verify whether Google still
# answers without them before relying on this request shape.
url='https://www.google.com/search'
params = {
    'safe': 'active',  # SafeSearch enabled
    'sxsrf': 'ALeKk03BfKAgtDobSNR4uteXut6N__y38g:1611033698497',
    'ei': 'YmwGYLPyHdXh-AbPzKToCQ',
    'q': '',  # overwritten with the actual query before the request loop
    'gs_ssp': 'eJzj4tLP1TcwKzcqzjM1YPTizSotLsnMU0jKTE1KLQIAcHYIsw',
    'oq': 'justi',
    'gs_lcp': 'CgZwc3ktYWIQAxgAMg0ILhCxAxCDARBDEJMCMgoILhCxAxCDARBDMgQILhBDMgcILhCxAxBDMgcIABCxAxBDMgcILhCxAxBDMgQIABBDMggILhCxAxCDATICCAAyBQgAELEDOgUIABCRAjoICAAQsQMQgwE6BQguELEDUOqpBljaugZgjMsGaABwAngBgAGXBYgBvRKSAQkyLTEuMC4yLjKYAQCgAQGqAQdnd3Mtd2l6wAEB',
    'sclient': 'psy-ab',
    'start': '',  # overwritten with the page offset inside the request loop
    'sa': 'N',
    'ved': '2ahUKEwjf-LW_t6fuAhWFd94KHXqXBo0Q8tMDegQIVxA2',
    'biw': '876',   # browser viewport width at capture time
    'bih': '900',   # browser viewport height at capture time
    'dpr': '1.5'    # device pixel ratio at capture time
}

# Request headers copied from the same captured browser session so the request
# resembles a real Chrome navigation (user-agent, sec-fetch-*, referer, etc.).
# NOTE(review): the 'cookie' value embeds account session tokens (SID, HSID,
# SAPISID, ...) from a logged-in Google session — these are sensitive, almost
# certainly expired, and should not be committed to source control; confirm
# whether the scrape works with the cookie header removed.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'en,zh-CN;q=0.9,zh;q=0.8',
    'cache-control': 'no-cache',
    'cookie': 'CGIC=IocBdGV4dC9odG1sLGFwcGxpY2F0aW9uL3hodG1sK3htbCxhcHBsaWNhdGlvbi94bWw7cT0wLjksaW1hZ2UvYXZpZixpbWFnZS93ZWJwLGltYWdlL2FwbmcsKi8qO3E9MC44LGFwcGxpY2F0aW9uL3NpZ25lZC1leGNoYW5nZTt2PWIzO3E9MC45; HSID=AtL1Fuks6GN88NgIF; SSID=A5G1V1zzIEPkl30hy; APISID=R62Dg908smzBc75_/AE6wtPR0lHi3PNcyr; SAPISID=DhY-UzCNe22FLTow/AjmaKK4_-XFREXtVX; __Secure-3PAPISID=DhY-UzCNe22FLTow/AjmaKK4_-XFREXtVX; SID=5AcO7y9MYqK1JBYbOB9T7xeWcJwKQjOtRbbCh60AFgdUo6QyiT-wrT0furAG-H4tCk87hA.; __Secure-3PSID=5AcO7y9MYqK1JBYbOB9T7xeWcJwKQjOtRbbCh60AFgdUo6Qyoq7YhstvGbLeLKfl7S3HHw.; OTZ=5796681_24_24__24_; SEARCH_SAMESITE=CgQI0ZEB; ANID=AHWqTUnyp6Ge1xtQ_TL0NAZmTvouupte3kUSnVW6oKAZd5CJZEL6eTEKr8Dvuy3J; 1P_JAR=2021-01-19-03; NID=207=QJ3H1_PEEqH87e2HJ9-LYqdl8T4kq3B7Ybxa6cnWTvf6FOu5kuFPIMN6sjPTa6uGQQgd_ILLwdgrFexdNcG1edmOLPwamevgB8wWBZTt8zDQ0C1qGnoO-0HT4-DD2bHyIv4mYZhQXkQwYYY1YlC1woUo4hIJbB6fI9shEBm_UIAElqTHRWjiudjgWc_VM69_cCacl4muQHElSfs-ok-7L6w0kp8-3pl6A8YVtdhoev3ms0LXWcOyTpmRP9vhrHNKBuZpSeRAuCGLI2PLSqtQspyzHMf3A18sqZIUM2hCgIhExcjR1UKwRrn7ikGllVwFqFSJDXAD9A; SIDCC=AJi4QfEXFZmOSCUgZxKqFr4vUETalX_wQAJOEnDTUtPaFDY2oUOdAha_UIyodIM1esXg72G-8w; __Secure-3PSIDCC=AJi4QfHq0q1H0qhjNe1pbyTuSEzm0nH3jNZD2QTrv-4yP_7QVwZ28wEHI0d7YL1SGNRlMlF1ow',
    'pragma': 'no-cache',
    'referer': 'https://www.google.com/',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'x-client-data': 'CIi2yQEIpLbJAQjBtskBCKmdygEIx8LKAQisx8oBCLTLygEIpM3KAQjAz8oBCNzVygEIlJrLAQjNmssBCNScywEIqZ3LAQiqncsBCK6dywEY+bjKARiqm8sB' 
    }

# NOTE: 'accept-encoding': 'gzip, deflate, br' is deliberately left out of the
# headers above — requests negotiates and decompresses gzip/deflate itself.

# Fetch the first 5 result pages (10 organic results each) for the query and
# collect title / link / description for every result block into `results`.
params['q'] = 'justin bieber'
results = []
for page in range(5):
    params['start'] = str(page * 10)  # Google pages results in steps of 10
    response = requests.get(url, params=params, headers=headers)
    print('Get Response from : %s | Status Code : %s' % (response.url, response.status_code))
    content = BeautifulSoup(response.text, "lxml")

    # BUG FIX: the original passed set literals like {'class','tF2Cxc'} where a
    # dict {'class': 'tF2Cxc'} was intended — bs4 coerced the set into a class
    # filter that also matched the literal class name "class". Use real dicts.
    for block in content.find_all('div', {'class': 'tF2Cxc'}):
        title_tag = block.find('h3', {'class': 'LC20lb DKV0Md'})
        link_wrap = block.find('div', {'class': 'yuRUbf'})
        desc_tag = block.find('span', {'class': 'aCOpRe'})
        # Guard against markup drift: any of the sub-elements may be missing,
        # and the original would crash with AttributeError on None.
        if title_tag is None or link_wrap is None or link_wrap.find('a') is None:
            continue
        # BUG FIX: the original stored .encode('ascii', 'ignore') bytes, which
        # the CSV writer later serialized as "b'...'" strings. Keep plain str;
        # the CSV file is opened with an explicit encoding instead.
        results.append({
            'Title': title_tag.get_text(),
            'Link': link_wrap.find('a')['href'],
            'Description': desc_tag.get_text() if desc_tag is not None else '',
        })

    # Throttle between pages to reduce the chance of being rate-limited.
    time.sleep(3)

# Export the collected results to CSV, one row per search hit.
# BUG FIX: the original unconditionally read results[0].keys() and raised
# IndexError when the scrape produced no rows; guard for the empty case.
if results:
    # utf-8 so non-ASCII titles/descriptions survive the round trip.
    with open('JBscrape.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=results[0].keys())
        writer.writeheader()
        writer.writerows(results)
    print('Exported results to "JBscrape.csv" file')
else:
    print('No results scraped; nothing exported')

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
使用Python谷歌图片可以通过以下步骤实现: 1. 导入必要的库:使用Python中的requests库来发送HTTP请求,使用BeautifulSoup库来解析网页内容。 2. 定义搜索关键词:设置要搜索的关键词。 3. 发送HTTP请求:使用requests库发送HTTP GET请求到谷歌图片的搜索页面,并传入搜索关键词作为参数。 4. 解析网页内容:使用BeautifulSoup库解析返回的HTML页面内容,通过查找相关的标签和类来提取出图片的URL地址。 5. 下载图片:使用requests库发送GET请求获取图片的URL,并将图片保存到本地。 以下为一个简单的示例代码: ```python import requests from bs4 import BeautifulSoup def download_google_images(keyword, num_images): url = f'https://www.google.com/search?q={keyword}&tbm=isch' response = requests.get(url) soup = BeautifulSoup(response.content, 'html.parser') images = soup.find_all('img') image_urls = [img['src'] for img in images] for i, image_url in enumerate(image_urls[:num_images]): response = requests.get(image_url) with open(f'image_{i}.jpg', 'wb') as f: f.write(response.content) keyword = "猫咪" num_images = 5 download_google_images(keyword, num_images) ``` 在上述代码中,我们定义了一个`download_google_images`函数,该函数接受两个参数:`keyword`表示要搜索的关键词,`num_images`表示要下载的图片数量。函数内部首先构建了谷歌图片搜索的URL,然后发送HTTP请求获取搜索结果页面。接着使用BeautifulSoup库解析网页内容,查找所有的图片标签,并提取出图片的URL。最后,使用requests库发送GET请求获取图片,并将图片保存到本地。 注意:谷歌的图像搜索可能对虫做了限制,需要通过访问频率控制等方式避免被封禁。另外,取他人的内容请遵守法律和道德准则。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值