Python & SEO: source code for scraping Baidu related-search keywords

Quick-and-dirty code, treat it as a reference only!

```python
# -*- coding: utf-8 -*-
# Baidu related-search keyword scraper
# 20200714 by WeChat: huguo00289


import requests,time,random
from lxml import etree
from fake_useragent import UserAgent


def get_keyword(keyword):
    data=[]
    ua=UserAgent()
    headers={
        'Cookie': 'PSTM=1558408522; BIDUPSID=BFDF2424811E5E531D933DC854B78C67; BAIDUID=BFDF2424811E5E531D933DC854B78C67:SL=0:NR=10:FG=1; MSA_WH=375_812; BD_UPN=12314353; H_WISE_SIDS=144367_142699_144157_142019_144883_141875_141744_143161_144989_144420_144134_142919_144483_136861_131246_137745_144743_138883_140259_141942_127969_144171_140065_144338_140593_143057_141808_140350_144608_144727_143923_131423_144289_142206_144220_144501_107312_143949_144105_144306_143478_144966_142911_140312_143549_143647_144239_142113_143855_136751_140842_110085; BDUSS_BFESS=1vQzN4d0pPNzB2MUQyUUQtV3d6OEZzYldhN2FWUm1RZEZ3UUVyb1Y1Mmtqc0JlSVFBQUFBJCQAAAAAAAAAAAEAAACgwJmS08W4xcTuAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKQBmV6kAZleVW; MCITY=-%3A; sug=3; sugstore=0; ORIGIN=0; bdime=0; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=LuuOJexroG3_dMRrBfK9UG9zgmKK0gOTDYLEUamaI2AU2V4VN4vPEG0Pt_U-mEt-J8jwogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tbkD_C-MfIvhDRTvhCcjh-FSMgTBKI62aKDs2P5aBhcqJ-ovQTbrbMuwK45hB5cP3b5E0b6cWKJJ8UbeWfvp3t_D-tuH3lLHQJnp2DbKLp5nhMJmBp_VhfL3qtCOaJby523ion3vQpP-OpQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0-nDSHH-tt6De3j; delPer=0; BD_CK_SAM=1; PSINO=7; H_PS_PSSID=1457_31670_32141_32139_32046_32230_32092_32298_26350_32261; COOKIE_SESSION=7_0_4_5_9_5_0_3_2_3_0_0_1608_0_0_0_1594720087_0_1594723266%7C9%23328033_18_1594447339%7C9; H_PS_645EC=8046hkQMotVPI51%2B5I0oGWsgl5ams9mPpS71Aw1L%2FgLPGzpf4I2A6FpO8U4',
        'User-Agent': ua.random,
    }
    url=f"https://www.baidu.com/s?wd={keyword}&ie=UTF-8"
    html=requests.get(url,headers=headers,timeout=5).content.decode('utf-8')
    time.sleep(2)
    try:
        req = etree.HTML(html)
        # Collect all text nodes under the related-search block (div#rs)
        tt = req.xpath('//div[@id="rs"]//text()')
        # Drop the "相关搜索" ("related searches") heading, keeping only the keywords
        tt.remove('相关搜索')
        print(tt)
        data = tt
    except Exception as e:
        print(e.args)
        time.sleep(5)
        print(f">> Waiting 5s, retrying the collection of related keywords for {keyword}")
        # Fall back to the rotating user-agent fetcher and keep its result
        data = get_ua_keyword(keyword)

    return data


def get_ua_keyword(keyword):
    # Fallback fetcher that rotates through a fixed pool of user agents
    data = []
    print(f'>> Collecting related keywords for {keyword}..')
    ua_list = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1',
    ]
    headers = {
        'Cookie': 'PSTM=1558408522; BIDUPSID=BFDF2424811E5E531D933DC854B78C67; BAIDUID=BFDF2424811E5E531D933DC854B78C67:SL=0:NR=10:FG=1; MSA_WH=375_812; BD_UPN=12314353; H_WISE_SIDS=144367_142699_144157_142019_144883_141875_141744_143161_144989_144420_144134_142919_144483_136861_131246_137745_144743_138883_140259_141942_127969_144171_140065_144338_140593_143057_141808_140350_144608_144727_143923_131423_144289_142206_144220_144501_107312_143949_144105_144306_143478_144966_142911_140312_143549_143647_144239_142113_143855_136751_140842_110085; BDUSS_BFESS=1vQzN4d0pPNzB2MUQyUUQtV3d6OEZzYldhN2FWUm1RZEZ3UUVyb1Y1Mmtqc0JlSVFBQUFBJCQAAAAAAAAAAAEAAACgwJmS08W4xcTuAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKQBmV6kAZleVW; MCITY=-%3A; sug=3; sugstore=0; ORIGIN=0; bdime=0; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=LuuOJexroG3_dMRrBfK9UG9zgmKK0gOTDYLEUamaI2AU2V4VN4vPEG0Pt_U-mEt-J8jwogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tbkD_C-MfIvhDRTvhCcjh-FSMgTBKI62aKDs2P5aBhcqJ-ovQTbrbMuwK45hB5cP3b5E0b6cWKJJ8UbeWfvp3t_D-tuH3lLHQJnp2DbKLp5nhMJmBp_VhfL3qtCOaJby523ion3vQpP-OpQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xXj_0-nDSHH-tt6De3j; delPer=0; BD_CK_SAM=1; PSINO=7; H_PS_PSSID=1457_31670_32141_32139_32046_32230_32092_32298_26350_32261; COOKIE_SESSION=7_0_4_5_9_5_0_3_2_3_0_0_1608_0_0_0_1594720087_0_1594723266%7C9%23328033_18_1594447339%7C9; H_PS_645EC=8046hkQMotVPI51%2B5I0oGWsgl5ams9mPpS71Aw1L%2FgLPGzpf4I2A6FpO8U4',
         'User-Agent': random.choice(ua_list)
    }
    url = f"https://www.baidu.com/s?wd={keyword}&ie=UTF-8"
    html = requests.get(url, headers=headers, timeout=5).content.decode('utf-8')
    time.sleep(2)
    try:
        # Only parse when the page actually contains a related-search block
        if '相关搜索' in html:
            req = etree.HTML(html)
            tt = req.xpath('//div[@id="rs"]//text()')
            # Drop the "相关搜索" ("related searches") heading text
            tt.remove('相关搜索')
            print(tt)
            data = tt
        else:
            print(f">> No related keywords for {keyword}!!")
            data = []
    except Exception as e:
        print(e.args)
        print(f">> Failed to collect related keywords for {keyword}!!")
        print('>> Saving the failed keyword..')
        # Log failed keywords so they can be retried later
        with open('fail_keywords.txt', 'a+', encoding='utf-8') as f:
            f.write(f'{keyword}\n')

    return data


def lead_keywords():
    print('>> Loading the keyword list..')
    try:
        # Try GBK first, then fall back to UTF-8
        with open('keyss.txt', 'r', encoding='gbk') as f:
            keywords = f.readlines()
    except UnicodeDecodeError:
        with open('keyss.txt', 'r', encoding='utf-8') as f:
            keywords = f.readlines()

    print(keywords)
    print('>> Keyword list loaded!')

    return keywords


def save(datas):
    print('>> Saving the related-keyword list..')
    with open('keywords.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(datas))

    print('>> Related-keyword list saved!')


def main():
    datas = []
    keywords = lead_keywords()
    for keyword in keywords:
        # strip() returns a new string, so keep the stripped value
        keyword = keyword.strip()
        data = get_keyword(keyword)
        datas.extend(data)

    save(datas)


if __name__ == '__main__':
    main()
```
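
The script expects a keyss.txt file with one seed keyword per line and writes every collected related keyword to keywords.txt. Because the same related keyword often shows up under several seeds, a small deduplication pass before saving can help; below is a minimal sketch (the `dedupe` helper is an addition, not part of the original script):

```python
def dedupe(items):
    # Drop duplicate keywords while preserving their original order
    seen = set()
    return [x for x in items if not (x in seen or seen.add(x))]

# Hypothetical usage inside main(), just before the save() call:
# datas = dedupe(datas)
# save(datas)
```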

Disclaimer: this code is for learning purposes only; do not use it for anything illegal.

Batch-checking Baidu rankings for a list of keywords can be done with the requests and beautifulsoup libraries. The steps are:

1. Read a CSV file containing the keywords and target sites.
2. Build the Baidu search URL.
3. Send the request and fetch the page content.
4. Parse the page with beautifulsoup and locate the ranking information.
5. Save the results to a CSV file.

The following Python snippet batch-queries the ranking of a given site on Baidu for each keyword:

```python
import requests
from bs4 import BeautifulSoup
import csv

# Read the CSV file containing keywords and target sites
with open('keywords.csv', 'r', encoding='utf-8') as f:
    reader = csv.reader(f)
    keywords = [row for row in reader]

# Template for the Baidu search URL
url_template = 'https://www.baidu.com/s?wd={}'
results = []

# Query the ranking for every keyword/site pair
for keyword, site in keywords:
    url = url_template.format(keyword)
    # Send the request and fetch the page content
    response = requests.get(url)
    html = response.text
    # Parse the page with BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')
    # Locate the organic result containers
    search_results = soup.find_all('div', class_='result c-container ')
    # Walk the results looking for the target site
    for i, result in enumerate(search_results):
        link = result.find('a')['href']
        if site in link:
            results.append([keyword, site, i + 1])
            break
    else:
        # for/else: no result linked to the target site
        results.append([keyword, site, 'not ranked'])

# Save the results to a CSV file
with open('results.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['keyword', 'site', 'rank'])
    writer.writerows(results)
```

This snippet batch-queries each keyword's ranking for the given site on Baidu and saves the results to a CSV file. Note that in practice the code has to be adjusted whenever the structure of Baidu's search result pages changes.
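
For reference, the snippet above assumes keywords.csv holds one keyword and one target domain per row, for example (hypothetical values):

```
python tutorial,example.com
seo tools,example.org
```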
