python3批量查询域名权重、标题

最新推荐文章于 2024-03-25 10:41:43 发布

小鱼儿waha

最新推荐文章于 2024-03-25 10:41:43 发布

阅读量1.1k

点赞数

分类专栏： python 文章标签： python3批量查询权重、标题

我希望自己永远怀着一颗学习的心！

本文链接：https://blog.csdn.net/q1352483315/article/details/95333156

版权

python 专栏收录该内容

41 篇文章 0 订阅

订阅专栏

自己写了个脚本，供大家参考。
用法：python3 xxx.py -r xxx.txt（xxx.txt 中每行一个域名；查询结果保存在当前目录下的 test.html 文件中；运行前需先安装 requests 和 progressbar 库）

#coding=utf-8
#author=cbd666
import progressbar
import argparse
import requests
import re

# Matches the `baiduapp/<weight>.gif` image path; group 1 is the Baidu weight.
_WEIGHT_RE = re.compile(r'baiduapp/(.*?)\.gif')
# Matches the element holding the site title; group 1 is the title markup.
_TITLE_RE = re.compile(r'class="ball">(.*?)</div>')

# Name of the report written to the current working directory.
_OUTPUT_FILE = 'test.html'

# Opening part of the report, up to and including the table header row.
_PAGE_HEAD = '''
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>批量查询权重和标题</title>
    </head>
    <style type="text/css">
        .xxxx{
            padding-left: 150px;
        }

        td{
            height: 20px;
            line-height: 20px;
            text-align: center;
            padding-left:140px;
        }

    </style>
    <body>
        <table>
            <tr>
                <th class="xxxx">域名</th>
                <th class="xxxx">权重查询</th>
                <th class="xxxx">网站标题</th>
            </tr>'''

# One table row: {0} link target, {1} link text, {2} weight, {3} site title.
_ROW_TEMPLATE = '''
                        <tr>
                            <td><a href="{0}" target='_blank'>{1}</a></td>
                            <td>{2}</td>
                            <td>{3}</td>
                        </tr>
                '''

# Closing part of the report.
_PAGE_TAIL = '''
    </table>
    </body>
    </html>
    '''


def _query_domain(domain):
    """Look up one domain on seo.chinaz.com.

    Returns a ``(weight, title)`` tuple of strings taken from the response
    page, or ``None`` when the request fails or the page does not contain
    both values. Failures are reported on stdout instead of being raised so
    that one bad domain does not abort the whole batch.
    """
    url = 'http://seo.chinaz.com/{domain}'.format(domain=domain)
    headers = {
        'Host': 'seo.chinaz.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) Gecko/20100101 Firefox/61.0',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    data = 'm=&host={domain}'.format(domain=domain)

    try:
        response = requests.post(url=url, headers=headers, data=data, timeout=10)
        page = response.content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError) as err:
        print('{0}: query failed ({1})'.format(domain, err))
        return None

    weight = _WEIGHT_RE.search(page)
    title = _TITLE_RE.search(page)
    if weight is None or title is None:
        print('{0}: weight or title not found in response'.format(domain))
        return None
    return weight.group(1), title.group(1)


def main():
    """Query weight and title for every domain in the ``-r`` file.

    Reads one domain per line from the file given with ``-r`` (blank lines
    are ignored), queries each one, and writes the results as an HTML table
    to ``test.html`` in the current directory.

    When ``-r`` is missing the help text is printed and nothing else is
    done. When the input file cannot be read the error is printed and no
    report is written.
    """
    # Imported locally because the file does not import `html` at top level.
    from html import escape, unescape

    parser = argparse.ArgumentParser()
    parser.add_argument('-r', dest='read', help='select domains file')
    args = parser.parse_args()

    # No input file: show usage and stop instead of trying to open None.
    if args.read is None:
        parser.print_help()
        return

    try:
        with open(args.read, 'r') as domains_file:
            # strip() also removes a trailing '\r' from Windows-style files.
            domains = [line.strip() for line in domains_file if line.strip()]
    except (OSError, UnicodeDecodeError) as err:
        print(err)
        return

    rows = []
    bar = progressbar.ProgressBar()
    for domain in bar(domains):
        result = _query_domain(domain)
        if result is None:
            continue
        weight, title = result

        # The link needs a scheme, otherwise the browser treats it as a
        # path relative to the report file.
        if domain.startswith(('http://', 'https://')):
            link = domain
        else:
            link = 'http://' + domain

        # The title is scraped markup: decode its entities first so they are
        # not escaped twice, then escape everything going into the report.
        rows.append(_ROW_TEMPLATE.format(
            escape(link),
            escape(link),
            escape(weight),
            escape(unescape(title)),
        ))

    # Explicit UTF-8 so the Chinese text is written correctly on every
    # platform; the page declares the same charset.
    with open(_OUTPUT_FILE, 'w', encoding='utf-8') as report:
        report.write(_PAGE_HEAD + ''.join(rows) + _PAGE_TAIL)
    print('查询完毕，结果保存在当前目录下的test.html文件中。')


# Run the batch query only when executed as a script, not when imported.
if __name__ == "__main__":
    main()