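# Build search URLs that target results with specific characteristics.
# Uses the advanced query syntax of the Baidu, Google and Bing search engines
# to generate a search URL whose results match the requested features.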
import urllib.parse
import sys
"""
example:
query = "nginx reverse proxy"
accurate_query = "accurate_query"
site = "nginx.org"
filetype = "pdf"
inurl = "config"
intitle = "setup"
after = "2022-01-01"
before = "2023-01-01"
ip = "8.8.8.8"
page_num = 1
"""
def generate_bing_search_url(query=None, accurate_query=None, site=None, filetype=None,
                             inurl=None, intitle=None,
                             ip=None,
                             after=None, before=None, page_num=1):
    """Build a Bing search URL from keywords and optional advanced-search terms.

    Every argument that is truthy contributes one space-separated term to the
    ``q`` parameter; falsy arguments (``None``, ``""``) are skipped.

    Args:
        query: Free-text keywords, added as-is.
        accurate_query: Exact phrase; added wrapped in double quotes. It is
            added in addition to ``query`` when both are supplied.
        site: Added as ``site:<value>``.
        filetype: Added as ``filetype:<value>``.
        inurl: Added as ``inurl:<value>``.
        intitle: Added as ``intitle:<value>``.
        ip: Added as ``ip:<value>``.
        after: Added as ``after:<value>``.
        before: Added as ``before:<value>``.
        page_num: 1-based page number (int or numeric string). Values below 1
            are treated as page 1.

    Returns:
        The full search URL, or ``""`` when no search term was supplied.

    Raises:
        ValueError: If ``page_num`` cannot be converted to an integer.
    """
    base_url = "https://www.bing.com/search"

    terms = []
    if query:
        terms.append(f"{query}")
    if accurate_query:
        # Quote the exact phrase itself (not the free-text query).
        terms.append(f'"{accurate_query}"')

    operators = (
        ("site", site),
        ("filetype", filetype),
        ("inurl", inurl),
        ("intitle", intitle),
        ("ip", ip),
        ("after", after),
        ("before", before),
    )
    terms.extend(f"{name}:{value}" for name, value in operators if value)

    if not terms:
        return ""

    # Joining avoids the stray leading space a filter-only query used to get.
    search_query = " ".join(terms)

    # Ten results per page; clamp so the offset can never go negative.
    # NOTE(review): Bing's web UI appears to treat `first` as 1-based
    # (1, 11, 21, ...); the original 0-based offset is kept here - confirm.
    offset = (max(int(page_num), 1) - 1) * 10
    params = {"q": search_query, "first": offset}
    return f"{base_url}?{urllib.parse.urlencode(params)}"
def generate_baidu_search_url(query=None, site=None, filetype=None, inurl=None, intitle=None,
                              accurate_query=None,
                              ip=None, after=None, before=None, page_num=1):
    """Build a Baidu search URL from keywords and optional advanced-search terms.

    Every argument that is truthy contributes one space-separated term to the
    ``wd`` parameter; falsy arguments (``None``, ``""``) are skipped.

    Args:
        query: Free-text keywords, added as-is.
        site: Added as ``site:<value>``.
        filetype: Added as ``filetype:<value>``.
        inurl: Added as ``inurl:<value>``.
        intitle: Added as ``intitle:<value>``.
        accurate_query: Exact phrase; added wrapped in double quotes. It is
            added in addition to ``query`` when both are supplied.
        ip: Added as a bare term (no operator prefix).
        after: Added as ``after:<value>``.
        before: Added as ``before:<value>``.
        page_num: 1-based page number (int or numeric string). Values below 1
            are treated as page 1.

    Returns:
        The full search URL, or ``""`` when no search term was supplied.

    Raises:
        ValueError: If ``page_num`` cannot be converted to an integer.
    """
    base_url = "https://www.baidu.com/s"

    # Collect terms in a list so the query is always defined, even when
    # neither `query` nor `accurate_query` is given.
    terms = []
    if query:
        terms.append(f"{query}")
    if accurate_query:
        # Quote the exact phrase itself (not the free-text query).
        terms.append(f'"{accurate_query}"')

    if site:
        terms.append(f"site:{site}")
    if filetype:
        terms.append(f"filetype:{filetype}")
    if inurl:
        terms.append(f"inurl:{inurl}")
    if intitle:
        terms.append(f"intitle:{intitle}")
    if ip:
        # The IP is passed as a plain keyword, without an operator.
        terms.append(f"{ip}")
    if after:
        terms.append(f"after:{after}")
    if before:
        terms.append(f"before:{before}")

    if not terms:
        return ""

    search_query = " ".join(terms)

    # `pn` is the result offset: ten results per page, never negative.
    offset = (max(int(page_num), 1) - 1) * 10
    params = {"wd": search_query, "pn": offset}
    return f"{base_url}?{urllib.parse.urlencode(params)}"
def generate_google_search_url(query=None, site=None, filetype=None, inurl=None, intitle=None,
                               after=None, before=None, ip=None,
                               accurate_query=None, page_num=1):
    """Build a Google search URL from keywords and optional advanced-search terms.

    Every argument that is truthy contributes one space-separated term to the
    ``q`` parameter; falsy arguments (``None``, ``""``) are skipped.

    Args:
        query: Free-text keywords, added as-is.
        site: Added as ``site:<value>``.
        filetype: Added as ``filetype:<value>``.
        inurl: Added as ``inurl:<value>``.
        intitle: Added as ``intitle:<value>``.
        after: Added once as ``after:<value>``.
        before: Added once as ``before:<value>``.
        ip: Added as a bare term (no operator prefix).
        accurate_query: Exact phrase; added wrapped in double quotes. It is
            added in addition to ``query`` when both are supplied.
        page_num: 1-based page number (int or numeric string). Values below 1
            are treated as page 1.

    Returns:
        The full search URL, or ``""`` when no search term was supplied.

    Raises:
        ValueError: If ``page_num`` cannot be converted to an integer.
    """
    base_url = "https://www.google.com/search"

    # Collect terms in a list so the query is always defined, even when
    # neither `query` nor `accurate_query` is given.
    terms = []
    if query:
        terms.append(f"{query}")
    if accurate_query:
        # Quote the exact phrase itself (not the free-text query).
        terms.append(f'"{accurate_query}"')

    prefixed = (
        ("site:", site),
        ("filetype:", filetype),
        ("inurl:", inurl),
        ("intitle:", intitle),
        ("", ip),  # the IP is a plain keyword, without an operator
        ("after:", after),  # date range is emitted exactly once
        ("before:", before),
    )
    terms.extend(f"{prefix}{value}" for prefix, value in prefixed if value)

    if not terms:
        return ""

    search_query = " ".join(terms)

    # `start` is the result offset: ten results per page, never negative.
    offset = (max(int(page_num), 1) - 1) * 10
    params = {"q": search_query, "start": offset}
    return f"{base_url}?{urllib.parse.urlencode(params)}"
if __name__ == "__main__":
    # Interactive entry point: ask which engine to use, collect the search
    # terms from stdin, then print the generated search URL.

    # Map the menu choice to the matching URL builder.
    engine_builders = {
        "0": generate_bing_search_url,
        "1": generate_baidu_search_url,
        "2": generate_google_search_url,
    }

    # Named `engine_choice` to avoid shadowing the builtin `type`.
    engine_choice = input("选择搜索引擎 0 bing 1 baidu 2 google")
    engine_type = engine_builders.get(engine_choice)
    if engine_type is None:
        print("未指定搜索引擎")
        sys.exit(0)

    # Collect the search terms; an empty answer leaves that term out.
    query = input("指定搜索字符串")
    accurate_query = input("指定精确搜索字符串")
    page_num = input("指定获取页码")
    site = input("指定 目标搜索限制域名")
    filetype = input("指定 目标页面的文件格式")
    inurl = input("指定搜索咪表URL中包含的关键词")
    intitle = input("指定目标网站标题包含指定关键词")
    after = input("指定 目标网站发布的起始时间")
    before = input("指定网页发布的终止时间")
    ip = input("指定目标网站托管的IP")

    # Default to the first page when no page number was entered.
    page_num = page_num if page_num else 1

    # Build and print the search URL. `accurate_query` is now forwarded;
    # previously it was read from the user but never used.
    search_url = engine_type(
        query,
        accurate_query=accurate_query,
        site=site,
        filetype=filetype,
        inurl=inurl,
        intitle=intitle,
        ip=ip,
        after=after,
        before=before,
        page_num=page_num,
    )
    print(search_url)