Python 脚本:使用无头浏览器(Chrome)进行截图。运行此代码的前提是本地已安装 Chrome 和 chromedriver(Windows 下为 chromedriver.exe)。
import re
import requests
import io
import sys
from selenium import webdriver
# Force UTF-8 on stdout so non-ASCII text printed by this script is encoded
# consistently for whatever process captures the output.
# Prefer the in-place reconfigure() (Python 3.7+): it changes the encoding of
# the existing stream instead of layering a second wrapper over the same
# underlying buffer. Fall back to the re-wrap on older interpreters.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
else:
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")

# Baidu search URL template: %s is the search keyword, %d is followed by a
# literal "0" (so passing 1 yields "pn=10").
site_baidu = "http://www.baidu.com/s?wd=%s&pn=%d0"
def getPic(url, save_path='/www/wwwroot/sem.conn.in/public/photoes/weiyun.png'):
    """Open *url* in headless Chrome and save a screenshot of the page.

    Args:
        url: Address of the page to load.
        save_path: File the PNG screenshot is written to. Defaults to the
            location the script originally hard-coded.

    The result of ``get_screenshot_as_file`` is printed to stdout so that a
    calling process can read it; nothing is returned.
    """
    chrome_options = webdriver.ChromeOptions()
    # Absolute path to the Chrome binary (an absolute path is recommended).
    chrome_options.binary_location = '/opt/google/chrome/chrome'
    # Run Chrome without opening a window.
    chrome_options.add_argument('-headless')
    chrome_options.add_argument('-disable-gpu')
    chrome_options.add_argument('-no-sandbox')
    chrome_options.add_argument('-hide-scrollbars')
    # Image loading is intentionally left enabled: adding
    # 'blink-settings=imagesEnabled=false' would leave images out of the
    # screenshot.
    # chrome_options.add_argument('-disable-features=NetworkService')

    # Absolute path to the chromedriver executable (an absolute path is
    # recommended).
    chrome = '/opt/google/chromedriver'
    # NOTE(review): `executable_path` / `chrome_options` are the keyword names
    # of older Selenium releases; confirm against the installed version.
    driver = webdriver.Chrome(executable_path=chrome, chrome_options=chrome_options)
    try:
        driver.set_window_size(1200, 1520)
        driver.get(url)
        print(driver.get_screenshot_as_file(save_path))
    finally:
        # Always shut the browser down, even when loading the page or taking
        # the screenshot fails, so no headless Chrome process is left behind.
        driver.quit()
if __name__ == "__main__":
    # The search keyword is taken from the first command-line argument.
    # Exit with a usage message instead of an IndexError traceback when it
    # is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: python3 %s <keyword>" % sys.argv[0])
    # Passing 1 for the %d placeholder produces "pn=10" in the query string.
    url1 = site_baidu % (sys.argv[1], 1)
    getPic(url1)
PHP 脚本
调用 Python 文件,并向其传递参数。
namespace app\paiming\controller;
use think\Controller;
class Screenshot extends Controller
{
    /**
     * Run the Python screenshot script and dump its output.
     *
     * Builds a `python3 .../screenshot/test.py <site> <keywords>` command
     * relative to this controller's directory, executes it with stderr
     * merged into stdout, and dumps both the command line and the captured
     * output lines.
     *
     * @return void
     */
    public function getPic()
    {
        // Directory of this file with forward slashes and a trailing slash.
        // Guarded because define() raises a warning when the constant
        // already exists, e.g. if this action runs twice in one process.
        if (!defined('BASE_PATH')) {
            define('BASE_PATH', str_replace('\\', '/', realpath(__DIR__)) . '/');
        }

        // URL-encode the keyword so it is passed as a single ASCII argument.
        $keywords = urlencode('衡水软件开发');
        $site = 'www.eshengshi.com';

        // "2>&1" folds the script's stderr into the captured output.
        $url = 'python3 ' . BASE_PATH . 'screenshot/test.py ' . $site . ' ' . $keywords . ' 2>&1';
        dump($url);

        // exec() appends to an existing array, so start from an empty one.
        $out = array();
        exec($url, $out);
        dump($out);
    }
}
访问 PHP 接口进行测试,结果是:在项目的 public/photoes/ 路径下会多出一张图片,接口输出如下。
array(4) {
[0] => string(74) "/www/wwwroot/sem.conn.in/application/paiming/controller/screenshot/test.py"
[1] => string(26) "rank 1: www.eshengshi.com/"
[2] => string(4) "True"
[3] => string(33) "输入的关键词排在第 1 名"
}