linux通过部署https网站展示爬虫结果
最终结果
让我们回顾一下前面几篇文章
1.python fastapi get,post,静态资源访问 使用方式
2.python爬取图片链接(附带一个html装逼特效)
3.nginx ssl配置
以上文章展示了实现的关键技术📕(仅供个人娱乐)
网页代码实现
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>表情包搜索</title>
<link rel="icon" href="static/favicon.ico">
</head>
<body>
<input type="text" id="key" />
<button id="btn">搜索表情包</button>
<div id="result">
</div>
</body>
<script>
// Click handler for the search button: POST the keyword to /imgpath and
// render every returned image URL as an <img> inside the #result container.
document.getElementById("btn").onclick = function() {
    var key = document.getElementById("key").value;
    // Nothing to search for.
    if (key === "") { return; }

    var resultBox = document.getElementById("result");
    var xhr = new XMLHttpRequest();
    // Same-origin relative path: works whether the page is served over http
    // or https, and avoids the browser blocking an http:// request made from
    // an https:// page (mixed content).
    xhr.open('POST', '/imgpath');
    xhr.setRequestHeader("Content-type", "application/json");

    // Register the handler before send() so no state change can be missed.
    xhr.onreadystatechange = function() {
        // readyState 4 means the response has been fully received.
        if (xhr.readyState !== 4) { return; }
        // Non-2xx responses are ignored, leaving the previous results in place.
        if (xhr.status < 200 || xhr.status >= 300) { return; }

        // The response body is a JSON array of image URLs.
        var urls = JSON.parse(xhr.response);
        resultBox.innerHTML = "";
        for (var i = 0; i < urls.length; i++) {
            // Build the element through the DOM instead of concatenating
            // HTML: the URLs are scraped from a third-party site and must
            // not be able to inject markup.
            var img = document.createElement("img");
            img.src = urls[i];
            resultBox.appendChild(img);
        }
    };

    // The server expects the body shape {"item": {"key": ...}}.
    // NOTE(review): the keyword is sent wrapped in literal double quotes, as
    // in the original; presumably for exact-phrase matching upstream - confirm.
    xhr.send(JSON.stringify({"item": {"key": '"' + key + '"'}}));
};
</script>
</html>
1.不使用nginx代理
from fastapi import FastAPI,Body
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
import uvicorn as uvicorn
import re
import requests
from pydantic import BaseModel
# ASGI application instance; the routes below are registered on it.
app = FastAPI()


class Item(BaseModel):
    """Request payload for POST /imgpath."""

    # Search keyword submitted by the page.
    key: str


# Serve files from ./static under /static (the page loads static/favicon.ico).
app.mount("/static", StaticFiles(directory="static"), name="static")
# Serve the search page at the site root.
@app.get('/', response_class=HTMLResponse)
async def index():
    """Return the raw bytes of index.html as the HTML response body.

    The file is opened by relative path, so it is resolved against the
    process's current working directory.
    """
    with open("index.html", "rb") as f:
        return f.read()
# Endpoint called when the search button is pressed.
@app.post('/imgpath')
async def index(item: Item = Body(..., embed=True)):
    """Search biaoqingbao.net for ``item.key`` and return the image URLs found.

    The request body must be ``{"item": {"key": "<keyword>"}}``.

    Returns:
        A list of strings: every ``data-original`` attribute value captured
        from ``<img class="waitpic" ...>`` tags in the search-result page.

    Raises:
        HTTPException: 502 when the upstream request fails or times out.
    """
    import asyncio
    import functools

    from fastapi import HTTPException

    print(item.key)
    # Pass the keyword through ``params`` so it is URL-encoded; interpolating
    # it into the URL lets characters such as '&' or '#' corrupt the query.
    fetch = functools.partial(
        requests.get,
        'https://www.biaoqingbao.net/',
        params={'post_type': 'post', 's': item.key},
        timeout=10,  # never hang forever on an unresponsive upstream
    )
    try:
        # requests is blocking: run it in the default executor so the event
        # loop keeps serving other requests in the meantime.
        strhtml = await asyncio.get_running_loop().run_in_executor(None, fetch)
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502, detail="upstream search request failed"
        ) from exc
    # Extract the image URLs from the returned HTML.
    pic_url = re.findall(
        r'img class="waitpic" .*? data-original="([^=]*?)"',
        strhtml.text,
        re.S,
    )
    return pic_url
if __name__ == '__main__':
    # Listen on all interfaces on port 80 (plain HTTP). Ports below 1024
    # typically require elevated privileges on Linux.
    uvicorn.run(app=app, host='0.0.0.0', port=80)
2.使用nginx代理
如果选择不使用nginx代理,就不用看2了,1和2是两种方式
2.1python服务端代码实现
from fastapi import FastAPI,Body
import uvicorn as uvicorn
import re
import requests
from pydantic import BaseModel
# ASGI application instance; the route below is registered on it.
app = FastAPI()


class Item(BaseModel):
    """Request payload for POST /imgpath."""

    # Search keyword submitted by the page.
    key: str
@app.post('/imgpath')
async def index(item: Item = Body(..., embed=True)):
    """Search biaoqingbao.net for ``item.key`` and return the image URLs found.

    The request body must be ``{"item": {"key": "<keyword>"}}``.

    Returns:
        A list of strings: every ``data-original`` attribute value captured
        from ``<img class="waitpic" ...>`` tags in the search-result page.

    Raises:
        HTTPException: 502 when the upstream request fails or times out.
    """
    import asyncio
    import functools

    from fastapi import HTTPException

    print(item.key)
    # Pass the keyword through ``params`` so it is URL-encoded; interpolating
    # it into the URL lets characters such as '&' or '#' corrupt the query.
    fetch = functools.partial(
        requests.get,
        'https://www.biaoqingbao.net/',
        params={'post_type': 'post', 's': item.key},
        timeout=10,  # never hang forever on an unresponsive upstream
    )
    try:
        # requests is blocking: run it in the default executor so the event
        # loop keeps serving other requests in the meantime.
        strhtml = await asyncio.get_running_loop().run_in_executor(None, fetch)
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502, detail="upstream search request failed"
        ) from exc
    # Extract the image URLs from the returned HTML.
    pic_url = re.findall(
        r'img class="waitpic" .*? data-original="([^=]*?)"',
        strhtml.text,
        re.S,
    )
    return pic_url
if __name__ == '__main__':
    # Listen on all interfaces on port 8000; the nginx config in this article
    # proxies /imgpath to http://127.0.0.1:8000.
    uvicorn.run(app=app, host='0.0.0.0', port=8000)
2.2nginx转发配置
修改/etc/nginx/sites-available/default 配置文件,根据自己域名修改,本地使用127.0.0.1
# HTTPS virtual host: serves the static page and proxies the search API.
server { # https
    listen 443 ssl;
    server_name 0nly.cn; # domain the certificate is issued for
    ssl_certificate /root/cert/6805403_www.0nly.cn.crt; # certificate path
    ssl_certificate_key /root/cert/6805403_www.0nly.cn.key; # private key path
    ssl_session_timeout 5m;
    # TLS 1.0 and 1.1 are deprecated (RFC 8996); offer only 1.2 and 1.3.
    # TLSv1.3 requires nginx 1.13.0+ built against OpenSSL 1.1.1+.
    ssl_protocols TLSv1.2 TLSv1.3;
    ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:HIGH:!aNULL:!MD5:!RC4:!DHE;
    ssl_prefer_server_ciphers on;

    location / {
        root /var/www/html/; # document root holding the static page
        # NOTE(review): autoindex lists the contents of any directory that
        # has no index file; turn it off unless listings are wanted.
        autoindex on;
        index index.html;
    }

    location /imgpath {
        # TLS terminates here; the backend is reached over plain HTTP.
        proxy_pass http://127.0.0.1:8000/imgpath;
        # Pass the original host, client address and scheme to the backend.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
}
# Plain-HTTP virtual host: answers every request on port 80 with a permanent
# redirect to the same URI on the HTTPS site.
server { # http
listen 80;
server_name 0nly.cn;
return 301 https://$server_name$request_uri; # redirect HTTP to HTTPS automatically
}