爬虫
import datetime
import sqlite3
from urllib.parse import urljoin

import requests
from lxml import etree
# Crawl the listing page at url_news and, for every linked title whose detail
# page exposes a poster image, insert one row (m_id, m_name, m_img) into the
# `movie` table of movie.db. The table is expected to exist already.
conn = sqlite3.connect('movie.db')
cursor = conn.cursor()
sql = 'insert into movie(m_id,m_name,m_img) values (:m_id,:m_name,:m_img)'
url_news = 'https://www.dy2018.com/html/tv/oumeitv/index.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    'Referer': 'https://www.dy2018.com/html/tv/oumeitv/index.html',
}
# Upper bound, in seconds, for every HTTP request. Without a timeout a stalled
# server would block the crawler forever.
REQUEST_TIMEOUT = 10

try:
    response = requests.get(url_news, headers=headers, timeout=REQUEST_TIMEOUT)
    # Report the HTTP status of the listing request.
    print(response.status_code)
    # Decode the body as gb2312 (Chinese text).
    response.encoding = 'gb2312'
    scode = response.status_code
    if scode == 200:
        print("start now")
        # Parsed DOM of the listing page.
        domroot = etree.HTML(response.text)
        # XPath selecting the title links of the listing.
        movielist = domroot.xpath('//td[@height=26]/b/a')
        # Number of titles found.
        print('电影个数为:', len(movielist))
        for x in movielist:
            # Link text is the title; links without text are skipped.
            title = x.xpath('text()')
            if title:
                title = title[0]
            else:
                continue
            print('电影名字:', title)

            # Link target is the detail page; links without href are skipped.
            url = x.xpath('@href')
            if url:
                url = url[0]
            else:
                continue
            print('电影地址: ', url)

            # Resolve the href against the listing URL so both root-relative
            # and absolute hrefs yield a well-formed address (plain string
            # concatenation produced "//" for hrefs starting with "/").
            url = urljoin(url_news, url)
            try:
                response = requests.get(
                    url, headers=headers, timeout=REQUEST_TIMEOUT
                )
            except requests.RequestException as err:
                # One unreachable detail page must not abort the whole crawl.
                print('请求失败: ', url, err)
                continue
            response.encoding = 'gb2312'
            page_2 = etree.HTML(response.text)

            # First image of the first paragraph inside the #Zoom container.
            page_2_img_url = page_2.xpath("//div[@id='Zoom']/p[1]/img[1]/@src")
            if page_2_img_url:
                page_2_img_url = page_2_img_url[0]
            else:
                continue
            print('电影图片: ', page_2_img_url)

            # NOTE(review): m_id is only the time of day, so ids from runs on
            # different days could coincide -- confirm against the table schema.
            time_now = datetime.datetime.now().strftime('%H:%M:%S.%f')
            cursor.execute(
                sql,
                {'m_id': time_now, 'm_name': title, 'm_img': page_2_img_url},
            )
            # Commit per row so already-stored titles survive a later failure.
            conn.commit()
finally:
    # Always release the database handle, even when a request or insert raised.
    conn.close()
数据结果截图
js初学
// Step 1 (the original note cites "lines 82~84", presumably of the page this
// snippet was taken from): prepare the request that downloads movie.db.
var xhr = new XMLHttpRequest(); // JavaScript is strictly case-sensitive
xhr.open('GET', 'movie.db', true); // standard AJAX call; `true` makes the request asynchronous
// Receive the file as raw bytes; queryBy wraps the response in a Uint8Array.
xhr.responseType = 'arraybuffer';
// Run a SQL statement against the SQLite file carried by a completed request.
//   sql - SQL text handed to db.exec().
//   t   - object whose `response` holds the database bytes (the XHR above,
//         which requests an arraybuffer).
// Returns whatever db.exec(sql) returns.
function queryBy(sql, t) {
    var uInt8Array = new Uint8Array(t.response);
    var db = new SQL.Database(uInt8Array);
    try {
        return db.exec(sql);
    } finally {
        // A fresh database is built on every call; close it so its memory is
        // released instead of accumulating once per query.
        db.close();
    }
}
// Step 2: render query results into the page.
//   content - result array from queryBy(); content[0].values is read as the
//             list of rows, where row[1] is shown as the caption and row[2]
//             is used as the image URL.
//   name    - accepted for interface compatibility; not used by this function.
// Appends one <div class="news"> holding an <img>/<span> pair per row to
// <body>. Does nothing when the query produced no result set.
function templateHTML(content, name) {
    // An empty result has no content[0]; bail out instead of throwing.
    if (!content || content.length === 0) {
        return;
    }
    var moviedata = content[0].values; // rows read from the database
    console.log(moviedata); // debug output of the raw rows
    var $news = $('<div class="news"></div>');
    for (var i = 0; i < moviedata.length; i++) {
        var $item = $('<div></div>');
        // The rows hold scraped text, so set them through attr()/text()
        // rather than concatenating them into markup, where a stray quote or
        // tag would be interpreted as HTML.
        $('<img>').attr('src', moviedata[i][2]).appendTo($item);
        $('<span></span>').text(moviedata[i][1]).appendTo($item);
        $news.append($item);
    }
    $("body").append($news);
}
// Step 3: once movie.db has been downloaded, query it and render the rows.
xhr.onload = function(e) {
    // Show two consecutive pages of five rows each. The second page starts at
    // offset 5; starting at offset 4 repeated the last row of the first page.
    var pageSize = 5;
    for (var page = 0; page < 2; page++) {
        var sql = 'select * from movie limit ' + pageSize +
                  ' offset ' + (page * pageSize);
        // Keep the result local instead of leaking an implicit global.
        var content = queryBy(sql, this);
        templateHTML(content, 'main_recommand');
    }
};
// Step 4: send the request.
xhr.send();