# @Author: Liu_li (happyandluck@163.com)
# @Date : 2020/3/4 9:01
# @coding : UTF-8
# @Project: Sutdy_2
# @FileName: myspider.py
import datetime
import os
import random
import sqlite3
import threading
import time
import urllib.request
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
class MySpider:
    """Selenium-driven spider that crawls JD.com product search results.

    Per the surrounding article text, it auto-searches jd.com, saves
    product rows into a local SQLite database (``phones.db``) and
    downloads product images under :attr:`imagePath`.
    """

    # HTTP request headers presenting a QQBrowser/Chrome-on-Windows
    # User-Agent — presumably passed to urllib.request when downloading
    # images (verify against the download code, which is truncated here).
    # NOTE(review): "headres" looks like a typo for "headers"; kept
    # as-is because external callers may reference MySpider.headres.
    headres = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.25 Safari/537.36 "
        "Core/1.70.3741.400 QQBrowser/10.5.3863.400"
    }

    # Relative directory where downloaded product images are stored;
    # created on startUp if missing (creation code truncated in this view).
    imagePath = "download/dianfanguo"
def startUp(self, url, key, getPage=0):
# Initializing Chrome browser
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
self.driver = webdriver.Chrome(chrome_options=chrome_options)
# Initializing varables
self.threads = []
self.No = 0
self.imgNo = 0
self.Page = 1
self.getPage = getPage
# Initializing database
try:
self.con = sqlite3.connect("phones.db")
self.cursor = self.con.cursor()
try:
self.cursor.execute("drop table phones")
except Exception as err:
print("删除曾经phones表err")
pass
try:
sql = "create table phones (" \
"mNo varchar(32) primary key, " \
"mMark varchar (256)," \
"mPrice varchar (32)," \
"mNote varchar (1024)," \
"mFile varchar (256))"
self.cursor.execute(sql)
except Exception as err:
print("建立新phones表err")
except Exception as err:
print(err)
# Initializing images folder
try:
if not os.path.exists(MySpider.imagePath
爬取京东官网,自动搜索保存所有商品信息
最新推荐文章于 2024-07-08 14:10:21 发布
本文详细介绍了如何使用爬虫技术,结合HTML5和JavaScript,解析并抓取京东官网的商品信息。通过HTTPS协议进行安全的数据请求,实现自动化搜索与保存所有商品数据的过程。
摘要由CSDN通过智能技术生成