第一题:题目要求在给定网页上通过 XPath 定位的方式,抓取指定内容和数量的数据;原题已给出数据库连接参数和目标网页的网址。以下是本人考试时编写的代码。核心流程是:打开网页 👉 开始循环 👉 提取元素 👉 抓取数据 👉 处理数据 👉 连接数据库 👉 插入数据 👉 翻页继续抓取,直至最后一页。
import xbot
from xbot import print, sleep, web
from .import package
from .package import variables as glv
import pymysql
import requests
import re
import random
# Run-time settings, left blank in this listing; fill them in before running.
url = ''  # address of the page to scrape (passed to web.create in main)
host = ''  # MySQL server host (pymysql.connect host=)
user = ''  # MySQL user name (pymysql.connect user=)
secret = ''  # MySQL password (pymysql.connect password=)
db = ''  # MySQL database name (pymysql.connect database=)
def sqlLink():
    """Open a MySQL connection from the module-level settings.

    The connection targets port 3306, uses the ``utf8mb4`` charset and
    returns rows as dicts (``DictCursor``).

    Returns:
        The open ``pymysql`` connection.

    Raises:
        pymysql.MySQLError: if the connection cannot be established. The
            failure is printed first, then re-raised unchanged.
    """
    try:
        connection = pymysql.connect(
            host=host,
            port=3306,
            user=user,
            password=secret,
            database=db,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor,
        )
    except pymysql.MySQLError:
        # connect() signals failure by raising, never by returning a falsy
        # object, so this is the only place the failure can be reported.
        print('link failed')
        raise
    print('linked')
    return connection
def getEle(page, index):
    """Locate the cells of one row of table ``#tablepress-4``.

    Args:
        page: page object exposing ``find_by_xpath``.
        index: row number used in the XPath ``tr[...]`` predicate (1-based).

    Returns:
        A list ``[name_ele, year_ele, area_ele, director_ele, money_ele,
        commitor]``: five located elements followed by the submitter name
        as a plain string.
    """
    # Every cell lives under the same row; build that prefix once.
    row = f'//*[@id="tablepress-4"]/tbody/tr[{index}]'
    name_ele = page.find_by_xpath(f'{row}/td[2]')
    year_ele = page.find_by_xpath(f'{row}/td[1]')
    # The <img> inside the first cell; its src is later used to derive the region.
    area_ele = page.find_by_xpath(f'{row}/td[1]/img')
    director_ele = page.find_by_xpath(f'{row}/td[3]')
    money_ele = page.find_by_xpath(f'{row}/td[4]')
    commitor = '沸羊羊'
    return [name_ele, year_ele, area_ele, director_ele, money_ele, commitor]
# Region name -> code fragment looked for in the flag image URL.
# The fragments look like pairs of Unicode regional-indicator code points
# (e.g. 1f1e8-1f1f3 = "CN").
_COUNTRY_FLAG_CODES = {
    '中国': '1f1e8-1f1f3',
    '英国': '1f1ec-1f1e7',
    '美国': '1f1fa-1f1f8',
    '印度': '1f1ee-1f1f3',
    '日本': '1f1ef-1f1f5',
    '芬兰': '1f1eb-1f1ee',
    '黎巴嫩': '1f1f1-1f1e7',
    '法国': '1f1eb-1f1f7',
    '澳大利亚': '1f1e6-1f1fa',
}


def _first_int(text, default=0):
    """Return the first run of digits in *text* as an int, or *default*."""
    match = re.search(r'\d+', text or '')
    return int(match.group()) if match else default


def getdata(eleList):
    """Turn the elements of one table row into a tuple ready for insertion.

    Args:
        eleList: ``[name_ele, year_ele, area_ele, director_ele, money_ele,
            commitor]`` as produced by ``getEle``. The first five expose
            ``get_text()`` / ``get_attribute()``; the last is a string.

    Returns:
        ``(name, year, area, score, director, money, commitor)`` where

        * ``name`` is the title text before the first ``(``, or
          ``'No name'`` when there is none;
        * ``year`` is the year cell as an int, or ``0`` if it is not numeric;
        * ``area`` is the region whose flag code occurs in the image
          ``src``, or ``'No area'``;
        * ``score`` is the first decimal number in the title text as a
          float, ``'无评分'`` when only a hyphen is present, else ``''``;
        * ``money`` is the first integer in the box-office cell, or ``0``.
    """
    title_text = eleList[0].get_text() or ''

    # Guard the match object itself: calling .group() on a failed search
    # raises, so an ``or`` fallback after .group() can never take effect.
    name_match = re.search(r'^[^(]+', title_text)
    name = name_match.group() if name_match else 'No name'

    try:
        year = int(eleList[1].get_text())
    except (TypeError, ValueError):
        year = 0

    score = ''
    # NOTE(review): this pattern matches ANY hyphen in the text, not only a
    # literal "(-)"; presumably the latter was meant. Kept as-is because a
    # decimal score found below overrides it anyway.
    if re.search(r'(-)', title_text):
        score = '无评分'
    score_match = re.search(r'\d+\.\d+', title_text)
    if score_match:
        score = float(score_match.group())

    # Read the attribute once; the first region whose code appears wins.
    flag_src = str(eleList[2].get_attribute('src'))
    area = next(
        (region for region, code in _COUNTRY_FLAG_CODES.items()
         if code in flag_src),
        'No area',
    )

    director = eleList[3].get_text()
    money = _first_int(eleList[4].get_text())
    commitor = eleList[5]

    data = (name, year, area, score, director, money, commitor)
    print('row data tuple: ', data)
    return data
def nextPage(page, done):
    """Click the table's "next" button unless the last page is showing.

    Args:
        page: page object exposing ``find_by_xpath``.
        done: accepted for backward compatibility; the result no longer
            depends on it.

    Returns:
        ``True`` when the last page has been reached (nothing is clicked),
        ``False`` after clicking "next" and pausing 3-5 seconds.
    """
    next_btn = page.find_by_xpath('//*[@id="tablepress-4_next"]')
    # split() without an argument ignores repeated/trailing whitespace, and
    # ``or ''`` tolerates a missing class attribute.
    css_classes = (next_btn.get_attribute('class') or '').split()
    # More than two class names is taken to mean the button carries an extra
    # state class -- presumably "disabled"; confirm against the page markup.
    if len(css_classes) > 2:
        print('Arrive last page')
        return True
    next_btn.click(button='left')
    sleep(random.randint(3, 5))
    return False
def insert(sql, values):
    """Insert *values* with *sql* on a fresh connection, then close it.

    Best-effort by design: a failure during the insert is rolled back and
    printed rather than propagated.

    Args:
        sql: parameterized INSERT statement.
        values: sequence of parameter tuples for ``executemany``.
    """
    connection = sqlLink()
    try:
        with connection.cursor() as cur:
            cur.executemany(sql, values)
        connection.commit()
        print('insert succeed')
    except Exception as e:
        connection.rollback()
        print('Insert faild,Rollbacked,e is: ', e)
    finally:
        # Only close a connection that is still open; close() on an
        # already-closed pymysql connection raises.
        if connection.open:
            connection.close()
            print('connection closed')
def main(args):
    """Scrape every page of the table and insert each page's rows.

    Opens ``url`` in Chrome, then for each table page: reads every row,
    inserts the collected rows, and moves to the next page until
    ``nextPage`` reports the last one.

    NOTE(review): the original indentation was lost. The nesting below
    (row loop, then per-page pause / notification / insert / paging) is a
    reconstruction -- confirm against the author's version.

    Args:
        args: flow arguments; not used here.
    """
    page = web.create(url=url, mode='chrome', load_timeout=20)
    sql = 'INSERT INTO movie(`电影名称`,`上映年份`,`制片地区`,`评分`,`导演`,`票房`,`提交人`) VALUES(%s,%s,%s,%s,%s,%s,%s);'
    count = 0
    while True:
        values = []
        rows = page.find_all_by_xpath('//*[@id="tablepress-4"]/tbody/tr', timeout=15)
        # XPath row predicates are 1-based.
        for row_no in range(1, len(rows) + 1):
            sleep(1)
            values.append(getdata(getEle(page, row_no)))
        sleep(random.randint(3, 5))
        count += 1
        xbot.app.dialog.show_notifycation(f'catching over {count} page', placement='top', level='info', timeout=1)
        print('values list: ', values)
        insert(sql, values)
        now_page = web.get_active(mode='chrome', load_timeout=20)
        if nextPage(now_page, False):
            print('Task done!')
            break
第二题:题目大意是从指定接口获取数据,筛选出评分落在给定区间内的记录,对其指定指标(票房)求和,并将求得的结果提交到对应的数据库表。这题比较简单,按要求操作即可。
import xbot
from xbot import print, sleep
from .import package
from .package import variables as glv
import pymysql
import requests
import json
# MySQL connection settings, left blank in this listing; fill them in before running.
host = ''  # server host (pymysql.connect host=)
user = ''  # user name (pymysql.connect user=)
secret = ''  # password (pymysql.connect password=)
db = ''  # database name (pymysql.connect database=)
def getData(url):
    """GET *url* and return its body decoded as JSON.

    Args:
        url: address of the JSON endpoint.

    Returns:
        The decoded JSON document.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
        json.JSONDecodeError: if the body is not valid JSON.
    """
    # Without a timeout requests can wait forever on an unresponsive server.
    response = requests.get(url, timeout=10)
    # Fail on 4xx/5xx instead of trying to parse an error page as data.
    response.raise_for_status()
    return json.loads(response.text)
def sqllink():
    """Open a MySQL connection from the module-level settings.

    The connection targets port 3306, uses the ``utf8mb4`` charset and
    returns rows as dicts (``DictCursor``).

    Returns:
        The open ``pymysql`` connection.

    Raises:
        pymysql.MySQLError: if the connection cannot be established. The
            failure is printed first, then re-raised unchanged.
    """
    try:
        connection = pymysql.connect(
            host=host,
            port=3306,
            user=user,
            password=secret,
            database=db,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor,
        )
    except pymysql.MySQLError:
        # connect() signals failure by raising, never by returning a falsy
        # object, so this is the only place the failure can be reported.
        print('link failed')
        raise
    print('link success')
    return connection
def search(data, mins, maxs):
    """Sum box office per score range, plus a bucket for unrated records.

    Args:
        data: iterable of dicts with keys ``'评分'`` (score) and ``'票房'``
            (box office).
        mins: lower bounds of the score ranges (inclusive).
        maxs: upper bounds of the score ranges (inclusive), paired with
            ``mins`` by position. Any number of pairs is accepted.

    Returns:
        A list of ``(submitter, label, total)`` tuples: one per range,
        labelled ``'<min>-<max>'``, followed by one labelled ``'无评分'``.
        Totals are strings.

    Raises:
        KeyError: if a record lacks one of the two keys.
        ValueError, TypeError: if a box-office value is not an integer.
    """
    bounds = list(zip(mins, maxs))
    range_totals = [0] * len(bounds)
    unrated_total = 0

    for item in data:
        try:
            score = float(str(item['评分']).strip())
        except (ValueError, TypeError, AttributeError):
            # A score that cannot be parsed is recorded as 0, i.e. unrated.
            score = 0
        money = int(item['票房'])

        for pos, (low, high) in enumerate(bounds):
            if low <= score <= high:
                range_totals[pos] += money
        if score == 0:
            unrated_total += money

    results = [
        ('沸羊羊', f'{low}-{high}', str(total))
        for (low, high), total in zip(bounds, range_totals)
    ]
    results.append(('沸羊羊', '无评分', str(unrated_total)))
    print(results)
    return results
def insert(connection, data, sql):
    """Insert *data* with *sql* on *connection*, then close the connection.

    Best-effort by design: a failure during the insert is rolled back and
    printed rather than propagated.

    Args:
        connection: open database connection. It is closed before
            returning, so the caller must not reuse it.
        data: sequence of parameter tuples for ``executemany``.
        sql: parameterized INSERT statement.
    """
    try:
        with connection.cursor() as cur:
            cur.executemany(sql, data)
        connection.commit()
        print('insert successed')
    except Exception as e:
        connection.rollback()
        print('link roollbacked')
        print(e)
    finally:
        # Only close a connection that is still open; close() on an
        # already-closed pymysql connection raises.
        if connection.open:
            connection.close()
            print('link closed')
def main(args):
    """Fetch the records, total box office per score range, store the totals.

    Args:
        args: flow arguments; not used here.
    """
    origin = 'https://mock.jsont.run'
    sql = 'INSERT INTO result VALUES(%s,%s,%s);'
    # Fetch and aggregate first, so that a failure there does not leave an
    # opened database connection behind.
    rows = search(getData(origin)['data'], [3.0, 9.0], [3.5, 9.5])
    connection = sqllink()
    insert(connection, rows, sql)