import time
import random
import json
import re
import urllib3
import requests
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup
def get_cookies(wait_seconds=15):
    """Open the SF login page in Chrome and wait for a manual QR-code scan.

    Args:
        wait_seconds: how long to pause for the user to scan the QR code
            and complete the login (default 15, matching the original
            hard-coded wait).

    Returns:
        (driver, jsonCookie): the live Chrome WebDriver and the captured
        cookies serialized as a JSON string (list of cookie dicts).
    """
    driver = webdriver.Chrome()
    page_url = 'https://agx.sfwl.net/login/?url=http://xtms.sfwl.net/Main.htm'
    driver.get(page_url)
    driver.maximize_window()
    # Block while the user scans the QR code to log in.
    time.sleep(wait_seconds)
    # Selenium returns cookies as a list of dicts; serialize for hand-off.
    cookies = driver.get_cookies()
    jsonCookie = json.dumps(cookies)
    return driver, jsonCookie
def login(driver, jsonCookie):
    """Replay the captured cookies into both a requests.Session and the driver.

    Args:
        driver: the logged-in selenium WebDriver from get_cookies().
        jsonCookie: JSON string holding the list of cookie dicts.

    Returns:
        (driver, request): the refreshed driver and a requests.Session
        carrying the same cookies, usable for raw HTTP downloads.
    """
    request = requests.Session()
    # Later requests use verify=False; silence the warning once, up front
    # (the original re-disabled it on every loop iteration).
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    cookies = json.loads(jsonCookie)
    for cookie in cookies:
        # Build the dict from the cookie selenium actually captured.
        # The original hard-coded domain '.tapd.cn' (copied from an
        # unrelated script) and an expiry timestamp already in the past.
        cookie_dict = {
            'domain': cookie.get('domain', '.sfwl.net'),
            'name': cookie.get('name'),
            'value': cookie.get('value'),
            'path': cookie.get('path', '/'),
            'httpOnly': cookie.get('httpOnly', True),
            'Secure': cookie.get('secure', True),
        }
        # Only set expiry when the browser reported one; a stale fixed
        # value would make every cookie expired on arrival.
        if 'expiry' in cookie:
            cookie_dict['expiry'] = cookie['expiry']
        request.cookies.set(cookie['name'], cookie['value'])
        driver.add_cookie(cookie_dict)
    # The cookies only take effect after a page refresh.
    driver.refresh()
    return driver, request
def get_shipment_no(driver, request):
    """Batch-download receipt images for every waybill number in 回单.xlsx.

    For each non-empty value in the '托单号' column, open the receipt search
    page, scrape the receipt image URL(s), save the image to
    ./回单下载照片/<waybill>.jpg, and record the download status back into
    the workbook ('是否下载' / '备注' columns).

    Args:
        driver: logged-in selenium WebDriver.
        request: requests.Session carrying the login cookies.
    """
    # Hoisted out of the loop: UA pool, retry policy and warning filter
    # are invariant per run (the original rebuilt/reset them per waybill).
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15"
    ]
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    request.adapters.DEFAULT_RETRIES = 5

    df = pd.read_excel('回单.xlsx', header=0)
    # NaN != NaN, so this self-comparison keeps only rows whose waybill
    # number is present. .copy() makes kf an independent frame, so the
    # global chained-assignment silencing is no longer needed.
    kf = df[df['托单号'] == df['托单号']].copy()

    for i in list(kf['托单号']):
        url = f'https://xtms.sfwl.net/Net/SearchPub.aspx?r=0.3897748528302598&movement=get_way_search&objValue={i}&Select_Condition=way_print_sn'
        driver.get(url)
        driver.execute_script("window.scrollBy(0,2000)")
        # Give the async search result time to render.
        time.sleep(1)
        try:
            img_url = driver.find_element_by_xpath('//*[@id="imgUrl"]').get_attribute('imgurl')
            headers = {'User-Agent': random.choice(user_agent_list)}
            if img_url:
                content = request.get(img_url, headers=headers, verify=False).content.decode()
                bsObj = BeautifulSoup(content, 'html.parser')
                images = bsObj.findAll("img", {"src": re.compile("(.*)")})
                for image in images:
                    img_url2 = "https://ess.sfwl.net/TransferWebCenter/" + image["src"]
                    data = request.get(img_url2, headers=headers, verify=False).content
                    # NOTE(review): multiple images for one waybill all
                    # write the same filename, so only the last survives —
                    # original behavior kept; confirm whether that's intended.
                    with open(f'./回单下载照片/{i}.jpg', 'wb') as f:
                        f.write(data)
                kf.loc[kf['托单号'] == i, '是否下载'] = '是'
            else:
                kf.loc[kf['托单号'] == i, '是否下载'] = '否'
        except Exception:
            # The receipt element is absent (external document) or the
            # request failed; the original bare `except:` also trapped
            # KeyboardInterrupt/SystemExit.
            kf.loc[kf['托单号'] == i, '是否下载'] = '否'
            kf.loc[kf['托单号'] == i, '备注'] = '外部单据'
    kf.to_excel('回单.xlsx', index=False)
def main():
    """Scan-to-login, transplant cookies, then batch-download receipts."""
    browser, cookie_json = get_cookies()
    browser, session = login(browser, cookie_json)
    get_shipment_no(browser, session)


if __name__ == '__main__':
    main()
# Summary: simulate login with selenium to capture cookies, then batch-download receipts.
# (Blog-page residue removed: the trailing "latest recommended article, published 2024-07-22
# 15:15:29" line was scraped page chrome, not code.)