import re
from urllib.parse import urljoin
import queue
from queue import Queue
from threading import Thread
import pandas as pd
from login_in import *
from fake_useragent import UserAgent
from mysql_modles import *
# Module-level shared state for the spider worker threads.
ua = UserAgent()
# One random User-Agent picked at import time and reused for every request.
headers = {"User-Agent": ua.random}
# Work queue of shipment numbers; filled by execl_to_mysql(), drained by run().
que = Queue()
class SpiderPicture(object):
    """Download receipt images for shipment numbers listed in an Excel sheet.

    Pipeline (state is tracked per row in the MySQL-backed ``Pictures`` table):
    ``execl_to_mysql()`` seeds the shared queue, then worker threads running
    ``run()`` resolve each shipment number to a ``way_id`` (link id), to an
    image URL, and finally to a saved ``<ship_no>.jpg`` file.
    """

    def execl_to_mysql(self):
        """Read shipment numbers from 回单下载.xlsx, enqueue all of them, and
        insert a tracking row for each number not already in the database."""
        df = pd.read_excel(r'回单下载.xlsx', engine='openpyxl')
        for ship in df['ship_no'].tolist():
            que.put(ship)
            # Only create a row the first time this shipment number is seen.
            if not Pictures.select().where(Pictures.ship_no == ship):
                picture_ft = Pictures()
                picture_ft.ship_no = ship
                picture_ft.save()

    def link_id(self, request):
        """Pop one shipment number and resolve its way_id via the search page.

        The search endpoint redirects to a URL containing ``way_id=...``.
        Raises queue.Empty after a 2 s wait when no work remains, and
        ValueError when the redirect carries no way_id.
        Returns (link_id, ship_no).
        """
        ship_no = que.get(timeout=2)
        link_id_url = (
            'https://xtms.sfwl.net/Net/SearchPub.aspx?movement=get_way_search'
            f'&objValue={ship_no}&Select_Condition=way_print_sn'
        )
        res = request.get(link_id_url, headers=headers)
        match = re.search('way_id=(.*)', res.url)
        if match is None:
            # Explicit failure instead of an opaque AttributeError on None.
            raise ValueError(f'no way_id in redirect for {ship_no}: {res.url}')
        link_id = match.group(1)
        Pictures.update(link_id=link_id).where(Pictures.ship_no == ship_no).execute()
        return link_id, ship_no

    def download_url(self, request, link_id):
        """Fetch the attachment page for link_id and extract the image URL.

        The relative src is resolved against the transfer-center base URL,
        persisted on the matching row, and returned.
        """
        url = f'https://ess.sfwl.net/TransferWebCenter/AttachInfo.aspx?link_id={link_id}'
        res = request.get(url, headers=headers)
        # Non-greedy: the greedy original could run past the closing quote
        # whenever another double quote appeared later on the same line.
        match = re.search('img src="(.*?)"', res.text)
        if match is None:
            raise ValueError(f'no <img> tag found for link_id {link_id}')
        domain = 'https://ess.sfwl.net/TransferWebCenter/'
        download_urls = urljoin(domain, match.group(1))
        Pictures.update(download_url=download_urls).where(Pictures.link_id == link_id).execute()
        return download_urls

    def start_download(self, request, download_urls, ship_no):
        """Download the image to 照片/<ship_no>.jpg and mark the row done."""
        res = request.get(download_urls, headers=headers)
        file_name = os.path.join(BASE_DIR, '照片', f'{ship_no}.jpg')
        with open(file_name, 'wb') as f:
            f.write(res.content)
        Pictures.update(remarks='已下载').where(Pictures.ship_no == ship_no).execute()

    def run(self):
        """Worker loop: process queue items until the queue stays empty for 2 s.

        Uses the module-global authenticated ``request`` session created in
        the ``__main__`` section.
        """
        while True:
            try:
                link_id, ship_no = self.link_id(request)
                download_urls = self.download_url(request, link_id)
                self.start_download(request, download_urls, ship_no)
            except queue.Empty:
                # Brief grace period, then let the thread exit.
                time.sleep(2)
                break
if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    # Interactive browser login; yields the selenium driver plus an
    # authenticated requests.Session (read as a global by run()).
    login_in = LoginIn()
    driver, request = login_in.run()
    spider_picture = SpiderPicture()
    spider_picture.execl_to_mysql()
    # Three worker threads drain the shared queue concurrently
    # (replaces the copy-pasted t1/t2/t3 blocks).
    threads = [Thread(target=spider_picture.run) for _ in range(3)]
    for t in threads:
        t.start()
# Database layer (peewee ORM).
# NOTE(review): wildcard import and a hard-coded root password — prefer
# explicit imports and loading credentials from the environment.
from peewee import *
db = MySQLDatabase('spider_picture', user='root', host='localhost', password="password", port=3306)
class BaseModel(Model):
    """Base model binding every table to the shared MySQL database."""

    class Meta:
        database = db
class Pictures(BaseModel):
    """One row per shipment number, tracking its receipt-image download."""

    ship_no = CharField()       # shipment number, seeded from the Excel sheet
    link_id = CharField()       # way_id extracted from the search redirect URL
    download_url = CharField()  # absolute URL of the receipt image
    remarks = CharField()       # progress marker, e.g. '已下载' once saved

    class Meta:
        table_name = 'pictures'
def create_table(table):
    """Create the model's backing table unless it already exists."""
    if table.table_exists():
        return
    table.create_table()
def drop_table(table):
    """Drop the model's backing table when it exists."""
    if not table.table_exists():
        return
    table.drop_table()
if __name__ == '__main__':
    # One-off setup: create the pictures table when this module runs directly.
    create_table(Pictures)
import requests
import os
import time
import json
import urllib3
import warnings
from selenium import webdriver
# Directory containing this file; used as the browser download directory
# and as the root for saved receipt images.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
class LoginIn(object):
    """Interactive login helper for the SF logistics portal.

    get_cookies() opens a Chrome window and gives the operator 30 seconds
    to complete the login by hand; login() then copies the browser cookies
    into a requests.Session so the spider can make authenticated HTTP calls.
    """

    def get_cookies(self):
        """Launch Chrome, wait for a manual login, and return the driver
        plus the browser cookies serialized as a JSON string."""
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_experimental_option(
            'prefs', {'download.default_directory': BASE_DIR}
        )
        # 'eager': get() returns on DOMContentLoaded instead of full load.
        chrome_options.page_load_strategy = 'eager'
        # 'options=' replaces the 'chrome_options=' kwarg removed in Selenium 4.
        driver = webdriver.Chrome(options=chrome_options)
        driver.implicitly_wait(20)
        page_url = 'https://agx.sfwl.net/login/?url=http://xtms.sfwl.net/Main.htm'
        driver.get(page_url)
        driver.maximize_window()
        # Grace period for the operator to fill in the login form.
        time.sleep(30)
        json_cookie = json.dumps(driver.get_cookies())
        return driver, json_cookie

    def login(self, driver, json_cookie):
        """Copy the serialized browser cookies into a fresh requests.Session.

        Returns (driver, session); the driver is refreshed so the page
        reflects the logged-in state. (A dead per-cookie dict the original
        built and never used has been removed.)
        """
        request = requests.Session()
        for cookie in json.loads(json_cookie):
            request.cookies.set(cookie['name'], cookie['value'])
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        driver.refresh()
        return driver, request

    def run(self):
        """Full flow: manual browser login, then cookie hand-off."""
        driver, json_cookie = self.get_cookies()
        return self.login(driver, json_cookie)
def log():
    """Prototype login POST against the Tesla supplier-portal OAuth2 flow.

    NOTE(review): every value below — CSRF token, transaction id, session
    cookies, the account email, and a plain-text password in ``credential`` —
    is hard-coded from one captured browser session, so this request only
    works until those values expire. Secrets like these must not live in
    source control; load them from the environment or a vault instead.
    Prints the final URL, which presumably indicates whether the
    authentication redirect succeeded — TODO confirm against the portal.
    """
    url = 'https://auth.tesla.com/oauth2/v1/authorize?redirect_uri=https://suppliers.teslamotors.com/logistics/auth/callback&client_id=logisticsportal&response_type=code&state=QfgVq8Lyi3&scope=openid email profile employee metadata|warp&nonce=zljmf8wic1s&response_mode=query&audience=https://suppliers.teslamotors.com/logistics/&code_challenge=E7IxhvFknp-ZUlAFzRnnL-TJ7i4rvuSNPVm9O_K064Q&code_challenge_method=S256'
    # Headers replayed from a captured browser request, including the raw
    # Akamai bot-management cookies (ak_bmsc/_abck/bm_*).
    headers = {
        'authority': 'auth.tesla.com',
        'method': 'POST',
        'origin': 'https://auth.tesla.com',
        'referer': 'https://auth.tesla.com/oauth2/v1/authorize?redirect_uri=https%3A%2F%2Fsuppliers.teslamotors.com%2Flogistics%2Fauth%2Fcallback&client_id=logisticsportal&response_type=code&state=QfgVq8Lyi3&scope=openid%20email%20profile%20employee%20metadata%7Cwarp&nonce=zljmf8wic1s&response_mode=query&audience=https%3A%2F%2Fsuppliers.teslamotors.com%2Flogistics%2F&code_challenge=E7IxhvFknp-ZUlAFzRnnL-TJ7i4rvuSNPVm9O_K064Q&code_challenge_method=S256',
        'accept-language': 'zh-CN,zh;q=0.9',
        'upgrade-insecure-requests': '1',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'cookie': 'ak_bmsc=34A80A389C39BEF47BE54EE3091DD27B~000000000000000000000000000000~YAAQM9o4fdpQqCiHAQAA6uBGLxMWg8WFEmVcvan4MY61BeeDA2k4w8hpAx0yfSwVgqqIXfuO9EL6a9n9O361Di3CI4m7CYqeBTeJH5PkMhfXmN9vNt888jNw8Msg7SSR0NwOka8RazXA+H3zc1nXYiswEUbQrBwVh4MW9Yd0NvfHBJ4mXxPxydnzj0TYGowiojOyfdwQS+fRIu8ahee2tV+EGxX1RsILgf/eyB8AdY80aOduOQaSusADqLJFWdL5/XYW672RpTrvvv9qv8bfbGwVI6N0j3fQFoyhRyeiF4f0HMs/W9Ob6Lw1g+1pCYLFx60GGbg95jWQmE8kyB7jreX39JR7HFd5+fZJg9KO+ItDbtCUHug++HePKXhvPa2fy57BQspAC6ED; bm_sz=1C2A919A691FAEC25EC7A9D3A4908B17~YAAQM9o4fdtQqCiHAQAA6uBGLxNENXj65XKSZDMfXnu4z/sxf2FG3meKSmWC0XqLcOghuY7iUdxu6gvP8ook7Hjra/C/kE85AqGM9cswULeLhK1rxJAKomLoyEsMO40Dk1egtJ9EBQZ7rAgNl1Hexm8EmodhvapAypNMtbLst/gE5CO/+5HsqGycU1GlHpkBR9zVIvEYJFPRCSUEa6SQKsjSB7Mn7BM/OETpd7T8CHmvm59PzJXZYml9oSRAaiv11uf9oWf/dL6yIurPkHcfevYDLK2OFQAPiWWUvl8EdtGtdQ==~3486276~4538934; i18next=zh-CN; tesla-auth.sid=s%3AnBVK6G5uORSm7IWei_7hD_kocaNOts6T.rgSjii7uVnpMYdHrpNh0SBl5Z2f22ImPn0uQsgKBtxE; _abck=ACEE05F949477867E37AF2736662CC20~0~YAAQHmQcuC2FsQ+HAQAACx6HLwkA0gkgUo44AQIrfevekcbSm+yk/1SnyIIltvAEbu35fTWvrIIqcyJmJaKEsyel+xy/IDdk22dJT50me/eTEDTazqsxUN8OeDZcMhPIcCD+54zLArLNeqYVwVDgXfU1YdEl+0KRXlojYlWqhxWTQrjTIKoXiIjUx5GUBJdIw8BmNdeUq1OFk86/RIeQZv5osc7ZIE/HT+d4uy3ZXvpSfalrXShQaue4a0ik5rbGkn73dlVUcIK4330GQgceOJcz4jtARTkKVcIYoLW+bVZdEDGh1Z5MBZiDt8pzWM40eNfN/xhIYLUgn+xfQqzHoBDJEtwd0q+wpu1UsH+qCBxyvvFY8ztGlOzKU8m9bOl5BXVlwWVgsGB0jri9stOE2KBdo3s8VuY=~-1~||-1||~-1; bm_mi=FD848EEF53B97BCE78875F5F14AA54D9~YAAQHmQcuEiFsQ+HAQAAZyqHLxPWYOCORKGng+kRM22U7YIkBXOqNIM7SQAuNcoeXcfnRZ3cleuNNQjtm9XGzE55yFu2ywuKMiitfoRyLmTr7GGdDO3dm+vgkI5XpkPzTFDHfDWqC7FNQuyFBPuQKfvS/Bn6QpHljO7mqVJ5LohVJr5wPuA+AXCE4Us7Yqx0LZN4C4YSGnklIdI9nVYRS659j4YTUbZ/xI05mzuiKH0V5urgbTP1luwGmXO0uGzyc3Kt/S0Y2n9VZ5aPfRPugDP5mvIEmGMdBUzTaB2HQaDg5oPq5Sq8jQUAwkqJXx67Tfg0XgwBt2+UE43X3+rKl+GfSbsI~1; bm_sv=D3D985818616B0FD478D441F21704543~YAAQHmQcuEmFsQ+HAQAAZyqHLxMCjR9qbAcuOw9g39sjxApkakeJ2Po+dD9bw6Ubr4KTar7DQE1AhTwyaZyck0JnFsxUCFqZJrCp80aMkh4+7wcqPTFSfAbbNIce5on3yx2H8C9Kp+Yt85lhAMLmwoVHmh0d8x26pBqqyYq8xn373usH6In8LxASLjN0J6w1VrC0cZEJqMcHl9XCZBrE7kmb8XlMPEKYKgoLqrtT/qU2i5mcVlFxfw5El/hc3k9t~1',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
    }
    # Form payload for the 'authenticate' phase of the hosted login page.
    data = {
        '_csrf': 'CmzyPjla-w1SCv7pprG5T__wQz18POQ_HXF8',
        '_phase': 'authenticate',
        '_process': '1',
        'transaction_id': 'sOKdci2Z',
        'identity': 'jie.lin@sfwl.com.cn',
        'correlation_id': '2526b2f6-418d-483e-8f3a-32ce814c6f9a',
        'fingerPrint': '{"auth_method":"email-login","devicehash":"1d02a1580fcdc16c33d089bb7d11f863","client_id":"logisticsportal","hardware_concurrency":12,"screen_resolution":[960,1536],"audio":124.04347657808103,"touch_support":"6690a7caa6588891494df1e64b3d185b","web_gl":"WebGL 1.0 (OpenGL ES 2.0 Chromium)","browser_plugins":"73ddd9a85fc01dd86982e0a967643420","browser_canvas":"b6009120aeed8f78fd4207ad98ad29e4","browser_font":"bbbea47743cf8367b580a2097e639557"}',
        # SECURITY: plain-text password committed to source — must be removed.
        'credential': '19830720a'
    }
    res = requests.post(url, headers=headers, data=data)
    print(res.url)
if __name__ == '__main__':
    # Manual test entry point for the Tesla portal login prototype.
    log()