import os
import time
import pymysql
import pandas as pd
import re
import random # 导入requests库
import csv # 导出为csv文档
import requests
from fake_useragent import UserAgent # 导入随机获取UA的库
import json
import pandas as pd
from shapely.geometry import Point, Polygon
import glob
# MMSI identifiers of the vessels to look up, kept as strings.
mmsi_list = [
    '413996270',
    '413819502',
    '413825696',
    '413778811',
    '413819498',
    '413981334',
    '413819516',
    '413981375',
    '413994991',
    '413989687',
    '413989650',
    '413989663',
    '413831022',
    '413796869',
    '413989674',
    '413989685',
    '413796913',
]
class ShipxySpider(object):
    """Fetch per-vessel records from the shipxy.com ``GetShip`` endpoint.

    Requests are sent through one shared ``requests.Session`` with a fixed
    set of headers that includes a browser session cookie.

    NOTE(review): ``DEFAULT_COOKIE`` is a captured browser session. A 401
    response presumably means that session is no longer valid - confirm, and
    supply a fresh cookie via the ``cookie`` argument or the
    ``SHIPXY_COOKIE`` environment variable.
    """

    # Seconds to wait for the server before a request is abandoned; without a
    # timeout a stalled connection would block the whole run.
    REQUEST_TIMEOUT = 10

    # Fallback cookie, used only when none is supplied by the caller or the
    # environment. Replace it with your own cookie.
    DEFAULT_COOKIE = (
        'FD857C2AF68165D4=yhLGwCZ+CdOuTxnfKBtXqTFRLT7+2GhMO7z1sXLSKgL+Najd+uXQGG+5Io0fGQxQ; '
        'token=0d2cfcdbf57e20ff17b8b021d6586b2a; '
        'tc_TC=; '
        '_elane_shipfilter_type=%u8D27%u8239%2C%u96C6%u88C5%u7BB1%u8239%2C%u6CB9%u8F6E%2C%u5F15%u822A%u8239%2C%u62D6%u8F6E%2C%u62D6%u5F15%2C%u6E14%u8239%2C%u6355%u635E%2C%u5BA2%u8239%2C%u641C%u6551%u8239%2C%u6E2F%u53E3%u4F9B%u5E94%u8239%2C%u88C5%u6709%u9632%u6C61%u88C5%u7F6E%u548C%u8BBE%u5907%u7684%u8239%u8236%2C%u6267%u6CD5%u8247%2C%u5907%u7528-%u7528%u4E8E%u5F53%u5730%u8239%u8236%u7684%u4EFB%u52A1%u5206%u914D%2C%u5907%u7528-%u7528%u4E8E%u5F53%u5730%u8239%u8236%u7684%u4EFB%u52A1%u5206%u914D%2C%u533B%u7597%u8239%2C%u7B26%u540818%u53F7%u51B3%u8BAE%28Mob-83%29%u7684%u8239%u8236%2C%u62D6%u5F15%u5E76%u4E14%u8239%u957F%3E200m%u6216%u8239%u5BBD%3E25m%2C%u758F%u6D5A%u6216%u6C34%u4E0B%u4F5C%u4E1A%2C%u6F5C%u6C34%u4F5C%u4E1A%2C%u53C2%u4E0E%u519B%u4E8B%u884C%u52A8%2C%u5E06%u8239%u822A%u884C%2C%u6E38%u8247%2C%u5730%u6548%u5E94%u8239%2C%u9AD8%u901F%u8239%2C%u5176%u4ED6%u7C7B%u578B%u7684%u8239%u8236%2C%u5176%u4ED6; '
        '_elane_shipfilter_length=0%2C40%2C41%2C80%2C81%2C120%2C121%2C160%2C161%2C240%2C241%2C320%2C321%2C9999; '
        '_elane_shipfilter_sog=0%2C1; '
        '_elane_shipfilter_naviStatus=0%2C1%2C2%2C3%2C4%2C5%2C6%2C7%2C8%2C15%2C255; '
        '_elane_shipfilter_olength=; '
        '_elane_shipfilter_osog=; '
        '_elane_shipfilter_customsog=; '
        '_filter_flag=-1; '
        '_elane_shipfilter_one=2; '
        '_elane_shipfilter_country=0%2C1%2C2; '
        'tc_QX=; '
        'gdxidpyhxdE=nDMU7fmf2UqADGgUxwj%5CHv%2FnTjc%2BZTyiYxqNQ8xom2cyOSqVvO%2B7CjtaD6t31xhknUuU9%2B6fI4rDa8U0E4V56iT%2FOWtIAsdYkzdN87zLV0TfL69YRZ5yVdmgcVs7B1k06tPgMitB%2BEKmp5sP5rhkg8HJO3aaTXuWQI8KON6DgMY8ELqf%3A1712072404707; '
        'Hm_lvt_adc1d4b64be85a31d37dd5e88526cc47=1712047123,1712066221,1712110039; '
        'ASP.NET_SessionId=y4qlwqjxmvzq3jqdjir33p3a; '
        '.UserAuth2=D0B5D9179A7B0435732BBA58AC037C529D0DD57AECEBC5F591968084DC9F8B10F3D5D09DEFA427B0A905D4333DA733E04D100EFB8BD67C4E60B75A37004A8A0E101C0309AA9E68BEFB3E90042A032AEAF5F508FB816366CBC1B1FD0DE86A72BAB9DFA46401EB0FFB173AB95315BC3DDAC021774B9205F9A68DD4127BD0692C57B21931656C74C0985840DF09191ED91CB4F4C78DF972D7E77145CED109027E83EB8D81C2; '
        'UserAuthUCenter=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6ImtfS1JMRk9TM1dPYk9ZcUN2ZEpKS2I3ZGo1TSIsImtpZCI6ImtfS1JMRk9TM1dPYk9ZcUN2ZEpKS2I3ZGo1TSJ9.eyJpc3MiOiJodHRwOi8vaWQyLmVsYW5lLmNvbS9jb3JlIiwiYXVkIjoiaHR0cDovL2lkMi5lbGFuZS5jb20vY29yZS9yZXNvdXJjZXMiLCJleHAiOjE3MTk4ODcxNzcsIm5iZiI6MTcxMjExMTE3NywiY2xpZW50X2lkIjoiSFlRX1Jlc19BbGxJbiIsInNjb3BlIjpbIm9wZW5pZCIsInByb2ZpbGUiLCJ1c2VyYXBpIl0sInN1YiI6Ijk2NzZmOWQ3YTc5MmQ3MjkiLCJhdXRoX3RpbWUiOjE3MTIxMTExNzcsImlkcCI6Imlkc3J2IiwiYW1yIjpbInBhc3N3b3JkIl19.FDuV9RCdijGMk-nZAk_xUTcrscVwGfw_c0xqlfU0Pf2TQaXPv6YcAB_WezyAPECLxXDOrh6TUAUuk_2wj8f5_hMJTXzxikWTqAoHDhUB4V0TY_nWgT37Mzwc1QMX_mNWTm9plNTchiwAva5-D0RLeB4nj_P_PHfGwVeOIGH-nFtFNfNosrGUZUEVoQ1KU-Ob1SeY_ONXbx5FIOlpa9nYMx_uUslLjq8yDDX7TWz0n0im6URxIPk1tqZPoT4wt3J3ahkcqNphgMWsXpTNxLP0-PAAn7pIamfgNR3cvuXbd6dSxBnpzsrgpLHoIRa9BgALL3VU_8akpA00wso3jOZ9bg; '
        'Hm_lpvt_adc1d4b64be85a31d37dd5e88526cc47=1712111182; '
        'jfg=34cef8a940d65c5e3eb8adb4049a1d50; '
        'SERVERID=b8466f4910324363455fdd8cd201bb92|1712111276|1712110035'
    )

    def __init__(self, cookie=None):
        """Create the HTTP session and the headers sent with every request.

        Args:
            cookie: Value for the ``Cookie`` header. When omitted, the
                ``SHIPXY_COOKIE`` environment variable is used if it is set,
                otherwise ``DEFAULT_COOKIE``.
        """
        self.session = requests.Session()
        self.ua = UserAgent()
        if cookie is None:
            cookie = os.environ.get('SHIPXY_COOKIE', self.DEFAULT_COOKIE)
        self.header = {
            'User-Agent': self.ua.chrome,
            'Referer': 'http://www.shipxy.com/',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Cookie': cookie,
            # Further headers may be required by the site.
        }

    def make_http_request(self, mmsi):
        """Request the record for one vessel.

        Args:
            mmsi: Vessel identifier interpolated into the request URL.

        Returns:
            The decoded JSON payload, or ``None`` when the request fails, the
            HTTP status is not 200, the body cannot be decoded, or the
            payload's ``status`` field equals 2. The reason is printed in
            every ``None`` case.
        """
        url = f'https://www.shipxy.com/ship/GetShip?mmsi={mmsi}'
        try:
            response = self.session.get(
                url, headers=self.header, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            print(f"MMSI: {mmsi}, HTTP请求过程中发生错误: {exc}")
            return None

        if response.status_code != 200:
            print(f"MMSI: {mmsi}, 请求失败,状态码: {response.status_code}")
            return None

        try:
            result = response.json()
            # A payload without .get() (e.g. a JSON list) is reported the
            # same way as an undecodable body.
            rejected = result.get('status') == 2
        except (ValueError, AttributeError) as exc:
            print(f"MMSI: {mmsi}, HTTP请求过程中发生错误: {exc}")
            return None

        if rejected:
            print(f"MMSI: {mmsi}, Message: {result.get('msg')}")
            return None
        return result

    def process_mmsi_list(self, mmsi_list, delay=0.2):
        """Query every MMSI in turn and collect the non-empty payloads.

        Args:
            mmsi_list: Iterable of MMSI values; each is converted with
                ``int()`` before being requested.
            delay: Seconds to pause after each request, to keep the request
                rate low enough not to trigger anti-scraping measures.

        Returns:
            List of the truthy payloads returned by ``make_http_request``, in
            input order.
        """
        all_data = []
        for mmsi in mmsi_list:
            data = self.make_http_request(int(mmsi))
            if data:
                all_data.append(data)
            time.sleep(delay)
        return all_data
# Column-wise accumulators filled by the script below; index i of every list
# refers to the same vessel.
mmsi1 = []
type1 = []
length = []
width = []
name = []


def extract_static_fields(record):
    """Pull the static fields of the first vessel out of one payload.

    All fields are read before anything is stored, so a record that lacks a
    field never leaves the accumulator lists with different lengths.

    Args:
        record: Payload whose ``'data'`` entry is a sequence of per-vessel
            mappings.

    Returns:
        Tuple ``(mmsi, type, length, width, name)``; ``length`` and ``width``
        are the raw values divided by 10.
        NOTE(review): presumably the raw values are in tenths of a metre -
        confirm against the API.

    Raises:
        KeyError, IndexError, TypeError: If the payload does not have the
            expected structure or a dimension is not numeric.
    """
    ship = record['data'][0]
    return (
        ship['mmsi'],
        ship['type'],
        ship['length'] / 10,
        ship['width'] / 10,
        ship['name'],
    )


if __name__ == '__main__':
    spider = ShipxySpider()
    # To query real data, replace mmsi_list with the de-duplicated MMSIs of
    # your own dataset, e.g. list(set(ship_data['mmsi'].tolist())).
    all_data = spider.process_mmsi_list(mmsi_list)
    for data in all_data:
        try:
            fields = extract_static_fields(data)
        except (KeyError, IndexError, TypeError) as exc:
            # Report the skipped record instead of dropping it silently.
            print(f"记录解析失败,已跳过: {exc!r}")
            continue
        for column, value in zip((mmsi1, type1, length, width, name), fields):
            column.append(value)
    for i in all_data:
        print(i)
输出结果一直显示状态码 401(未授权)。
我尝试过把船讯网添加到浏览器白名单,但仍然没有效果。
因为毕业论文需要用到 AIS 静态数据,所以比较着急。若有高人指点、帮忙解决此问题,必有重谢!