# Scrape ship track data from the China Ports website (www.chinaports.com).
import json
import time
import random
import datetime
import schedule
from pymongo import MongoClient
from datetime import datetime
import requests
# HTTP headers sent with every request this file makes to www.chinaports.com.
# NOTE(review): the Cookie value is a hard-coded session string, presumably
# captured from a logged-in browser session; it may expire -- confirm.
headers = {
    # Content negotiation.
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    # The endpoints are called with form-encoded POST bodies.
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'ssoUser=@b53c0c7944afc2c426ef87aefa64ae16; UM_distinctid=174bdf61ac146-01ae710934f1b4-3d634d00-384000-174bdf61ac27fc; jsid=15222cfa-bdb8-4e14-b61c-8a0350a55038; Hm_lvt_112cc63ae7d0082ab4d30ec2c3a16614=1600914857,1600914865; CNZZDATA3453251=cnzz_eid%3D1195104629-1600910660-null%26ntime%3D1600910660; Hm_lpvt_112cc63ae7d0082ab4d30ec2c3a16614=1600914906',
    # Origin/referrer values pointing at the site's own ship tracker page.
    'Host': 'www.chinaports.com',
    'Origin': 'http://www.chinaports.com',
    'Referer': 'http://www.chinaports.com/shiptracker/shipinit.do?method=login',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
class Tools():
    '''
    Stateless helpers: coordinate formatting and the query time window.
    '''

    @staticmethod
    def _to_degree_minutes(value):
        '''
        Format one coordinate as "D°M.f′" (whole degrees, decimal minutes).

        The sign is discarded, so no hemisphere is encoded in the result.
        The fractional-minute rule of the original implementation is kept so
        new records match previously stored ones:
          * hundredths digit >= 5: round up and print ONE decimal digit
            (carrying into minutes/degrees when needed);
          * otherwise: print the TWO truncated decimal digits.
        NOTE(review): the mixed one/two-digit precision looks unintentional;
        confirm before normalising it, since stored data uses this format.

        :param value: coordinate in decimal degrees (number or numeric string)
        :return: formatted string, e.g. "121°30.2′"
        '''
        # Imported here so this block does not depend on the file's import list.
        from decimal import Decimal

        # Exact decimal arithmetic on the value as written avoids binary float
        # noise (e.g. 31.2 degrees is exactly 12.00 minutes, not 11.99...), and
        # avoids slicing digits out of a float repr, which fails on short
        # reprs such as '0.0' and on scientific notation such as '1e-05'.
        magnitude = abs(Decimal(str(value)))
        degree = int(magnitude)
        minutes_total = (magnitude - degree) * 60
        minute = int(minutes_total)
        # Fraction of a minute, truncated to hundredths.
        hundredths = int((minutes_total - minute) * 100)
        tenths, last_digit = divmod(hundredths, 10)
        if last_digit >= 5:
            tenths += 1
            if tenths == 10:
                # x.95-x.99 minutes round up to the next whole minute.
                tenths = 0
                minute += 1
                if minute == 60:
                    minute = 0
                    degree += 1
            fraction = str(tenths)
        else:
            fraction = '{:02d}'.format(hundredths)
        return '{}°{}.{}′'.format(degree, minute, fraction)

    @staticmethod
    def calculate_track(lon, lat):
        '''
        Convert a longitude/latitude pair from decimal degrees to
        degree/decimal-minute strings.

        :param lon: longitude in decimal degrees
        :param lat: latitude in decimal degrees
        :return: tuple (lon_text, lat_text), each formatted as "D°M.f′"
        '''
        return Tools._to_degree_minutes(lon), Tools._to_degree_minutes(lat)

    @staticmethod
    def get_time():
        '''
        Build the query window covering the 86400 seconds that end now.

        :return: tuple (start, end) of local-time strings formatted as
                 "%Y-%m-%d %H:%M:%S"; start is 86400 seconds before end
        '''
        time_format = "%Y-%m-%d %H:%M:%S"
        # Whole seconds only: the formatted strings carry no sub-second part.
        now_stamp = int(time.time())
        window_end = time.strftime(time_format, time.localtime(now_stamp))
        window_start = time.strftime(time_format, time.localtime(now_stamp - 86400))
        return window_start, window_end
class Gettrack_data:
    '''
    Look up one ship on www.chinaports.com, download its track points for a
    time window and store them in the local MongoDB database "Gangkou".
    '''

    # A single MongoDB client shared by every instance of this class.
    connect_mongo = MongoClient('mongodb://{}:27017/'.format('localhost'))
    # Seconds to wait on each HTTP request; without a timeout a stalled
    # connection would block the caller indefinitely.
    request_timeout = 30

    def __init__(self, ship_name):
        '''
        :param ship_name: name used to search for the ship; also used as the
                          MongoDB collection name when saving
        '''
        self.ship_id = None
        self.mmsi = None
        self.ship_name = ship_name
        self.tools = Tools()

    def get_ship_id(self):
        '''
        Search the site for self.ship_name and store the id of the first hit
        in self.ship_id (None when the response has no usable result).

        :return: None
        '''
        url = 'http://www.chinaports.com/shiptracker/newshipquery.do'
        data = {
            'method': 'search',
            'isall': 0,
            'vession': 0,
            'cnqp': self.ship_name,
            'queryParam': self.ship_name
        }
        response = requests.post(url=url, headers=headers, data=data,
                                 timeout=self.request_timeout)
        try:
            # The id is read from element [0][1] of the JSON reply.
            self.ship_id = response.json()[0][1]
        except (ValueError, LookupError, TypeError):
            # Body is not JSON, or has no first result in the expected shape.
            self.ship_id = None

    def get_track_list(self, begain_time, end_time):
        '''
        Download the ship's track points for the given window and save each
        one through save_data(). Does nothing when the ship cannot be found.

        :param begain_time: start of the track window
        :param end_time: end of the track window
        :return: None
        '''
        self.get_ship_id()
        if self.ship_id is None:
            return
        urlp = 'http://www.chinaports.com/shiptracker/shipinit.do'
        datas = {
            'method': 'shipTrail',
            'zoomleavel': '11',
            'shipid': self.ship_id,
            'begindate': begain_time,
            'enddate': end_time,
            'common': 'undefined',
            'encode': 'true',
        }
        response = requests.post(url=urlp, headers=headers, data=datas,
                                 timeout=self.request_timeout)
        result_mmsi = self.get_mmsi()
        print(self.ship_name, result_mmsi)
        try:
            track_points = response.json()
        except ValueError as error:
            # Non-JSON reply: report it and skip this ship instead of raising.
            print(self.ship_name, error)
            return
        for point in track_points:
            self.save_data(self.ship_name, result_mmsi, point)

    def get_mmsi(self):
        '''
        Fetch the ship's MMSI from the "pospoint" endpoint and cache it in
        self.mmsi.

        :return: the MMSI string, or None when the ship id is unknown or the
                 response does not have the expected layout
        '''
        if self.ship_id is None:
            return None
        url_mmsi = 'http://www.chinaports.com/shiptracker/shipinit.do'
        data_mmsi = {
            'method': 'pospoint',
            'type': '1',
            'shipid': self.ship_id,
            'encode': 'true'
        }
        response = requests.post(url=url_mmsi, headers=headers, data=data_mmsi,
                                 timeout=self.request_timeout)
        try:
            # Take the text between the first "[[" and the last "]]".
            payload = response.text.split("[[", maxsplit=1)[1].rsplit("]]", maxsplit=1)[0]
            # The MMSI is the 7th comma-separated field, minus its first two
            # characters and its last one.
            # NOTE(review): presumably that strips quoting/padding around the
            # value -- confirm against a live response.
            self.mmsi = payload.split(',')[6][2:-1]
        except IndexError:
            # Missing "[[" marker or fewer than seven fields.
            self.mmsi = None
        return self.mmsi

    def save_data(self, name, mmsi, data):
        '''
        Insert one track point into the collection named after the ship.

        Errors while converting or inserting are printed and the point is
        skipped, so one bad record does not stop the rest of the track.

        :param name: ship name (also the collection name)
        :param mmsi: ship MMSI
        :param data: one track point; data[0] is stored as the time, data[1]
                     and data[2] are passed to calculate_track as lon and lat
        :return: None
        '''
        db = self.connect_mongo.Gangkou
        collection = db['{}'.format(name)]
        try:
            lon_text, lat_text = self.tools.calculate_track(data[1], data[2])
            record = {'ship_name': name, 'ship_mmsi': mmsi, 'time': data[0], 'log': lon_text, 'lat': lat_text}
            collection.insert_one(record)
        except Exception as errors:
            print(errors)
def main():
    '''
    Entry point for one scheduled run: fetch and store the last 24 hours of
    track data for every ship in the built-in list.

    :return: None
    '''
    list_havetrack = ['truecorsair', 'wisdomofthesea1', 'alphaunity', 'glovisadvance', 'panfreedom', 'victorious',
                      'oresaoluis', 'mineralutamaro',
                      'aashna', 'densacobra', 'atalandi', 'truecartier', 'jozen', 'magsenger12', 'pacificcanopus',
                      'taharoaeos', 'changhangbinhai',
                      'ormond', 'hebeiuniverse', 'capeamanda', 'amportcartier']
    # get_time is a static method; no Tools instance is needed.
    begain_time, localtime = Tools.get_time()
    for ship_name in list_havetrack:
        try:
            Gettrack_data(ship_name).get_track_list(begain_time, localtime)
        except Exception as error:
            # Top-level boundary: report the failure and continue, so one
            # ship (e.g. a network error) cannot abort the batch or kill the
            # scheduler loop that calls main().
            print(ship_name, error)
    # The shared Gettrack_data.connect_mongo client is deliberately NOT closed
    # here: main() runs again on every schedule tick, and with PyMongo 4+ a
    # closed MongoClient cannot be used again.
if __name__ == '__main__':
    # Register main() to run once a day at 10:40.
    daily_job = schedule.every().days.at("10:40")
    daily_job.do(main)
    # Poll the scheduler once per second for as long as the process lives.
    while True:
        schedule.run_pending()
        time.sleep(1)