#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date : 18/03/23 14:22:58
# Desc : qq登陆 , 滑动验证暂没处理
import os
import time
from selenium import webdriver
from yichuxing.settings import qq_list
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
class Login(object):
    """Log in to QQ through a Selenium-driven browser and collect the cookies.

    The login form is filled with the account given at construction time.
    Slider-captcha verification is not handled (see the file header), so a
    login that triggers it simply yields ``None``.

    NOTE(review): the ``find_element_by_id`` calls and the PhantomJS driver
    belong to the Selenium 3 API this file was written against.
    """

    # Page hosting the login form; its title contains "宜出行" once logged in.
    LoginURL = "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw&cityid=110000"

    # Raw string so the backslashes of the Windows path are never interpreted
    # as escape sequences.
    ChromeDriverPath = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"

    def __init__(self, **kwargs):
        """Store the credentials.

        Keyword Args:
            qq_num: QQ account number typed into the "u" field.
            qq_passwd: password typed into the "p" field.
        """
        self.qq_num = kwargs.get("qq_num")
        self.qq_passwd = kwargs.get("qq_passwd")

    def after_smoothly_login(self, driver):
        """Return the browser cookies as a ``{name: value}`` dict."""
        return {elem["name"]: elem["value"] for elem in driver.get_cookies()}

    def _submit_login(self, driver, clear_fields=False, maximize=False):
        """Fill in and submit the login form, then return the cookies or None.

        Args:
            driver: a started Selenium driver.
            clear_fields: clear each input before typing into it.
            maximize: maximize the window before clicking the submit button.
        """
        driver.set_page_load_timeout(10)
        driver.get(self.LoginURL)
        for element_id, text in (("u", self.qq_num), ("p", self.qq_passwd)):
            if clear_fields:
                driver.find_element_by_id(element_id).clear()
            driver.find_element_by_id(element_id).send_keys(text)
        if maximize:
            driver.maximize_window()
        driver.find_element_by_id("go").click()
        # Give the page time to redirect after the form is submitted.
        time.sleep(6)
        if "宜出行" in driver.title:
            return self.after_smoothly_login(driver)
        # Any other title (e.g. "手机统一登录", the login page) means the
        # login did not go through.
        return None

    def _run_session(self, make_driver, **options):
        """Create a driver, attempt the login and always shut the driver down.

        Returns the cookie dict on success and ``None`` on any failure, which
        is the contract callers of the public ``get_cookie_by_*`` methods
        rely on.
        """
        driver = None
        try:
            driver = make_driver()
            return self._submit_login(driver, **options)
        except Exception as err:
            # Boundary handler: report the failure instead of hiding it, but
            # keep returning None so callers can move on to another account.
            print("qq login failed: ", err)
            return None
        finally:
            if driver is not None:
                try:
                    # The cookies were already copied into a plain dict, so
                    # the browser process can be released here.
                    driver.quit()
                except Exception as err:
                    print("closing the browser failed: ", err)

    def get_cookie_by_Chrome(self):
        """Log in with Chrome and return the cookie dict, or None on failure."""
        def make_driver():
            os.environ["webdriver.chrome.driver"] = self.ChromeDriverPath
            return webdriver.Chrome(self.ChromeDriverPath)

        return self._run_session(make_driver, maximize=True)

    def get_cookie_by_PhantomJS(self):
        """Log in with PhantomJS and return the cookie dict, or None on failure."""
        def make_driver():
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            # Present a desktop Chrome user agent instead of the PhantomJS one.
            dcap["phantomjs.page.settings.userAgent"] = (
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36"
            )
            return webdriver.PhantomJS(desired_capabilities=dcap)

        return self._run_session(make_driver, clear_fields=True)
class CookieException(Exception):
    """Exception raised when the login cookie is no longer valid."""

    def __init__(self):
        super(CookieException, self).__init__()
"""
if __name__ == "__main__":
#L = Login(qq_num="xxxx", qq_passwd="xxxx")
#L.get_cookie_by_Chrome()
"""
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date : 18/03/23 14:22:58
# Desc : 宜出行热力图
import hashlib
import socket
import os
import json
import time
import random
import datetime
import requests
from yichuxing.settings import qq_list, s_fre, proxyMeta, is_proxy
from requests.exceptions import RequestException
#from utils.user_angents import agents
from data_utils.ali_oss import OSS2
from data_utils.time_convert import get_time_stamp
from yichuxing.yichuxing_utils.qqlogin import CookieException, Login
from data_utils.conmongodb import mongo_con_keepalive
from yichuxing.yichuxing_utils.create_grid import create_grid_by_center, get_gd_data
class Crawl():
    """Crawl the "宜出行" heat-map API cell by cell and upload the replies.

    QQ accounts are tracked in the ``yichuxing_qq_status`` collection; an
    account is marked ``status: True`` once it hit the per-account request
    limit (``s_fre``) or its cookie stopped working, and a fresh account is
    then logged in. Successful replies are uploaded through ``self.oss``.
    """

    db = mongo_con_keepalive()
    header = {
        "Host": "c.easygo.qq.com",
        "Connection": "keep-alive",
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "http://c.easygo.qq.com/eg_toc/map.html?origin=csfw",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    }
    start_url = "http://c.easygo.qq.com/api/egc/heatmapdata"
    cookie_data = None
    # Pause (seconds) picked at random before every request; much shorter
    # when requests go through the proxy.
    if is_proxy:
        wait_time = [0.16, 0.17]
    else:
        wait_time = [3, 3.1, 3.2, 3.3, 3.4]
    time_stamp = get_time_stamp()
    time_local = time.localtime(int(time_stamp))
    date = time.strftime("%Y-%m-%d", time_local)
    proxies = {
        "http": proxyMeta,
        "https": proxyMeta,
    }
    # Account currently in use and the number of requests made with it.
    fre_data = {"qq": None, "pwd": None}
    fre = 0
    pid = os.getpid()
    oss = OSS2()
    path_dir = None
    website = "population_yichuxing"
    qq_status = "yichuxing_qq_status"
    # Seconds before a request is abandoned; without it a stalled connection
    # would block the crawl forever.
    request_timeout = 30

    def __init__(self):
        """Register the output directory and reset the account table daily."""
        self.path_dir = "population/yichuxing/{0}/".format(self.time_stamp)
        self.db.get_collection('pathdir_dict').insert_one(
            {'pathdir': self.path_dir, 'website': self.website, 'flag': False}
        )
        accounts = self.db.get_collection(self.qq_status)
        if accounts.find_one({"date": self.date}) is None:
            # No record for today yet: start over with every configured account.
            # NOTE(review): Collection.remove() is deprecated in recent
            # pymongo releases; delete_many({}) is the replacement - confirm
            # the installed version before switching.
            accounts.remove({})
            print("新的一天,新的开始 初始化所有账号")
            accounts.insert_many(
                [{"qq": i["qq"], "pwd": i["pwd"], "n": 0,
                  'status': False, "date": self.date} for i in qq_list]
            )
        super(Crawl, self).__init__()

    def kill(self):
        """Send SIGTERM to the process whose pid was recorded in ``self.pid``."""
        import signal  # local import: only needed on this shutdown path

        try:
            # os.kill raises OSError on failure, unlike a shelled-out "kill".
            os.kill(self.pid, signal.SIGTERM)
        except OSError as e:
            print("kill pid error: ", e)

    def _retire_current_account(self):
        """Mark the account in ``self.fre_data`` as used up (``status: True``)."""
        self.db.get_collection(self.qq_status).update_one(
            {"qq": self.fre_data.get("qq")}, {"$set": {"status": True}}
        )

    def get_cookie(self):
        """Log in with a random unused account and store its cookies.

        When no unused account is left the process is terminated via
        ``kill()``. When the login itself fails the previous cookies are kept.
        """
        available = list(self.db.get_collection(self.qq_status).find(
            {"status": False}, {"_id": 0}
        ))
        if not available:
            print("没有账号了, 杀死自己")
            self.kill()
            return
        self.fre = 0
        self.fre_data = random.choice(available)
        login = Login(qq_num=self.fre_data.get("qq"),
                      qq_passwd=self.fre_data.get("pwd"))
        cookie_data = login.get_cookie_by_PhantomJS()
        if cookie_data:
            self.cookie_data = cookie_data

    def spyder_params(self, item):
        """Build the query parameters for one grid cell.

        Args:
            item: dict with ``lng_min``, ``lat_max``, ``lng_max``, ``lat_min``.
        """
        return {
            "lng_min": item.get("lng_min"),
            "lat_max": item.get("lat_max"),
            "lng_max": item.get("lng_max"),
            "lat_min": item.get("lat_min"),
            "level": 16,
            "city": "",
            "lat": "undefined",
            "lng": "undefined",
            "_token": "",
        }

    def spyder(self, params, max_retries=5):
        """Request one grid cell and return the decoded JSON reply.

        Args:
            params: query parameters as built by ``spyder_params``.
            max_retries: how many times a failed connection is retried.

        Raises:
            CookieException: the reply was not HTTP 200 or not valid JSON.
            RequestException: the connection kept failing after all retries.
        """
        last_error = None
        for _ in range(max(1, max_retries + 1)):
            time.sleep(random.choice(self.wait_time))
            if self.fre >= s_fre:
                print("账号: {0}, 抓取次数达到上限, 更换qq账号".format(self.fre_data.get("qq")))
                self._retire_current_account()
                self.get_cookie()
            try:
                r = requests.get(
                    self.start_url,
                    headers=self.header,
                    cookies=self.cookie_data,
                    params=params,
                    # proxies=None is the requests default, i.e. no proxy.
                    proxies=self.proxies if is_proxy else None,
                    timeout=self.request_timeout,
                )
            except RequestException as err:
                # Network-level failure: wait and try the same cell again.
                last_error = err
                continue
            if r.status_code != 200:
                raise CookieException
            self.fre = self.fre + 1
            try:
                return r.json()
            except ValueError:
                # A non-JSON body is treated as an expired cookie.
                raise CookieException
        raise last_error

    def get(self, params):
        """Fetch one cell, switching to a new account once if the cookie failed.

        A second ``CookieException`` raised by the retry is not handled here
        and propagates to the caller.
        """
        try:
            return self.spyder(params)
        except CookieException:
            print("账号: {0}, cookie 失效,获取新账号登陆, 并抓取".format(
                self.fre_data.get("qq")))
            self._retire_current_account()
            self.get_cookie()
            return self.spyder(params)

    def create_filename(self, url):
        """Build the upload file name: host name, URL host, URL md5, timestamp."""
        url_host = url.split('//')[-1].split('/')[0].replace('.', '-')
        url_digest = hashlib.md5(url.encode()).hexdigest()
        seconds = str(time.time()).split('.')[0]
        return '%s_%s_%s_%s.json' % (socket.gethostname(), url_host,
                                     url_digest, seconds)

    @staticmethod
    def _is_ok(data_json):
        """Return True when the reply is a dict whose ``code`` is 0."""
        return isinstance(data_json, dict) and data_json.get("code") == 0

    def start(self):
        """Crawl every grid cell of every selected city and upload the data."""
        self.get_cookie()
        for city in get_gd_data():
            print("begin: ", city)
            cells = create_grid_by_center(city)
            print("将要抓取的次数: ", len(cells))
            for cell in cells:
                print("抓取范围: ", cell)
                params = self.spyder_params(cell)
                data_json = self.get(params)
                file_ = "{0}{1}".format(
                    self.path_dir,
                    self.create_filename("{0}{1}".format(self.start_url, params)))
                if not self._is_ok(data_json):
                    # The API rejected the request: retry once with a new account.
                    code = data_json.get("code") if isinstance(data_json, dict) else None
                    print("code: {0}, 获取新的账号,再一次抓取".format(code))
                    self._retire_current_account()
                    self.get_cookie()
                    data_json = self.get(params)
                # Only non-empty successful replies are stored.
                if self._is_ok(data_json) and data_json.get("data"):
                    data_json["cityname"] = cell["cityname"]
                    self.oss.uploadfiledata(file_, json.dumps(data_json))
        # NOTE(review): Cursor.count() is deprecated in recent pymongo
        # releases (count_documents is the replacement) - confirm the
        # installed version.
        co = self.db.get_collection(self.qq_status).find({"status": False}).count()
        print("剩余可用qq count: ", co)
if __name__ == "__main__":
    # Script entry point: build the crawler and run one full crawl.
    Crawl().start()
# Number of crawl requests made with one account.
s_fre = 70

# Side length of each crawled grid cell, also the shift between cells
# (original note: 0.04 is more than 4 km).
lat_offset = 0.04
lng_offset = 0.04

# Proxy switch. True: enabled, False: disabled.
is_proxy = True

# City class -> number of rings crawled around the city centre.
grade = {
    0: 6,
    1: 6,
    2: 5,
    3: 4,
    4: 4,
    5: 4,
}

# Proxy address.
proxyMeta = "http://xxx:xxx@proxy.abuyun.com:9020"

# QQ accounts.
qq_list = [{"qq": "xxx", "pwd": "xxx"}]
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author : zhibo.wang
# E-mail : d_1206@qq.com
# Date : 18/03/23 16:28:43
# Desc :
import json
import numpy as np
from yichuxing.settings import lat_offset, lng_offset, grade
from data_utils.conmongodb import mongo_con_keepalive
from data_utils.location_convert import bd09togcj02
# Module-level database handle returned by mongo_con_keepalive();
# get_gd_data() reads the "params_citys" collection through it.
db = mongo_con_keepalive()
def get_gd_data():
    """Return the city records to crawl, with their centre in Tencent coordinates.

    Reads the ``params_citys`` documents flagged ``exists_city``, sorted by
    ``class``. Each kept record has ``center_lng``/``center_lat`` replaced by
    ``lng``/``lat`` as returned by ``bd09togcj02``.
    """
    cursor = db.get_collection("params_citys").find(
        {"exists_city": True}, {"_id": 0}
    ).sort("class")
    selected = []
    for record in cursor:
        # NOTE(review): the source indentation was lost; this assumes both
        # filters (province is not "广东省", class is 3) wrap the whole
        # conversion and the append - confirm against the original file.
        if record.get("province") == "广东省" or record.get("class") != 3:
            continue
        bd_lng = record.pop("center_lng")
        bd_lat = record.pop("center_lat")
        # Convert to the Tencent coordinate system.
        record["lng"], record["lat"] = bd09togcj02(bd_lng, bd_lat)
        selected.append(record)
    return selected
def create_grid_by_center(location, n=None, lat_step=None, lng_step=None):
    """Build the square grid of cells surrounding a city centre.

    The grid spans ``n`` steps from the centre in every direction, i.e.
    ``2n x 2n`` cells. Original note: with 4 km x 4 km cells and 5 rings this
    is 4*4*(5*2)^2 = 1600 square kilometres.

    Args:
        location: dict with ``lng``, ``lat`` and ``cityname``; ``class`` is
            read only when ``n`` is not given.
        n: number of rings; defaults to ``grade[location["class"]]``.
        lat_step: cell height; defaults to the ``lat_offset`` setting.
        lng_step: cell width; defaults to the ``lng_offset`` setting.

    Returns:
        A list of dicts with ``lng_min``, ``lat_max``, ``lng_max``,
        ``lat_min`` and ``cityname``, ordered row by row from the bottom-left.

    Raises:
        ValueError: no ring count is configured for the city class.
    """
    lng, lat = location["lng"], location["lat"]
    cityname = location["cityname"]
    if n is None:
        city_class = location["class"]
        n = grade.get(city_class)
        if n is None:
            raise ValueError(
                "no ring count configured for city class {0!r}".format(city_class))
    n = float(n)
    if lat_step is None:
        lat_step = lat_offset
    if lng_step is None:
        lng_step = lng_offset

    # Count the cells with integers: np.arange with a float step can return
    # one element too many or too few because of rounding, which would add
    # or drop a whole row/column of the grid.
    cells_per_side = max(int(np.ceil(2 * n)), 0)
    index = np.arange(cells_per_side)
    lat_starts = ((lat - lat_step * n) + lat_step * index).tolist()
    lng_starts = ((lng - lng_step * n) + lng_step * index).tolist()

    return [
        {
            "lng_min": lng_,
            "lat_max": lat_ + lat_step,
            "lng_max": lng_ + lng_step,
            "lat_min": lat_,
            "cityname": cityname,
        }
        for lat_ in lat_starts
        for lng_ in lng_starts
    ]
"cityname" : "北京市",
"province" : "北京市",
"citycode" : "131",
"center_lat" : 39.904211, # 百度坐标
"center_lng" : 116.407394,
"class" : 0,
"ftx_code" : "bj",
"meituan_code" : "beijing",
"meituan_id" : 1,
"dianping_id" : 2,
"dianping_code" : "beijing",
"gd_adcode" : "110000",
"gd_citycode" : "010",
"shunqi_code" : "beijing",
"xiecheng_code" : "BJS",
"xiecheng_status" : true,
"zhilian_code" : "beijing",
"baidu_id" : 131,
"exists_city" : true
{ "scale" : "20,50,100,200", "lng_a" : 116.550125, "lat_a" : 39.843624999999996, "lng_b" : 116.55662935278988, "lat_b" : 39.84962393215385, "lng_g" : 116.54429316621265, "lat_g" : 39.842540318493164, "gps_s" : "a", "count" : 800, "grid_y" : 159374, "grid_x" : 466200, "max_data" : 32000, "crawl_time" : "2018-05-29 10:03:37", "city" : "北京市" }
# 经纬度解密代码 (longitude/latitude decoding code), see:
# http://c.easygo.qq.com/eg_toc/js/map-d76c21c16d.bundle.js
lng = 1e-6 * (250.0 * d['grid_x'] + 125.0)
lat = 1e-6 * (250.0 * d['grid_y'] + 125.0)