"""
Crawl Dongfeng Nissan dealer listings and store them in PostgreSQL.

For every city found in the cities JSON file, the script POSTs the city name
to the Dongfeng Nissan distributor endpoint, converts each returned dealer
into a flat record and inserts the records through ``PgsqlPipeline``.

ajaxCallSiteInfo: {1C8B2BC6-35E2-460E-A63D-3576F3039D79}
"""

import json
from datetime import date

import requests

from dbutil.pgsql import PgsqlPipeline

# Request headers sent with every dealer lookup (captured browser session).
headers = {
    'Cookie': 'ASP.NET_SessionId=citapgvmwpvq4qscezfwqjwr; NTKF_T2D_CLIENTID=guest8EE3B5EE-2EEE-0022-B9B6-F4D65DCFD295; nTalk_CACHE_DATA={uid:kf_9255_ISME9754_guest8EE3B5EE-2EEE-00,tid:1567475785166484}; sc_ext_session=kr5rhbkw4ldkmlstifvmj152; nissan#lang=zh-CN; SC_ANALYTICS_GLOBAL_COOKIE=5b20d0477cfd4924913d8cce649b25ab|True; sc_ext_contact=5b20d0477cfd4924913d8cce649b25ab|True; NO_PAGE_DURATION=2019/9/3 21:18:07; no_screen=1280%7C800; Place=%7B%22province%22%3A%22%E6%B2%B3%E5%8C%97%22%2C%22city%22%3A%22%E7%9F%B3%E5%AE%B6%E5%BA%84%22%7D',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
}

# Default location of the province/city list, relative to the working directory.
CITIES_FILE = "./dbutil/cities.json"

# Endpoint that returns the dealers of one city as JSON.
DEALERS_URL = 'https://www.dongfeng-nissan.com.cn/Nissan/ajax/Distributor/GetJsonDistributorList'

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 30


def get_data(path=CITIES_FILE):
    """Read the cities JSON file and return its list of provinces.

    Args:
        path: Location of the JSON file; defaults to ``./dbutil/cities.json``.

    Returns:
        The value stored under the top-level ``'provinces'`` key. ``get_items``
        expects each entry to carry ``'provinceName'`` and ``'citys'``.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the document has no ``'provinces'`` key.
    """
    # The file holds Chinese place names; decode explicitly instead of relying
    # on the platform default encoding (assumes the file is UTF-8, as JSON
    # normally is -- TODO confirm).
    with open(path, 'r', encoding='utf-8') as load_f:
        return json.load(load_f)['provinces']


def get_Dealers(citysName, timeout=REQUEST_TIMEOUT):
    """POST a city name to the distributor endpoint and return its dealers.

    Args:
        citysName: City name sent as the ``city`` form field.
        timeout: Seconds to wait for the server; without a timeout a stalled
            connection would block the whole crawl indefinitely.

    Returns:
        The list found at ``['data']['DealerInfos']`` in the JSON response, or
        an empty list when either of those values is null/empty.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response lacks the ``'data'`` or ``'DealerInfos'`` key.
    """
    data = {'city': citysName}

    # NOTE(review): verify=False switches off TLS certificate verification, so
    # the session cookie above travels over an unauthenticated connection.
    # Kept as in the original because removing it may break the crawl if the
    # site's certificate chain does not validate -- confirm and drop if possible.
    response = requests.post(
        DEALERS_URL,
        headers=headers,
        data=data,
        verify=False,
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of a confusing JSON/Key error later.
    response.raise_for_status()

    payload = response.json()['data']
    if not payload:
        # A null/empty payload is treated as "no dealers" rather than a crash.
        return []
    return payload['DealerInfos'] or []


def get_item(dealer, province_Name, citysName):
    """Build the flat database record for one dealer.

    Args:
        dealer: Dealer mapping from the endpoint; must contain ``StoreName``,
            ``Address``, ``SaleTel``, ``ServiceTel``, ``Longitude`` and
            ``Latitude``.
        province_Name: Province the dealer was found under.
        citysName: City the dealer was found under.

    Returns:
        A dict whose keys match the named placeholders used by
        ``PgsqlPipeline.process_item``; fields the site does not provide are
        ``None`` and ``update_time`` is today's date.

    Raises:
        KeyError: If ``dealer`` lacks one of the required keys.
    """
    item = {
        "dealer_name": dealer["StoreName"],
        "brand_id": None,
        "address": dealer["Address"],
        "brand": "日产",
        "province": province_Name,
        "city": citysName,
        "sale_call": dealer["SaleTel"],
        "customer_service_call": dealer["ServiceTel"],
        "update_time": date.today(),
        "longitude": dealer['Longitude'],
        "latitude": dealer['Latitude'],
        "dealer_type": None,
        "manufacturer_id": None,
        "manufacturer": "东风日产",
        "state": None,
        "opening_date": None,
        "close_date": None,
        "dealer_id_web": None,
        "controlling_shareholder": None,
        "other_shareholders": None,
        "status": None,
        "remarks": None,
    }
    return item


def get_items():
    """Fetch the dealers of every city in the cities file.

    Returns:
        A list with one ``get_item`` record per dealer, in file order
        (province, then city, then the order returned by the endpoint).
    """
    items = []
    for province in get_data():
        province_name = province['provinceName']
        for city in province['citys']:
            city_name = city['citysName']
            items.extend(
                get_item(dealer, province_name, city_name)
                for dealer in get_Dealers(city_name)
            )
    return items


def main():
    """Run the crawl, then insert every collected record into the database."""
    print('爬虫开始--------->')
    items = get_items()
    print('数据存储')
    pg = PgsqlPipeline()
    try:
        for item in items:
            pg.process_item(item)
    finally:
        # Release the connection even when an insert fails part-way through.
        pg.close()
    print('数据存储完成')


if __name__ == '__main__':
    main()
import psycopg2


class PgsqlPipeline(object):
    """Insert dealer records into the PostgreSQL dealer table.

    A connection and one cursor are opened on construction and stay open
    until ``close`` is called.
    """

    # NOTE(review): the double quotes wrap the whole of 201905.tDealer, so
    # PostgreSQL reads it as a single table name containing a dot, not as
    # schema "201905" plus table "tDealer" -- confirm this matches the database.
    # Plain string (no f-prefix): the statement has no Python interpolation,
    # only psycopg2 named placeholders bound at execute time.
    _INSERT_SQL = """INSERT INTO "201905.tDealer"
        ("sDealerName","nBrandID","sBrand","sProvince","sCity","sAddress","sSaleCall","sCustomerServiceCall","sDealerType","nManufacturerID","sManufacturer","nState","dOpeningDate","dCloseDate","dUpdateTime","nDealerIDWeb","sLongitude","sLatitude","sControllingShareholder","sOtherShareholders","sStatus","sRemarks")
        VALUES

        (%(dealer_name)s, %(brand_id)s, %(brand)s, %(province)s, %(city)s, %(address)s, %(sale_call)s, %(customer_service_call)s, %(dealer_type)s, %(manufacturer_id)s,%(manufacturer)s, %(state)s, %(opening_date)s, %(close_date)s, %(update_time)s, %(dealer_id_web)s, %(longitude)s, %(latitude)s, %(controlling_shareholder)s, %(other_shareholders)s, %(status)s, %(remarks)s)"""

    def __init__(self, database="dealer", user="postgres", password="postgres",
                 host="127.0.0.1", port="5432"):
        """Open the database connection and a cursor.

        Args:
            database: Database name.
            user: Login role.
            password: Password for ``user``.
            host: Server address.
            port: Server port.

        The defaults reproduce the previously hard-coded connection settings,
        so ``PgsqlPipeline()`` connects exactly as before.

        Raises:
            psycopg2.Error: If the connection cannot be established.
        """
        self.conn = psycopg2.connect(
            database=database,
            user=user,
            password=password,
            host=host,
            port=port,
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item):
        """Insert one dealer record and commit it.

        Args:
            item: Mapping that provides every named placeholder used in the
                INSERT statement (``dealer_name``, ``brand_id``, ...).

        Raises:
            psycopg2.Error: If the insert or commit fails. The transaction is
                rolled back first so the connection remains usable for the
                next item instead of staying in an aborted state.
        """
        try:
            self.cursor.execute(self._INSERT_SQL, item)
            self.conn.commit()
        except psycopg2.Error:
            self.conn.rollback()
            raise

    def close(self):
        """Close the cursor and then the connection."""
        try:
            self.cursor.close()
        finally:
            # Always release the connection, even if closing the cursor fails.
            self.conn.close()


if __name__ == '__main__':
    # Connectivity smoke test: connect, show the object, disconnect.
    pgsql = PgsqlPipeline()
    try:
        print(pgsql)
    finally:
        pgsql.close()