CODE:
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on 2014-8-26
@author: guaguastd
@name: linkedin_network_clusters.py
'''
import os
import sys
import json
from urllib2 import HTTPError
from cluster import KMeansClustering, centroid
# Number of clusters requested from the k-means step further down
K = 3

# createKML is a helper function to munge data and build up an XML tree.
# The directory appended to the module search path below is presumably where
# the local helper modules live -- TODO confirm.
helper_dir = os.path.join(os.getcwd(), "e:", "eclipse", "LinkedIn", "dFile")
sys.path.append(helper_dir)
from mykml import createKML
from geo import geo_from_bing

# Geocoder object used to get geo codes for location names
g = geo_from_bing()
# Load the data
# Raw strings keep the Windows backslashes literal instead of relying on
# "\e", "\L", "\d" and "\l" not being recognised escape sequences.
CONNECTIONS_DATA = r'E:\eclipse\LinkedIn\dfile\linkedin_connections.json'
OUT_FILE = r"E:\eclipse\LinkedIn\dfile\linkedin_clusters_kmeans.kml"

# Open up your saved connections with extended profile information
# or fetch them again from LinkedIn if you prefer. The context manager
# closes the file handle as soon as the JSON document has been parsed.
with open(CONNECTIONS_DATA) as connections_file:
    connections = json.load(connections_file)['values']

# Location name of every connection that declares a location
locations = [c['location']['name'] for c in connections if 'location' in c]

# Some basic transforms: (substring, replacement) pairs applied to a
# location name before it is handed to the geocoder
transforms = [('Greater ', ''), (' Area', '')]
# Step 1 - Tally the frequency of each location
# Number of HTTP errors tolerated for one location before giving up
MAX_GEOCODE_ERRORS = 3

# Maps a location name to [coordinates of its first geocoding result, count]
coords_freqs = {}
for location in locations:
    # Avoid unnecessary I/O and geo requests by building up a cache
    if location in coords_freqs:
        coords_freqs[location][1] += 1
        continue

    transformed_location = location
    for transform in transforms:
        transformed_location = transformed_location.replace(*transform)

    # Handle potential IO errors with a retry pattern. The counter is set up
    # once per location, outside the retry loop: resetting it on every
    # attempt would make the limit unreachable and retry forever.
    num_errors = 0
    while True:
        try:
            results = g.geocode(transformed_location, exactly_one=False)
        except HTTPError as e:
            num_errors += 1
            sys.stderr.write('%s\n' % (e,))
            if num_errors >= MAX_GEOCODE_ERRORS:
                # Report the failure and exit with a non-zero status rather
                # than terminating silently as if the run had succeeded.
                sys.stderr.write('Giving up on %r after %d errors.\n'
                                 % (transformed_location, num_errors))
                sys.exit(1)
            sys.stderr.write('Encountered an urllib2 error. Trying again...\n')
        else:
            print(results)
            break

    # The geocoder yields None when it finds nothing for the query
    if results is None:
        continue

    # Each result is of the form ("Description", (X,Y)).
    # Disambiguation strategy is "pick first"; an empty result set leaves
    # the location untallied.
    first_result = next(iter(results), None)
    if first_result is not None:
        coords_freqs[location] = [first_result[1], 1]
# Step 2 - Build up data structure for converting locations to KML
# Each entry is (location name, [(lon, lat)] repeated once per connection)
expanded_coords = []
for label in coords_freqs:
    (lat, lon), freq = coords_freqs[label]
    # Flip lat/lon for Google Earth
    expanded_coords.append((label, [(lon, lat)] * freq))

# No need to clutter the map with unnecessary placemarks...
# so emit a single placemark per distinct location
kml_items = [{'label': label, 'coords': '%s,%s' % coords[0]}
             for (label, coords) in expanded_coords]

# It would also be helpful to include names of your contacts on the map.
# Group the contact names by location in one pass over the connections
# instead of rescanning every connection for every placemark. Only locations
# that were geocoded successfully (the keys of coords_freqs) are grouped.
contacts_by_label = {}
for c in connections:
    if 'location' not in c:
        continue
    location_name = c['location']['name']
    if location_name in coords_freqs:
        contacts_by_label.setdefault(location_name, []).append(
            '%s %s.' % (c['firstName'], c['lastName']))

for item in kml_items:
    item['contacts'] = '\n'.join(contacts_by_label.get(item['label'], []))
# Step 3 - Cluster locations and extend the KML data structure with centroids
# Flatten the per-location coordinate lists into one list of (lon, lat)
# points, so a location shared by several connections is counted repeatedly
all_coords = [coords for (label, coords_list) in expanded_coords
              for coords in coords_list]
c1 = KMeansClustering(all_coords)

# One extra placemark per cluster, positioned at the value centroid() returns
# for it (formatted as a pair, so it is assumed to be a 2-tuple).
# The label was previously misspelled 'CONTROID'.
centroids = [{'label': 'CENTROID', 'coords': '%s,%s' % centroid(cluster)}
             for cluster in c1.getclusters(K)]
kml_items.extend(centroids)
# Step 4 - Create the final KML output and write it to a file
kml = createKML(kml_items)

# The context manager closes the file even if the write raises
with open(OUT_FILE, 'w') as kml_file:
    kml_file.write(kml)

print('Data written to ' + OUT_FILE)
RESULT:
[Location(Beijing, Beijing, China 39 54m 0.0s N, 116 23m 0.0s E)]
[Location(Beijing, Beijing, China 39 54m 0.0s N, 116 23m 0.0s E)]
None
[Location(CA, United States 37 43m 0.0s N, 122 15m 0.0s W)]
[Location(Birmingham, England, United Kingdom 52 29m 0.0s N, 1 55m 0.0s W), Location(Birmingham, England, United Kingdom 52 27m 0.0s N, 1 43m 0.0s W), Location(Birmingham Airport, England, United Kingdom 52 27m 0.0s N, 1 44m 0.0s W), Location(Birmingham Business Park, England, United Kingdom 52 28m 0.0s N, 1 43m 0.0s W)]
[Location(Birmingham, England, United Kingdom 52 29m 0.0s N, 1 55m 0.0s W), Location(Birmingham, England, United Kingdom 52 27m 0.0s N, 1 43m 0.0s W), Location(Birmingham Airport, England, United Kingdom 52 27m 0.0s N, 1 44m 0.0s W), Location(Birmingham Business Park, England, United Kingdom 52 28m 0.0s N, 1 43m 0.0s W)]
[Location(China 36 33m 0.0s N, 103 59m 0.0s E)]
[Location(China 36 33m 0.0s N, 103 59m 0.0s E)]
[Location(Chengdu, Sichuan, China 30 40m 0.0s N, 104 5m 0.0s E)]
[Location(Chengdu, Sichuan, China 30 40m 0.0s N, 104 5m 0.0s E)]
[Location(Xingtai, Hebei, China 37 4m 0.0s N, 114 29m 0.0s E)]
[Location(Xingtai, Hebei, China 37 4m 0.0s N, 114 29m 0.0s E)]
[Location(United States 39 27m 0.0s N, 98 57m 0.0s W)]
[Location(United States 39 27m 0.0s N, 98 57m 0.0s W)]
[Location(Foshan, Guangdong, China 23 2m 0.0s N, 113 6m 0.0s E)]
[Location(Foshan, Guangdong, China 23 2m 0.0s N, 113 6m 0.0s E)]
Data written to E:\eclipse\LinkedIn\dfile\linkedin_clusters_kmeans.kml