# Load the 'yelp.rating' file through load_data and unpack its two results
# into trainMatrix and testRatings.
# NOTE(review): in this view the call sits before the `def load_data` line;
# confirm the definition is already in scope when this statement runs.
trainMatrix, testRatings = load_data('yelp.rating')
def load_data(ratingFile, testRatio=0.1):
user_count = item_count = 0
ratings = []
for line in open(ratingFile):
arr = line.strip().split()
user_id = int(arr[0])
item_id = int(arr[1])
score = float(arr[2])
timestamp = long(arr[3])
ratings.append((user_id, item_id, score, timestamp))
user_count = max(user_count, user_id)
item_count = max(item_count, item_id)
user_count += 1
item_count += 1
ratings = sorted(ratings, key=lambda x: x[3]) # sort by timestamp
test_count = int(len(ratings) * testRatio)
count = 0
trainMatrix = sp.lil_matrix((user_count, item_count))
testRatings = []
for rating in ratings:
if count < len(ratings) - t