from __future__ import print_function
from pyspark.mllib.linalg import Matrices
from pyspark.mllib.linalg.distributed import BlockMatrix
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.mllib.linalg.distributed import RowMatrix,IndexedRowMatrix,IndexedRow
import numpy as np
import sys
def parseVector(line):
    """Parse one line of whitespace-separated numbers into a NumPy array.

    Args:
        line: Text holding the numeric fields of one matrix row.

    Returns:
        A 1-D ``numpy.ndarray`` with one float per field. A line that
        contains only whitespace yields an empty array.

    Raises:
        ValueError: If a field cannot be converted by ``float``.
    """
    # split() with no argument collapses runs of whitespace (spaces or tabs)
    # and ignores leading/trailing blanks. split(' ') would produce ''
    # fields in those cases, which float() rejects.
    return np.array([float(field) for field in line.split()])
def merge(line):
    """Flatten one level of nesting.

    Args:
        line: An iterable whose elements are themselves iterable.

    Returns:
        A new list holding the items of every element of ``line``, in the
        order they are encountered.
    """
    return [item for chunk in line for item in chunk]
def load_data(path):
    """Load a text file of numeric rows into a distributed BlockMatrix.

    Each line of the file is parsed with ``parseVector`` and becomes one
    matrix row; the row index is the line's position in the file.

    Uses the module-level name ``sc`` (presumably the SparkContext created
    by the script's entry point -- it is not defined in this function).

    Args:
        path: Location of the text file, passed to ``sc.textFile``.

    Returns:
        The ``BlockMatrix`` produced by ``IndexedRowMatrix.toBlockMatrix()``.
    """
    vectors = sc.textFile(path).map(parseVector)
    # Number the rows on the cluster instead of collecting every row to the
    # driver and re-parallelizing. This also avoids calling len() on the
    # result of map(), which is an iterator (not a list) on Python 3.
    # zipWithIndex yields (element, index) pairs, hence the swapped order.
    rows = vectors.zipWithIndex().map(
        lambda pair: IndexedRow(pair[1], pair[0].tolist())
    )
    return IndexedRowMatrix(rows).toBlockMatrix()
if __name__ == "__main__":
if len(sys.argv) != 3:
pri
spark两个矩阵相乘
最新推荐文章于 2023-10-24 08:41:26 发布