from pyspark import SparkContext
from pyspark import SparkConf
from pyspark.sql import HiveContext
from pyspark.mllib.regression import LabeledPoint
# Script body: start a Spark application on YARN, read a Hive table into a
# DataFrame, and convert each row into an MLlib LabeledPoint.

# Application name passed to SparkConf.setAppName.
string_test = 'pyspark_test'

conf = SparkConf().setAppName(string_test).setMaster('yarn')
sc = SparkContext(conf=conf)

# NOTE(review): HiveContext is deprecated since Spark 2.0 in favour of
# SparkSession.builder.enableHiveSupport(); kept here so the existing
# module-level names stay unchanged.
hive_context = HiveContext(sc)

# The query must be passed as a string; the bare SQL tokens used previously
# were a Python syntax error.
# NOTE(review): `database.table` looks like a placeholder -- confirm the
# real database and table names.
data = hive_context.sql('select * from database.table')  # Spark DataFrame

# Build one LabeledPoint per row: element 0 is used as the label and the
# remaining elements as the feature vector.
# Assumes every column is numeric and column 0 holds the label -- TODO confirm
# against the table schema.
tdata = data.rdd.map(lambda x: LabeledPoint(x[0], x[1:]))  # RDD of LabeledPoint