from pyspark.sql.functions import split, explode
# Demonstration: tokenize a whitespace-separated string column into one row
# per token.
#
# Key point: pyspark.sql.functions.explode requires an ArrayType (or MapType)
# input column.  Applying it directly to the StringType column raises
#   AnalysisException: cannot resolve 'explode(word)' due to data type
#   mismatch: input to function explode should be array or map type
# so the split() must produce the array that explode() consumes, inside the
# same select (via nesting) or via a prior select whose aliased output
# column is the one exploded.
#
# NOTE(review): assumes `sqlContext` is provided by the surrounding PySpark
# shell / notebook session — confirm before running standalone.
DF = sqlContext.createDataFrame([('cat\nelephant rat\nrat cat',)], ['word'])

print('Dataset:')
DF.show()

print('\nSplit then explode:\n')
DFsplit_explode = (
    DF
    # split() takes a regex pattern; r'\s+' also breaks on the embedded
    # newlines in the sample data, which a literal ' ' pattern would leave
    # inside the tokens.  alias() names the exploded output column.
    .select(explode(split(DF['word'], r'\s+')).alias('word'))
)
# show() prints and returns None, so call it on the DataFrame rather than
# assigning its return value.
DFsplit_explode.show()