# Imports for the PySpark -> Hive write example below.
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Build the SparkSession. enableHiveSupport() is required so that
# spark.sql('use Test') / 'insert into ...' below talk to the Hive metastore
# (without it, inserts go to Spark's local catalog on most deployments).
spark = SparkSession.builder \
    .appName('write_data') \
    .enableHiveSupport() \
    .getOrCreate()
建库,建表
hive> create database if not exists Test;
hive> show databases;
hive> create table if not exists Test.wjh_test(
>phone string,
>day int);
hive> show tables;
少量写入数据
hive> use Test;
hive> insert into wjh_test values('13233344421', 20190808);
hive> insert into wjh_test values('13666655532', 20190909);
hive> select * from wjh_test;
大量写入数据(本地文件,非hdfs路径下)
# Bulk load: read a local CSV of (phone, day) rows, turn it into a
# DataFrame, and append it to the Hive table Test.wjh_test.
# Use a context manager so the file handle is closed (the original
# `f = open(...)` leaked it).
with open('/home/今晚打老虎/phone.csv') as f:
    lines = f.readlines()

# One record per line: strip the trailing newline, split on the comma.
rdd = spark.sparkContext.parallelize(lines).map(lambda x: x.strip('\n').split(','))

# NOTE(review): the Hive column `day` is declared int, but we load both
# fields as strings; Hive casts on insert. For strict typing, map with
# int(line[1]) and use IntegerType here instead.
schema = StructType([
    StructField('phone', StringType(), True),
    StructField('day', StringType(), True),
])
df = spark.createDataFrame(rdd, schema)

# registerTempTable is deprecated since Spark 2.0; createOrReplaceTempView
# is the equivalent modern API.
df.createOrReplaceTempView('tempTable')

# Select the target database, then append the staged rows.
spark.sql('use Test')
spark.sql('insert into wjh_test select * from tempTable')
查询写入结果
# Verify the insert: show the first rows of the target table.
# (The original used curly "smart quotes" ‘…’, which is a Python syntax error.)
spark.sql('select * from wjh_test limit 10').show()