[hadoop@node1 spark-1.5.2-bin-hadoop2.6]$ cd examples/src/main/resources/
[hadoop@node1 resources]$ file users.parquet
users.parquet: Par archive data
[hadoop@node1 resources]$ strings users.parquet|more
PAR1
Alyssa
example.avro.User
name%
favorite_color%
favorite_numbers
array
name
favorite_color
favorite_numbers
array
avro.schema
{"type":"record","name":"User","namespace":"example.avro","fields":[{"name":"name","type":"string"},{"name":"favorite_color","type":["string","null"]},{"name":"favorite_numbers","type":{"type":"array","items":"int"}}]}
parquet-mr version 1.4.3
PAR1
--读取parquet,保存为parquet(未指定format时,load/save默认使用parquet格式,由spark.sql.sources.default控制)
scala> val df = sqlContext.read.load("hdfs://node1:8020/test/input/users.parquet")
df: org.apache.spark.sql.DataFrame = [name: string, favorite_color: string, favorite_numbers: array<int>]
scala> df.select("name", "favorite_color").write.save("namesAndFavColors.parquet")
[hadoop@node1 resources]$ hadoop fs -ls /user/hadoop/namesAndFavColors.parquet
15/12/15 10:13:58 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 4 items
-rw-r--r-- 1 hadoop supergroup 0 2015-12-15 10:13 /user/hadoop/namesAndFavColors.parquet/_SUCCESS
-rw-r--r-- 1 hadoop supergroup 303 2015-12-15 10:13 /user/hadoop/namesAndFavColors.parquet/_common_metadata
-rw-r--r-- 1 hadoop supergroup 537 2015-12-15 10:13 /user/hadoop/namesAndFavColors.parquet/_metadata
-rw-r--r-- 1 hadoop supergroup 549 2015-12-15 10:13 /user/hadoop/namesAndFavColors.parquet/part-r-00000-1523bee5-95b8-497c-b2d2-924a06eace33.gz.parquet
--读取json,保存为parquet(通过format显式指定读取和写入的数据源格式)
scala> val df = sqlContext.read.format("json").load("hdfs://node1:8020/test/input/people.json")
df: org.apache.spark.sql.DataFrame = [age: bigint, name: string]
scala> df.select("name", "age").write.format("parquet").save("namesAndAges.parquet")
[hadoop@node1 ~]$ hadoop fs -ls /user/hadoop/namesAndAges.parquet
Found 5 items
-rw-r--r-- 1 hadoop supergroup 0 2015-12-15 10:31 /user/hadoop/namesAndAges.parquet/_SUCCESS
-rw-r--r-- 1 hadoop supergroup 277 2015-12-15 10:31 /user/hadoop/namesAndAges.parquet/_common_metadata
-rw-r--r-- 1 hadoop supergroup 750 2015-12-15 10:31 /user/hadoop/namesAndAges.parquet/_metadata
-rw-r--r-- 1 hadoop supergroup 537 2015-12-15 10:31 /user/hadoop/namesAndAges.parquet/part-r-00000-d9c21326-ae90-437d-a952-46524e22ca2e.gz.parquet
-rw-r--r-- 1 hadoop supergroup 531 2015-12-15 10:31 /user/hadoop/namesAndAges.parquet/part-r-00001-d9c21326-ae90-437d-a952-46524e22ca2e.gz.parquet
Spark SQL语法:读取parquet,load与save的用法
最新推荐文章于 2024-06-03 10:17:39 发布