赞
踩
1.基本字段类型:primitive_data
- 数值类型: int、bigint、float、double、DECIMAL
-
- 字符串:string
-
- 布尔类型:true、false #生产用1/0代替
-
- 时间类型:date、TIMESTAMP 等 #生产用字符串代替,如:19010101010101
2.数组类型:arrary_data
存放相同类型的数据集合
- #创建一张包含array字段的表,array字段的分割符采用的是逗号
- create table hive_array(
- name string,
- work_locations array<string>
- )
- ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
- COLLECTION ITEMS TERMINATED BY ',';
-
- #加载数据
- load data local inpath '/home/hadoop/data/hive_array.txt' overwrite into table hive_array
-
- #直接查询
- hive> select * from hive_array;
- OK
- ruoze ["shanghai","hangzhou","beji"]
- jepson ["hangzhou","wuhan","shenzheng"]
- Time taken: 0.283 seconds, Fetched: 2 row(s)
-
- #查询数组中的某个字段包含某个元素
- select * from hive_array where array_contains(work_locations,"shanghai");
-
- #查询数组具体某个下标的结果
- select name,work_locations[2] from hive_array;
-
- #查询数组的大小
- select name,size(work_locations) from hive_array;

存放相同类型的k-v键值对集合
- #创建一张包含map类型字段的表,map集合元分割符#,键值对的分隔符:
- create table hive_map(
- id int,
- name string,
- members map<string,string>,
- age int
- )
- ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
- COLLECTION ITEMS TERMINATED BY '#'
- MAP KEYS TERMINATED BY ':';
-
- #加载数据
- load data local inpath '/home/hadoop/data/hive_map.txt'
- overwrite into table hive_map;
-
- #直接查询
- hive> select * from hive_map;
- OK
- 1 zhansan {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"} 28
- 2 lis {"father":"mayun","mother":"huangyi","brother":"guanyu"} 22
- 3 wangwu {"father":"wangjianlin","mother":"ruhua","sister":"jianting"} 29
- 4 mayun {"father":"mayongzhen","mother":"angelababy"} 26
-
- #查询某个键的值
-
- select name,members["father"] from hive_map;
- #查询map字段的所有的key,所有的valuse。
- select map_keys(members) from hive_map;
- select map_values(members) from hive_map;
-
- #查询map集合大小
- select size(members) from hive_map;

可存放不同类型的数据的集合
- #创建一张包含struct类型字段的表,struct有由string和int两种类型数据组成,分割符为:
- create table hive_struct(
- ip string,
- userinfo struct<name:string,age:int>
- )
- ROW FORMAT DELIMITED FIELDS TERMINATED BY '#'
- COLLECTION ITEMS TERMINATED BY ':';
-
- #加载数据
- load data local inpath '/home/hadoop/data/hive_struct.txt'
- overwrite into table hive_struct;
-
- #直接查询
- hive> select * from hive_struct;
- OK
- 192.168.1.1 {"name":"zhangsan","age":40}
- 192.168.1.2 {"name":"lisi","age":50}
- 192.168.1.3 {"name":"wangwu","age":60}
- 192.168.1.4 {"name":"zhaoliu","age":70}
- Time taken: 0.206 seconds, Fetched: 4 row(s)
-
- #查询结构体某个子节点信息
- select ip,userinfo.name,userinfo.age from hive_struct;

注意:非基本类型在生产中会被用到,至于对比基本类型性能如何,不是特别重要,因为最终都是由行式存储转为列式存储的大宽表,供生产计算和查询的(sql)。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。