hive 分区和分桶
一、设置参数
--允许使用动态分区可通过set hive.exec.dynamic.partition;查看
set hive.exec.dynamic.partition=true;
--当需要设置所有列为dynamic时需要这样设置
set hive.exec.dynamic.partition.mode=nonstrict;
--如果分区总数超过这个数量会报错
set hive.exec.max.dynamic.partitions=1000;
--单个MR Job允许创建分区的最大数量
set hive.exec.max.dynamic.partitions.pernode=1000;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
Drop table table_name; --先删除表 没有则直接建表了
CREATE TABLE table_name --创建表
(col1 string, col2 date, col3 double)
partitioned by (datekey date) --可以多个字段的组合分区
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' Stored AS TEXTFILE;
CREATE TABLE table_name4 --创建表
(col1 string, col2 date, col3 double)
partitioned by (datekey1 date, datekey2 date) --可以多个字段的组合分区
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' Stored AS TEXTFILE;
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-02-27',23.78,'2017-02-27');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-02-28',23.78,'2017-03-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-03-26',23.78,'2017-05-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-04-26',23.78,'2017-06-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-05-26',23.78,'2017-08-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-06-26',23.78,'2017-09-26');
---------------------------
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
Drop table table_name; --先删除表 没有则直接建表了
CREATE TABLE table_name2 --创建表
(col1 string, col2 date, col3 double)
partitioned by (datekey date) --可以多个字段的组合分区
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' Stored AS TEXTFILE;
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-02-27',23.78,'2017-02-27');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-02-28',23.78,'2017-03-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-03-26',23.78,'2017-05-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-04-26',23.78,'2017-06-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-05-26',23.78,'2017-08-26');
INSERT INTO TABLE table_Name
PARTITION (DateKey)
values('ssadsad','2017-06-26',23.78,'2017-09-26');
-----------------------------
dfs -chmod -R 777 /user/luopeng/ ;
SELECT col1,col2,col3,DateKey FROM otherTable
WHERE DATEKEY IN ('2017-02-26','2013-06-12','2013-12-24')
GROUP BY col1,col2,col3,DateKey
DISTRIBUTE BY DateKey
正确
create table t1(
id int
,name string
,hobby array<string>
,add map<String,string>
)
partitioned by (pt_d string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
;
错误
create table t2(
id int
,name string
,hobby array<string>
,add map<String,string>
)
partitioned by (id1 int)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
;
load data local inpath '/root/data' overwrite into table t1 partition ( pt_d = '201701');
show partitions t1;
hive> describe database lp;
OK
lp hdfs://cluster/apps/hive/warehouse/lp.db testET USER
Time taken: 0.114 seconds, Fetched: 1 row(s)
hive> [root@bxzj-test-swift0.bxzj.baixinlocal.com conf]# hdfs dfs -ls hdfs://cluster/apps/hive/warehouse/lp.db
Found 2 items
drwxr-x--- - testET hdfs 0 2018-09-05 13:04 hdfs://cluster/apps/hive/warehouse/lp.db/t1
drwxr-x--- - testET hdfs 0 2018-09-05 10:20 hdfs://cluster/apps/hive/warehouse/lp.db/table_name
[root@bxzj-test-swift0.bxzj.baixinlocal.com conf]# hdfs dfs -ls hdfs://cluster/apps/hive/warehouse/lp.dbt/t1
ls: `hdfs://cluster/apps/hive/warehouse/lp.dbt/t1': No such file or directory
[root@bxzj-test-swift0.bxzj.baixinlocal.com conf]# hdfs dfs -ls hdfs://cluster/apps/hive/warehouse/lp.dbt/
- 点赞
- 收藏
- 关注作者
评论(0)