Hive的常用语法

11,497 阅读

1.hive创建数据库

-- Syntax template, not a runnable statement: choose either DATABASE or SCHEMA
-- (Hive treats the two keywords as interchangeable), keep or drop the optional
-- bracketed IF NOT EXISTS clause, and substitute a real name for <database name>.
CREATE DATABASE|SCHEMA [IF NOT EXISTS] <database name>;

2.hive创建表 Hive里一般有两种表:内部表(managed table,又称管理表)和外部表(external table),以下分别是两种表的创建代码:

-- Managed table backed by tab-separated text files.
--   ROW FORMAT DELIMITED       : each line of a data file is one record.
--   FIELDS TERMINATED BY '\t'  : columns within a line are separated by a tab.
--   STORED AS TEXTFILE         : the default format, so the clause may be omitted;
--                                SEQUENCEFILE is the other common choice (name
--                                exactly one format, never both).
CREATE TABLE phone_info (
    id      INT,
    name    STRING,
    storage STRING,
    price   DOUBLE
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
-- External table: Hive stores only the metadata and reads the files that
-- already exist under LOCATION.
-- LOCATION must name an HDFS *directory*, not a single file. Replace the path
-- below with the directory that contains the data file(s) such as phone.txt.
CREATE EXTERNAL TABLE phone_external (
    id    INT,
    name  STRING,
    price DOUBLE
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/xudong/phone';

3.hive表中导入数据

-- LOCAL: the path is on the client machine's file system rather than HDFS.
-- No OVERWRITE keyword, so the file is added to the data the table already has.
LOAD DATA LOCAL INPATH '/home/xudong/xxx.txt'
INTO TABLE phone_info;

4.hive删除表

-- Remove phone_info; IF EXISTS keeps the statement from failing when the table is absent.
DROP TABLE IF EXISTS phone_info;

5.hive创建临时表存储中间结果

-- CREATE TABLE ... AS SELECT: materialise an intermediate result as a new table
-- whose columns are phone_id, phone_name and price.
-- SORT BY orders rows within each reducer only; it does not guarantee a total order.
CREATE TABLE temp_info AS
SELECT
    id   AS phone_id,
    name AS phone_name,
    price
FROM phone_info
SORT BY phone_id;

6.hive简单的查询语句

-- Read every column of the intermediate table.
SELECT * FROM temp_info;

-- Project two columns under new names.
SELECT
    id   AS phone_id,
    name AS phone_name
FROM phone_info;

-- Inner join on the shared ip column.
-- part_table (created in section 8) is used on the left side because
-- phone_info has no ip column, so the original a.ip reference could not resolve.
-- `user` is a reserved word in Hive and therefore must be back-quoted.
-- NOTE(review): assumes a `user` table with ip and username columns exists;
-- it is not defined anywhere in this article.
SELECT
    p.ip,
    p.name,
    u.username
FROM part_table AS p
INNER JOIN `user` AS u
    ON p.ip = u.ip;

7.hive批量插入数据到表

-- Clone only the structure of phone_info; no rows are copied.
CREATE TABLE phone_info_like LIKE phone_info;

-- INSERT INTO appends the selected rows to the data already in the table.
INSERT INTO TABLE phone_info_like
SELECT id, name, storage, price
FROM phone_info;

-- INSERT OVERWRITE replaces the table's existing data with the selected rows,
-- so re-running it does not accumulate duplicates.
INSERT OVERWRITE TABLE phone_info_like
SELECT id, name, storage, price
FROM phone_info;

8.hive分区表

-- Partitioned table for comma-separated text files.
-- The partition column (part_flag) is declared only in PARTITIONED BY and must
-- NOT repeat a name from the regular column list; its value comes from the
-- partition specification rather than from the data files.
-- `date` is a reserved keyword in Hive, so it is back-quoted to remain usable
-- as a column name.
CREATE TABLE part_table (
    id     INT,
    name   STRING,
    ip     STRING,
    city   STRING,
    `date` STRING
)
PARTITIONED BY (part_flag STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

-- Load one local file into each partition; the PARTITION clause supplies the
-- part_flag value for every row of that file.
LOAD DATA LOCAL INPATH '/home/xudong/test.txt'
INTO TABLE part_table
PARTITION (part_flag = 'part1');

LOAD DATA LOCAL INPATH '/home/xudong/test1.txt'
INTO TABLE part_table
PARTITION (part_flag = 'part2');

-- Filtering on the partition column restricts the query to the part1 partition.
SELECT *
FROM part_table
WHERE part_flag = 'part1';

DataLearner 官方微信

欢迎关注 DataLearner 官方微信,获得最新 AI 技术推送

DataLearner 官方微信二维码