hive数据操作
3,353 阅读
hive是基于Hadoop的一个数据仓库工具,可以将结构化的数据文件映射为一张数据库表,并提供简单的sql查询功能,可以将sql语句转换为MapReduce任务进行运行。 其优点是学习成本低,可以通过类SQL语句快速实现简单的MapReduce统计,不必开发专门的MapReduce应用,十分适合数据仓库的统计分析。 下面是我以前写过的hive操作语句,供大家参考:
1.hive查看表结构
desc formatted table_name;
desc table_name;
2.hive建立表
create table `reviews_comments_qian`(
`comm_id` bigint,
`post_id` bigint,
`user_id` string,
`comm_time` date,
`comm_content` string,
`auto_brand` string,
`auto_name` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\u0001'
LINES TERMINATED BY '\n';
3.hive操作表
INSERT INTO reviews_comments_qian SELECT DISTINCT c.comm_id,c.post_id,c.user_id,c.comm_time,c.comm_content,aa.auto_brand,aa.auto_name FROM reviews_comments c JOIN (SELECT a.auto_brand,a.auto_name,b.post_id FROM forum_addresses_s a JOIN forum_posts_s b ON a.addr_id=b.addr_id) aa ON aa.post_id=c.post_id;
4.hive删除表
DROP TABLE IF EXISTS reviews_comments_qianyang;
5.hive更改列名
ALTER TABLE reviews_comments_qianyang CHANGE `buying goal` buying_goal STRING
create table `reviews_comments_qianyang`(
`comm_id` bigint,
`post_id` bigint,
`user_id` string,
`comm_time` ,
`comm_content` string,
`auto_brand` string,
`auto_name` string,
`buying_car` string,
`buying_place` string,
`buying_time` string,
`buying_price` string,
`buying_goal` string,
`car_id` string
)
FORMAT DELIMITED
FIELDS TERMINATED
LINES TERMINATED ;
reviews_comments_qianyang c.comm_id,c.post_id,c.user_id,c.comm_time,c.comm_content,c.auto_brand,c.auto_name,b.buying_car,b.buying_place,b.buying_time,b.buying_price,b.buying_goal,b.car_id reviews_comments_qian c ( author_id,buying_car,buying_place,buying_time,buying_price,buying_goal,car_id autohome_reviews) b c.user_idb.author_id;
`reviews_comments_qianyang1`(
`comm_id` ,
`post_id` ,
`user_id` string,
`comm_time` ,
`comm_content` string,
`auto_brand` string,
`auto_name` string,
`buying_car` string,
`buying_place` string,
`buying_time` string,
`buying_price` string,
`buying_goal` string,
`car_id` string,
`user_birth` string,
`user_gender` string,
`user_province` string,
`user_city` string
)
FORMAT DELIMITED
FIELDS TERMINATED
LINES TERMINATED ;
reviews_comments_qianyang1 c.comm_id,c.post_id,c.user_id,c.comm_time,c.comm_content,c.auto_brand,c.auto_name,c.buying_car,c.buying_place,c.buying_time,c.buying_price,c.buying_goal,c.car_id,b.user_birth,b.user_gender,b.user_province,b.user_city reviews_comments_qianyang c ( user_birth,user_id,user_gender,user_province,user_city forum_users_s) b c.user_idb.user_id;
针对同一用户合并数据
user_content user_id,concat_ws(,collect_set(comm_content)) reviews_comments_qianyanguse user_id;
hive向数据库插数据
sqoop export
mysql向hive导入数据
sqoop import
如何查看一张表里存在另一张表里不存在的记录
user_info_sample_qianyang
user_id ( user_id user_info_sample_qianyang1);
