hive
1.下载hive
可从apache上下载hive安装包:http://mirror.bit.edu.cn/apache/hive/
2.在hadoop01容器中解压
cd /home/hadoop
tar -zxf apache-hive-3.1.2-bin.tar.gz
mv apache-hive-3.1.2-bin hive(解压后目录名为apache-hive-3.1.2-bin,需重命名为hive,与后续HIVE_HOME配置一致)
3.配置环境变量
vi ~/.bashrc
export JAVA_HOME=/home/hadoop/javajdk18/jdk/
export HADOOP_HOME=/home/hadoop/hadoop/
export HIVE_HOME=/home/hadoop/hive
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
修改完成后执行 source ~/.bashrc 使环境变量生效
4.修改hive配置文件:conf/hive-env.sh(该文件默认不存在,先执行 cp conf/hive-env.sh.template conf/hive-env.sh)
export JAVA_HOME=/home/hadoop/javajdk18/jdk
export HADOOP_HOME=/home/hadoop/hadoop
export HIVE_HOME=/home/hadoop/hive
5.添加conf/hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.15.45:3307/hivedb?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root</value>
</property>
</configuration>
6.复制默认的hive-default.xml
cp conf/hive-default.xml.template conf/hive-default.xml
7.拷贝驱动hive/lib
下载地址:https://dev.mysql.com/downloads/connector/j/
mysql-connector-java-5.1.39-bin.jar
8.初始化
schematool -initSchema -dbType mysql
9.启动hive
hive
10.创建普通表
create table mytable(id int, name string)
row format delimited fields terminated by '\t';
导入数据:load data local inpath '/tmp/test/mytable.txt' overwrite into table mytable;
11.查看表
show tables;
12.创建外部表(指定warehouse下面的文件夹,不要直接指定warehouse,这样会清空其它表)
create external table if not exists pageview(
pageid int,
page_url string comment 'The page URL'
)
row format delimited fields terminated by ','
location 'hdfs://ns1/user/hive/warehouse/pageview/';
13.创建分区表
create table if not exists invites(
id int,
name string
)
partitioned by (ds string)
row format delimited fields terminated by ','
lines terminated by '\n' stored as textfile;
导入数据:load data local inpath '/tmp/test/invites.txt' overwrite into table invites partition (ds='201911');
hive> create table invites2(
> id int,
> name string,
> hobby array<string>,
    > `add` map<string,string>)
> partitioned by (pt_d string)
> row format delimited fields terminated by ','
> collection items terminated by '-'
> map keys terminated by ':';
分区字段和表字段名称不能重复。
分区操作参考链接:https://blog.csdn.net/qq_36743482/article/details/78418343
14.创建带桶的表
create table student(
id int,
age int,
name string
)
partitioned by(stat_data string)
clustered by(id) sorted by(age) into 2 buckets
row format delimited fields terminated by ',';
分桶管理:https://blog.csdn.net/freefish_yzx/article/details/77150714
insert into table bck_student select id,name,sex,age,department from student distribute by sex;(注意:此例取自上述链接中的表结构,包含sex、department等字段,与本文第14步定义的student表字段(id,age,name)不一致,实际执行时需按自己的表结构调整select列表和distribute by字段,且需先创建好分桶表bck_student)
15.增加分区
alter table invites add partition(ds='a') partition(ds='b');
16.删除分区
alter table invites drop partition(ds='a');
17.重命名表
alter table tmp_student rename to bck_student2;
18.增加新列
desc student;
alter table student add columns (name1 string);
19.更新列
alter table student replace columns (id int, age int, name string);
20.显示命令
show tables;
show databases;
show partitions invites;(show partitions必须指定表名,此处以invites分区表为例)
show functions;
desc extended t_name;
desc formatted table_name;
21.加载数据
load data local inpath '/tmp/test/invites.txt' overwrite into table invites partition (ds='201911');
22.导出数据到HDFS
insert overwrite directory 'hdfs://ns1/user/hive/warehouse/mystudent' select * from bck_student;