分享知识,分享快乐

0%

StarRocks 部署

[clickhouse]
172.20.85.[111:113]
172.20.85.[138:142]

[fe]
172.20.85.[111:113]

[be]
172.20.85.[138:142]

1
2
ansible clickhouse -m copy -a "src=/root/.bashrc dest=/root/.bashrc "
ansible clickhouse -m shell -a "java -version "
1
2
3
ansible clickhouse -m shell -a "yum install -y rsync  "
ansible clickhouse -m synchronize -a "src=/opt/module/StarRocks dest=/opt/module/"
ansible clickhouse -m shell -a " ls /opt/module/StarRocks "

fe部署

第一次启动需指定 --helper 参数,后续再启动无需指定此参数
sh /opt/module/StarRocks/fe/bin/start_fe.sh --helper 172.20.85.111:9010 --daemon

1
2
ansible fe -m shell -a " sh /opt/module/StarRocks/fe/bin/stop_fe.sh "
ansible fe -m shell -a " sh /opt/module/StarRocks/fe/bin/start_fe.sh --daemon "
1
2
3
4
5
6
mysql -h 127.0.0.1 -P9030 -uroot -p
mysql> SHOW PROC '/frontends'\G
ALTER SYSTEM ADD FOLLOWER "172.20.85.112:9010";
ALTER SYSTEM ADD FOLLOWER "172.20.85.142:9010";
# ALTER SYSTEM DROP FOLLOWER "172.20.85.142:9010";
ALTER SYSTEM ADD OBSERVER "172.20.85.113:9010";

be 部署

1
2
3
4
5
6
mysql> SHOW PROC '/backends'\G
ALTER SYSTEM ADD BACKEND "cdh85-138:9050";
ALTER SYSTEM ADD BACKEND "cdh85-139:9050";
ALTER SYSTEM ADD BACKEND "cdh85-140:9050";
ALTER SYSTEM ADD BACKEND "cdh85-141:9050";
ALTER SYSTEM ADD BACKEND "cdh85-142:9050";
1
2
ansible be -m shell -a " /opt/module/StarRocks/be/bin/stop_be.sh "
ansible be -m shell -a " /opt/module/StarRocks/be/bin/start_be.sh --daemon "

如添加过程出现错误,需要通过以下命令将该 BE 节点从集群移除。

1
mysql> ALTER SYSTEM decommission BACKEND "host:port";

监控

wget https://github.com/prometheus/prometheus/releases/download/v2.29.1/prometheus-2.29.1.linux-amd64.tar.gz
tar -xf prometheus-2.29.1.linux-amd64.tar.gz -C /usr/local/

1
2
3
4
5
6
7
8
9
10
11
- job_name: 'StarRocks_Cluster01' # 每一个集群称之为一个job,可以自定义名字作为StarRocks集群名
metrics_path: '/metrics' # 指定获取监控项目的 RESTful API

static_configs:
- targets: ['cdh85-111:8030','cdh85-112:8030','cdh85-113:8030']
labels:
group: fe # 这里配置了 fe 的 group,该 group 中包含了 3 个 Frontends

- targets: ['cdh85-138:8040', 'cdh85-139:8040', 'cdh85-140:8040','cdh85-141:8040','cdh85-142:8040']
labels:
group: be # 这里配置了 be 的 group,该 group 中包含了 5 个 Backends

权限

1
2
3
4
5
6
7
8
9
10
11
12
13
-- 创建用户

CREATE USER dba_starRocks@'%' IDENTIFIED BY 'dba_starRocks';

-- 授予权限

GRANT SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV ON *.* TO dba_starRocks;

-- 撤销权限
REVOKE LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV ON *.* FROM dba_starRocks;

-- 查看权限
SHOW GRANTS FOR dba_starRocks

测试环境

[sr]
10.0.19.[156:158]

fe 重启

1
2
ansible sr -m shell -a " sh /opt/module/StarRocks/fe/bin/stop_fe.sh "
ansible sr -m shell -a " sh /opt/module/StarRocks/fe/bin/start_fe.sh --daemon "

be重启

1
2
ansible sr -m shell -a "sh  /opt/module/StarRocks/be/bin/stop_be.sh "
ansible sr -m shell -a "sh /opt/module/StarRocks/be/bin/start_be.sh --daemon "

hive外部表

需要把 hdfs-site.xml 文件放到 BE 和 FE 的 conf 目录里,并重启 BE 和 FE

如遇到权限问题,修改 hadoop_env.sh,添加如下配置:

1
export HADOOP_USER_NAME=hdfs

手动升级

1
\cp -r /opt/module/StarRocks-2.3.0-rc01/apache_hdfs_broker /opt/module/StarRocks/apache_hdfs_broker/../

监控

1
ansible be -m shell -a " netstat -nap |grep 9050 "

supervisord守护进程

/etc/supervisord.d/StarRocks.ini

1
2
3
4
5
6
7
8
9
10
11
12
[program:be]
process_name=%(program_name)s ;进程名称
directory=/opt/module/StarRocks/be ;工作目录be所在路径
command=sh /opt/module/StarRocks/be/bin/start_be.sh ;运行的命令be的启动sh命令
autostart=true ;是否随supervisor自动开启
autorestart=true ;是否在挂了之后重启,意外关闭后会重启,比如kill掉
user=root ;用户
numprocs=1 ;进程数 如果需要同时启动多个进程,进程名称需要不一样
startretries=3 ;启动重试次数
stopasgroup=true ;是否停止子进程
killasgroup=true ;是否杀死子进程
startsecs=5 ;启动5秒后,如果还是运行状态才认为进程已经启动
1
2
3
4
5
6
supervisorctl status        //查看所有进程的状态
supervisorctl stop be //停止
supervisorctl start be //启动
supervisorctl restart be //重启
supervisorctl update be //配置文件修改后使用该命令加载新的配置
supervisorctl reload //重新启动配置中的所有程序

官网升级

升级 BE 前的准备

1
2
3
4
5
6
7
8
9
10
11
12
13

# 为了避免 BE 重启期间不必要的 Tablet 修复,进而影响升级后的集群性能,建议在升级前先在 FE Leader 上执行如下命令以禁用 Tablet 调度功能:
> mysql -h 127.0.0.1 -P9030 -uroot -p

admin set frontend config ("max_scheduling_tablets"="0");
admin set frontend config ("disable_balance"="true");
admin set frontend config ("disable_colocate_balance"="true");

# 在所有 BE 重启升级完成后,通过 show backends 命令确认所有 BE 的 Alive 状态为 true,然后执行如下命令重新启用 Tablet 调度功能:

admin set frontend config ("max_scheduling_tablets"="10000");
admin set frontend config ("disable_balance"="false");
admin set frontend config ("disable_colocate_balance"="false");

升级

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
scp StarRocks-2.5.0-rc03.tar.gz usdp05:/opt/module/
#ansible clickhouse -m copy -a "src=/opt/module/StarRocks-2.5.0-rc03.tar.gz dest=/opt/module/"

cd /opt/module
tar -zxvf StarRocks-2.3.18.tar.gz

# 升级 BE 节点
supervisorctl stop starrocks_be
cd /opt/module/StarRocks/be
sh bin/stop_be.sh
rm -rf lib.bak && mv lib lib.bak
rm -rf bin.bak && mv bin bin.bak
cp -r /opt/module/StarRocks-2.3.18/be/lib .
cp -r /opt/module/StarRocks-2.3.18/be/bin .
ps aux | grep starrocks_be
supervisorctl start starrocks_be
ps aux | grep starrocks_be

# 升级 FE 节点
supervisorctl stop starrocks_fe
cd /opt/module/StarRocks/fe
rm -rf meta.bak/ && cp -r meta meta.bak
rm -rf lib.bak && mv lib lib.bak
rm -rf bin.bak && mv bin bin.bak
rm -rf spark-dpp.bak && mv spark-dpp spark-dpp.bak
cp -r /opt/module/StarRocks-2.3.18/fe/lib .
cp -r /opt/module/StarRocks-2.3.18/fe/bin .
cp -r /opt/module/StarRocks-2.3.18/fe/spark-dpp .
supervisorctl start starrocks_fe
# sh bin/start_fe.sh --daemon
ps aux | grep StarRocksFE

# 升级 Broker
cd /opt/module/StarRocks/apache_hdfs_broker
rm -rf lib.bak && mv lib lib.bak
rm -rf bin.bak && mv bin bin.bak
cp -r /opt/module/StarRocks-2.5.0-rc03/apache_hdfs_broker/lib .
cp -r /opt/module/StarRocks-2.5.0-rc03/apache_hdfs_broker/bin .
sh bin.bak/stop_broker.sh
sh bin/start_broker.sh --daemon

批量操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
ansible sr -m shell -a " sh /opt/module/StarRocks/be/bin/stop_be.sh "
ansible sr -m shell -a " sh /opt/module/StarRocks/be/bin/start_be.sh --daemon "

ansible sr -m shell -a "supervisorctl status "
ansible sr -m shell -a "supervisorctl start starrocks_be"


ansible sr -m shell -a " sh /opt/module/StarRocks/fe/bin/stop_fe.sh "
ansible sr -m shell -a " sh /opt/module/StarRocks/fe/bin/start_fe.sh --daemon "

ansible sr -m shell -a "supervisorctl status "
ansible sr -m shell -a "supervisorctl start starrocks_fe"


supervisorctl start starrocks_broker

扩缩容 BE 集群

系统优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
echo " #创建数据文件目录"
for i in {1..10} ;do mkdir /dfs/data$i -p;done
for i in {a..j};do /usr/sbin/mkfs.xfs /dev/sd$i;done
echo -e "/dev/sda /dfs/data10 xfs defaults 0 0
/dev/sdb /dfs/data1 xfs defaults 0 0
/dev/sdc /dfs/data2 xfs defaults 0 0
/dev/sdd /dfs/data3 xfs defaults 0 0
/dev/sde /dfs/data4 xfs defaults 0 0
/dev/sdf /dfs/data5 xfs defaults 0 0
/dev/sdg /dfs/data6 xfs defaults 0 0
/dev/sdh /dfs/data7 xfs defaults 0 0
/dev/sdi /dfs/data8 xfs defaults 0 0
/dev/sdj /dfs/data9 xfs defaults 0 0" >> /etc/fstab
mount -a

--修改主机名
hostnamectl set-hostname cdh85-234
-- 关闭交换区
sudo vim /etc/sysctl.conf
vm.swappiness = 0
-- 允许内存超配
sudo vim /etc/sysctl.conf
vm.overcommit_memory = 1
-- 调整透明大页 (THP) 为 madvise 模式
vim /etc/default/grub # 找到 GRUB_CMDLINE_LINUX 字段所在一行,在末尾添加 transparent_hugepage=madvise,例如: GRUB_CMDLINE_LINUX="..其他参数... transparent_hugepage=madvise"
-- cpupower设置performance
cpupower frequency-set -g performance
-- 在监听队列溢出时中止 TCP 连接
sudo vim /etc/sysctl.conf
net.ipv4.tcp_abort_on_overflow = 1
-- 禁止内核在进程崩溃时生成 core dump 文件
sudo vim /etc/sysctl.conf
kernel.core_pattern = |/bin/false

-- 设置最大文件打开数
ansible newhost -m copy -a "src=/etc/security/limits.conf dest=/etc/security/limits.conf"
ansible newhost -m copy -a "src=/etc/systemd/system.conf dest=/etc/systemd/system.conf"
ansible newhost -m copy -a "src=/etc/security/limits.d/20-nproc.conf dest=/etc/security/limits.d/20-nproc.conf"

ansible newhost -m copy -a "src=/etc/sysctl.conf dest=/etc/sysctl.conf "
ansible newhost -m copy -a "src=/etc/hosts dest=/etc/hosts"
ansible newhost -m copy -a "src=/etc/profile dest=/etc/profile"
1
2
ansible newhost -m shell -a "mkdir /opt/module/ "
ansible be -m copy -a "src=/opt/module/StarRocks-2.3.18.tar.gz dest=/opt/module/StarRocks-2.3.18.tar.gz"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

ansible newhost -m copy -a "src=/root/supervisor-4.2.5.tar.gz dest=/tmp/"
ansible newhost -m shell -a "tar -zxvf /tmp/supervisor-4.2.5.tar.gz "
ansible newhost -m shell -a "yum install python-setuptools -y "
ansible newhost -m shell -a "cd supervisor-4.2.5 && python setup.py install"
ansible newhost -m shell -a "mkdir /etc/supervisor "
ansible newhost -m copy -a "src=/etc/supervisor/supervisord.conf dest=/etc/supervisor/"
ansible newhost -m copy -a "src=/usr/lib/systemd/system/supervisord-4.2.5.service dest=/usr/lib/systemd/system/"
ansible newhost -m shell -a "systemctl enable supervisord-4.2.5.service"
ansible newhost -m shell -a "systemctl is-enabled supervisord-4.2.5.service"
ansible newhost -m shell -a "mkdir /etc/supervisord.d/"
scp cdh85-189:/etc/supervisord.d/starrocks_be.ini /tmp/
ansible be -m copy -a "src=/tmp/starrocks_be.ini dest=/etc/supervisord.d/"
ansible newhost -m shell -a "systemctl start supervisord-4.2.5.service"
ansible newhost -m shell -a "systemctl status supervisord-4.2.5.service"
ansible newhost -m shell -a "supervisorctl status all"