# 상황설명 (Ubuntu22.04 기준)
다운로드해서 설치한 일반 실행 파일(데몬)을 리눅스 서비스로 등록하는 방법
예) /etc/prometheus/alertmanager/ 디렉터리의 alertmanager 파일을 직접 실행하는 경우
# cd /etc/prometheus/alertmanager
# pwd
/etc/prometheus/alertmanager
# ls
alertmanager alertmanager.yml amtool data LICENSE NOTICE rules
# ./alertmanager &
# netstat -ntpa |grep LISTEN
tcp6 0 0 :::9093 :::* LISTEN 13856/./alertmanage
tcp6 0 0 :::9094 :::* LISTEN 13856/./alertmanage
# ps -ef |grep alertmanager
root 13856 11451 0 11:03 pts/0 00:00:00 ./alertmanager
root 13876 11451 0 11:04 pts/0 00:00:00 grep --color=auto alertmanager
# kill -9 13856
파일 실행(9093/9094 포트 LISTEN, alertmanager)은 되지만 서버 리부팅 등의 이후에는 수동으로 재기동해 줘야 함
# 프로세스를 리눅스 서비스로 등록하는 방법(systemctl)
- 해당 alertmanager 파일 관리계정 및 파일 복사 준비
# cd /etc/prometheus/alertmanager/
# pwd
/etc/prometheus/alertmanager/
# ls -al
total 65932
drwxr-xr-x 4 prometheus prometheus 4096 Mar 3 10:05 .
drwxr-xr-x 5 prometheus prometheus 4096 Mar 3 07:31 ..
-rwxr-xr-x 1 prometheus prometheus 37345962 Feb 28 20:52 alertmanager <-- 이 실행파일을 리눅스 서비스로 만든다
-rw-r--r-- 1 prometheus prometheus 356 Feb 28 20:55 alertmanager.yml
-rwxr-xr-x 1 prometheus prometheus 30130103 Feb 28 20:52 amtool
drwxr-xr-x 2 root root 4096 Mar 3 09:41 data
-rw-r--r-- 1 prometheus prometheus 11357 Feb 28 20:55 LICENSE
-rw-r--r-- 1 prometheus prometheus 457 Feb 28 20:55 NOTICE
drwxr-xr-x 2 prometheus prometheus 4096 Mar 3 09:41 rules
# User 추가
# useradd -M -r -s /bin/false alertmanager
# User 추가 확인
# cat /etc/passwd
alertmanager:x:995:994::/home/alertmanager:/bin/false
# 실행 파일을 /usr/local/bin 경로로 복사
# cp alertmanager /usr/local/bin/
# 실행 파일의 소유 유저/그룹 변경
# cd /usr/local/bin
# chown alertmanager:alertmanager /usr/local/bin/alertmanager
# cd /etc/systemd/system
# vi alertmanager.service
# 아래 내용을 추가
[Unit]
Description=alertmanager
Wants=network-online.target
After=network-online.target
[Service]
User=alertmanager
Group=alertmanager
Type=simple
ExecStart=/usr/local/bin/alertmanager
[Install]
WantedBy=multi-user.target
# 파일 퍼미션 변경 (유닛 파일은 실행 권한이 필요 없으므로 644 사용, 실행 비트가 있으면 systemd가 경고 로그를 남김)
# chmod 644 alertmanager.service
# systemctl daemon-reload
# systemctl stop alertmanager.service
# systemctl enable alertmanager.service
# systemctl start alertmanager.service
# systemctl status alertmanager.service
× alertmanager.service - alertmanager
Loaded: loaded (/etc/systemd/system/alertmanager.service; enabled; vendor preset: enabled)
Active: failed (Result: exit-code) since Sun 2024-03-03 10:09:13 KST; 3min 29s ago
Main PID: 12922 (code=exited, status=1/FAILURE)
CPU: 66ms
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.098Z caller=main.go:182 level=info build_context="(go=go1.21.7, platform=linux/amd64, user=root@22cd11f671e9, date=20240228-11:51:20, tags=netgo)"
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.104Z caller=cluster.go:186 level=info component=cluster msg="setting advertise address explicitly" addr=10.0.2.15 port=9094
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.110Z caller=cluster.go:683 level=info component=cluster msg="Waiting for gossip to settle..." interval=2s
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.130Z caller=coordinator.go:113 level=info component=configuration msg="Loading configuration file" file=alertmanager.yml
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.130Z caller=coordinator.go:118 level=error component=configuration msg="Loading configuration file failed" file=alertmanager.yml err="open alertmanager.yml: no such file or directory"
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.130Z caller=cluster.go:692 level=info component=cluster msg="gossip not settled but continuing anyway" polls=0 elapsed=20.021084ms
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.130Z caller=silence.go:442 level=info component=silences msg="Creating shutdown snapshot failed" err="open data/silences.51ab1e5945c48bff: permission denied"
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.131Z caller=nflog.go:362 level=error component=nflog msg="Creating shutdown snapshot failed" err="open data/nflog.5acef5dc6432c333: permission denied"
Mar 03 10:09:13 servidor systemd[1]: alertmanager.service: Main process exited, code=exited, status=1/FAILURE
Mar 03 10:09:13 servidor systemd[1]: alertmanager.service: Failed with result 'exit-code'.
- 서비스가 failed 상태로 떨어짐 (설정 파일 alertmanager.yml을 찾을 수 없어 service 파일 수정 필요)
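# failed 로그를 보면 alertmanager.yml 파일을 찾을 수 없다고 나옴 (설정 파일이 상대 경로 alertmanager.yml 로 지정되어 있어 서비스 실행 위치에서 찾지 못함)
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.130Z caller=coordinator.go:113 level=info component=configuration msg="Loading configuration file" file=alertmanager.yml
Mar 03 10:09:13 servidor alertmanager[12922]: ts=2024-03-03T01:09:13.130Z caller=coordinator.go:118 level=error component=configuration msg="Loading configuration file failed" file=alertmanager.yml err="open alertmanager.yml: no such file or directory"
# 참고: 같은 로그의 data/... permission denied 도 데이터 디렉터리가 상대 경로(data/)라서 발생한 것으로 보이며, 필요 시 --storage.path 옵션으로 alertmanager 계정이 쓸 수 있는 절대 경로를 지정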
# service 파일 다시 수정
# vi alertmanager.service
[Unit]
Description=alertmanager
Wants=network-online.target
After=network-online.target
[Service]
User=alertmanager
Group=alertmanager
Type=simple
ExecStart=/usr/local/bin/alertmanager \
--config.file /etc/prometheus/alertmanager/alertmanager.yml <--- yml 파일 추가
[Install]
WantedBy=multi-user.target
# systemctl daemon-reload   <--- service 파일을 수정했으므로 systemd 설정을 다시 로드
# systemctl stop alertmanager.service
# systemctl start alertmanager.service
# systemctl enable alertmanager.service
# systemctl status alertmanager.service
● alertmanager.service - alertmanager
Loaded: loaded (/etc/systemd/system/alertmanager.service; enabled; vendor preset: enabled)
Active: active (running) since Sun 2024-03-03 11:19:33 KST; 16s ago
Main PID: 14007 (alertmanager)
Tasks: 7 (limit: 2219)
Memory: 13.1M
CPU: 146ms
CGroup: /system.slice/alertmanager.service
└─14007 /usr/local/bin/alertmanager --config.file /etc/prometheus/alertmanager/alertmanager.yml
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.083Z caller=main.go:181 level=info msg="Starting Alertmanager" version="(version=0.27.0, branch=HEAD, revision=0aa3c2aad14cff039931923ab16b26b7481783b5)"
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.083Z caller=main.go:182 level=info build_context="(go=go1.21.7, platform=linux/amd64, user=root@22cd11f671e9, date=20240228-11:51:20, tags=netgo)"
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.091Z caller=cluster.go:186 level=info component=cluster msg="setting advertise address explicitly" addr=10.0.2.15 port=9094
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.094Z caller=cluster.go:683 level=info component=cluster msg="Waiting for gossip to settle..." interval=2s
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.120Z caller=coordinator.go:113 level=info component=configuration msg="Loading configuration file" file=/etc/prometheus/alertmanager/alertmanager.yml
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.121Z caller=coordinator.go:126 level=info component=configuration msg="Completed loading of configuration file" file=/etc/prometheus/alertmanager/alertmanager.yml
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.123Z caller=tls_config.go:313 level=info msg="Listening on" address=[::]:9093
Mar 03 11:19:33 servidor alertmanager[14007]: ts=2024-03-03T02:19:33.123Z caller=tls_config.go:316 level=info msg="TLS is disabled." http2=false address=[::]:9093
Mar 03 11:19:35 servidor alertmanager[14007]: ts=2024-03-03T02:19:35.096Z caller=cluster.go:708 level=info component=cluster msg="gossip not settled" polls=0 before=0 now=1 elapsed=2.002291107s
Mar 03 11:19:43 servidor alertmanager[14007]: ts=2024-03-03T02:19:43.124Z caller=cluster.go:700 level=info component=cluster msg="gossip settled; proceeding" elapsed=10.029680225s