설치 환경

  • MacBook Pro 14 - UTM (가상머신)
  • ubuntu-25.04-desktop-arm64

 

etcd Single 설치 및 실행

# Download and extract the etcd v3.6.2 release (linux-arm64 build) into
# /tmp/etcd-download-test, then verify the three extracted binaries.
ETCD_VER=v3.6.2

# choose either URL
GOOGLE_URL=https://storage.googleapis.com/etcd
DOWNLOAD_URL=${GOOGLE_URL}

# clean up any previous download/extraction before starting
rm -f "/tmp/etcd-${ETCD_VER}-linux-arm64.tar.gz"
rm -rf /tmp/etcd-download-test && mkdir -p /tmp/etcd-download-test

# -f: fail on an HTTP error instead of saving the 404/err page as the tarball
# -L: follow the release redirect
curl -fL "${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-arm64.tar.gz" \
  -o "/tmp/etcd-${ETCD_VER}-linux-arm64.tar.gz"
tar xzvf "/tmp/etcd-${ETCD_VER}-linux-arm64.tar.gz" -C /tmp/etcd-download-test --strip-components=1 --no-same-owner
rm -f "/tmp/etcd-${ETCD_VER}-linux-arm64.tar.gz"

# verify the extracted binaries
/tmp/etcd-download-test/etcd --version
/tmp/etcd-download-test/etcdctl version
/tmp/etcd-download-test/etcdutl version

# start a local etcd server
# Copy the extracted binaries onto PATH — use absolute paths instead of
# relying on the current working directory being /tmp/etcd-download-test.
sudo cp /tmp/etcd-download-test/etcd /tmp/etcd-download-test/etcdctl /tmp/etcd-download-test/etcdutl /usr/local/bin/
# run etcd inside a screen session so it keeps running after you detach
screen -S etcd
etcd    # start a single-node server (default client endpoint 127.0.0.1:2379)
# once the startup log is streaming, detach from the screen session (Ctrl-a d)

# write,read to etcd test
/tmp/etcd-download-test/etcdctl --endpoints=localhost:2379 put foo bar
/tmp/etcd-download-test/etcdctl --endpoints=localhost:2379 get foo

# test2
> etcdctl put mykey "this is awesome"
OK

> etcdctl get mykey
mykey
this is awesome
 

Releases · etcd-io/etcd

Distributed reliable key-value store for the most critical data of a distributed system - etcd-io/etcd

github.com

 

etcd Cluster 구성

vi /etc/systemd/system/etcd1.service
#아래내용 작성
[Unit]
Description=etcd instance 1
After=network.target

[Service]
# NOTE(review): /var/lib/etcd1 must exist and be owned by the 'etcd' user
# before first start — confirm, since that step is not shown above.
User=etcd
Type=notify
ExecStart=/usr/local/bin/etcd \
  --name etcd1 \
  --data-dir /var/lib/etcd1 \
  --listen-peer-urls http://127.0.0.1:2380 \
  --listen-client-urls http://127.0.0.1:2379 \
  --advertise-client-urls http://127.0.0.1:2379 \
  --initial-advertise-peer-urls http://127.0.0.1:2380 \
  --initial-cluster etcd1=http://127.0.0.1:2380,etcd2=http://127.0.0.1:2480,etcd3=http://127.0.0.1:2580 \
  --initial-cluster-token single-machine-cluster \
  --initial-cluster-state new
Restart=always
RestartSec=5s

# [Install] was missing from this listing (it slid into the next file's paste);
# without it, 'systemctl enable etcd1' has nothing to link.
[Install]
WantedBy=multi-user.target




vi /etc/systemd/system/etcd2.service
#아래내용 작성
# NOTE(review): in the original paste, [Install] appeared before [Unit]
# (a leftover from the etcd1 listing). systemd accepts any section order,
# but the conventional layout is [Unit] / [Service] / [Install].
[Unit]
Description=etcd instance 2
After=network.target

[Service]
User=etcd
Type=notify
ExecStart=/usr/local/bin/etcd \
  --name etcd2 \
  --data-dir /var/lib/etcd2 \
  --listen-peer-urls http://127.0.0.1:2480 \
  --listen-client-urls http://127.0.0.1:2479 \
  --advertise-client-urls http://127.0.0.1:2479 \
  --initial-advertise-peer-urls http://127.0.0.1:2480 \
  --initial-cluster etcd1=http://127.0.0.1:2380,etcd2=http://127.0.0.1:2480,etcd3=http://127.0.0.1:2580 \
  --initial-cluster-token single-machine-cluster \
  --initial-cluster-state new
Restart=always
RestartSec=5s

[Install]
WantedBy=multi-user.target



vi /etc/systemd/system/etcd3.service
#아래내용 작성
# NOTE(review): the original paste had TWO [Install] sections (a stray one
# before [Unit], left over from the etcd2 listing). One is enough.
[Unit]
Description=etcd instance 3
After=network.target

[Service]
User=etcd
Type=notify
ExecStart=/usr/local/bin/etcd \
  --name etcd3 \
  --data-dir /var/lib/etcd3 \
  --listen-peer-urls http://127.0.0.1:2580 \
  --listen-client-urls http://127.0.0.1:2579 \
  --advertise-client-urls http://127.0.0.1:2579 \
  --initial-advertise-peer-urls http://127.0.0.1:2580 \
  --initial-cluster etcd1=http://127.0.0.1:2380,etcd2=http://127.0.0.1:2480,etcd3=http://127.0.0.1:2580 \
  --initial-cluster-token single-machine-cluster \
  --initial-cluster-state new
Restart=always
RestartSec=5s

[Install]
WantedBy=multi-user.target
root@QEMU-Virtual-Machine:/etc/systemd/system# sudo systemctl daemon-reexec
root@QEMU-Virtual-Machine:/etc/systemd/system# sudo systemctl daemon-reload

root@QEMU-Virtual-Machine:/etc/systemd/system# sudo systemctl enable --now etcd1
Created symlink '/etc/systemd/system/multi-user.target.wants/etcd1.service' → '/etc/systemd/system/etcd1.service'.

root@QEMU-Virtual-Machine:/etc/systemd/system# sudo systemctl enable --now etcd2
Created symlink '/etc/systemd/system/multi-user.target.wants/etcd2.service' → '/etc/systemd/system/etcd2.service'.

root@QEMU-Virtual-Machine:/etc/systemd/system# sudo systemctl enable --now etcd3
Created symlink '/etc/systemd/system/multi-user.target.wants/etcd3.service' → '/etc/systemd/system/etcd3.service'.

root@QEMU-Virtual-Machine:/etc/systemd/system# systemctl status etcd1
● etcd1.service - etcd instance 1
     Loaded: loaded (/etc/systemd/system/etcd1.service; enabled; preset: enabled)
     Active: active (running) since Tue 2025-07-15 00:48:37 KST; 26s ago
 Invocation: fe945f33a9544d13bffc13deb0e4491b
   Main PID: 30951 (etcd)
      Tasks: 10 (limit: 8745)
     Memory: 15M (peak: 16.5M)
        CPU: 4.740s
     CGroup: /system.slice/etcd1.service
             └─30951 /usr/local/bin/etcd --name etcd1 --data-dir /var/lib/etcd1 --listen-peer-urls http://127.0.0.1:2380 --listen-client-urls http://127.0.0.1:2379 --advertise-client-urls http://127.0.0.1:2379 --initial-advertise-peer-urls http://127.0.0.1:2380 --initial-cluster etcd1=http://127.0.0.1:2380,etcd2=http://127.0.0.1:2480,etcd3=http://127.0.0.1:2580 --initial-cluster-token single-machine-cluster --initial-clu>

Jul 15 00:48:44 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:44.793367+0900","caller":"rafthttp/stream.go:248","msg":"set message encoder","from":"e17d9e661baad1bc","to":"d6b6fc75feb1ea61","stream-type":"stream MsgApp v2"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:44.793379+0900","caller":"rafthttp/stream.go:273","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream MsgApp v2","local-member-id":"e17d9e661baad1bc","remote-peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:44.823457+0900","caller":"rafthttp/stream.go:411","msg":"established TCP streaming connection with remote peer","stream-reader-type":"stream MsgApp v2","local-member-id":"e17d9e661baad1bc","remote-peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:44.823823+0900","caller":"rafthttp/stream.go:411","msg":"established TCP streaming connection with remote peer","stream-reader-type":"stream Message","local-member-id":"e17d9e661baad1bc","remote-peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:45.949082+0900","caller":"etcdserver/server.go:2409","msg":"updating cluster version using v3 API","from":"3.0","to":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:45.952411+0900","caller":"membership/cluster.go:673","msg":"updated cluster version","cluster-id":"af7e1200b66aabd1","local-member-id":"e17d9e661baad1bc","from":"3.0","to":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:45.952565+0900","caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:45.952600+0900","caller":"etcdserver/server.go:2424","msg":"cluster version is updated","cluster-version":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:45.952729+0900","caller":"version/monitor.go:116","msg":"cluster version differs from storage version.","cluster-version":"3.6.0","storage-version":"3.5.0"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[30951]: {"level":"info","ts":"2025-07-15T00:48:45.952863+0900","caller":"schema/migration.go:65","msg":"updated storage version","new-storage-version":"3.6.0"}
root@QEMU-Virtual-Machine:/etc/systemd/system#
root@QEMU-Virtual-Machine:/etc/systemd/system#
root@QEMU-Virtual-Machine:/etc/systemd/system# systemctl status etcd2
● etcd2.service - etcd instance 2
     Loaded: loaded (/etc/systemd/system/etcd2.service; enabled; preset: enabled)
     Active: active (running) since Tue 2025-07-15 00:48:37 KST; 31s ago
 Invocation: eb34f34340484c41894eaec65fed3e74
   Main PID: 31108 (etcd)
      Tasks: 11 (limit: 8745)
     Memory: 11.6M (peak: 12.2M)
        CPU: 1.208s
     CGroup: /system.slice/etcd2.service
             └─31108 /usr/local/bin/etcd --name etcd2 --data-dir /var/lib/etcd2 --listen-peer-urls http://127.0.0.1:2480 --listen-client-urls http://127.0.0.1:2479 --advertise-client-urls http://127.0.0.1:2479 --initial-advertise-peer-urls http://127.0.0.1:2480 --initial-cluster etcd1=http://127.0.0.1:2380,etcd2=http://127.0.0.1:2480,etcd3=http://127.0.0.1:2580 --initial-cluster-token single-machine-cluster --initial-clu>

Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:44.792708+0900","caller":"rafthttp/peer_status.go:53","msg":"peer became active","peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:44.792729+0900","caller":"rafthttp/stream.go:273","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream Message","local-member-id":"2c5b13349b914f7c","remote-peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:44.792736+0900","caller":"rafthttp/stream.go:248","msg":"set message encoder","from":"2c5b13349b914f7c","to":"d6b6fc75feb1ea61","stream-type":"stream MsgApp v2"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:44.792745+0900","caller":"rafthttp/stream.go:273","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream MsgApp v2","local-member-id":"2c5b13349b914f7c","remote-peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:44.853106+0900","caller":"rafthttp/stream.go:411","msg":"established TCP streaming connection with remote peer","stream-reader-type":"stream Message","local-member-id":"2c5b13349b914f7c","remote-peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:44.853138+0900","caller":"rafthttp/stream.go:411","msg":"established TCP streaming connection with remote peer","stream-reader-type":"stream MsgApp v2","local-member-id":"2c5b13349b914f7c","remote-peer-id":"d6b6fc75feb1ea61"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:45.952663+0900","caller":"membership/cluster.go:673","msg":"updated cluster version","cluster-id":"af7e1200b66aabd1","local-member-id":"2c5b13349b914f7c","from":"3.0","to":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:45.952760+0900","caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:45.952837+0900","caller":"version/monitor.go:116","msg":"cluster version differs from storage version.","cluster-version":"3.6.0","storage-version":"3.5.0"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31108]: {"level":"info","ts":"2025-07-15T00:48:45.952880+0900","caller":"schema/migration.go:65","msg":"updated storage version","new-storage-version":"3.6.0"}

root@QEMU-Virtual-Machine:/etc/systemd/system#
root@QEMU-Virtual-Machine:/etc/systemd/system#
root@QEMU-Virtual-Machine:/etc/systemd/system# systemctl status etcd3
● etcd3.service - etcd instance 3
     Loaded: loaded (/etc/systemd/system/etcd3.service; enabled; preset: enabled)
     Active: active (running) since Tue 2025-07-15 00:48:44 KST; 29s ago
 Invocation: e05bc4a064ff4359b3188cbe548dd98c
   Main PID: 31415 (etcd)
      Tasks: 11 (limit: 8745)
     Memory: 9.8M (peak: 10.6M)
        CPU: 1.142s
     CGroup: /system.slice/etcd3.service
             └─31415 /usr/local/bin/etcd --name etcd3 --data-dir /var/lib/etcd3 --listen-peer-urls http://127.0.0.1:2580 --listen-client-urls http://127.0.0.1:2579 --advertise-client-urls http://127.0.0.1:2579 --initial-advertise-peer-urls http://127.0.0.1:2580 --initial-cluster etcd1=http://127.0.0.1:2380,etcd2=http://127.0.0.1:2480,etcd3=http://127.0.0.1:2580 --initial-cluster-token single-machine-cluster --initial-clu>

Jul 15 00:48:44 QEMU-Virtual-Machine systemd[1]: Started etcd3.service - etcd instance 3.
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:44.843171+0900","caller":"etcdserver/server.go:522","msg":"initialized peer connections; fast-forwarding election ticks","local-member-id":"d6b6fc75feb1ea61","forward-ticks":8,"forward-duration":"800ms","election-ticks":10,"election-timeout":"1s","active-remote-members":2}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:44.853066+0900","caller":"rafthttp/stream.go:248","msg":"set message encoder","from":"d6b6fc75feb1ea61","to":"2c5b13349b914f7c","stream-type":"stream Message"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:44.853106+0900","caller":"rafthttp/stream.go:273","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream Message","local-member-id":"d6b6fc75feb1ea61","remote-peer-id":"2c5b13349b914f7c"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:44.853108+0900","caller":"rafthttp/stream.go:248","msg":"set message encoder","from":"d6b6fc75feb1ea61","to":"2c5b13349b914f7c","stream-type":"stream MsgApp v2"}
Jul 15 00:48:44 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:44.853125+0900","caller":"rafthttp/stream.go:273","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream MsgApp v2","local-member-id":"d6b6fc75feb1ea61","remote-peer-id":"2c5b13349b914f7c"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:45.952737+0900","caller":"membership/cluster.go:673","msg":"updated cluster version","cluster-id":"af7e1200b66aabd1","local-member-id":"d6b6fc75feb1ea61","from":"3.0","to":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:45.953168+0900","caller":"api/capability.go:76","msg":"enabled capabilities for version","cluster-version":"3.6"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:45.953228+0900","caller":"version/monitor.go:116","msg":"cluster version differs from storage version.","cluster-version":"3.6.0","storage-version":"3.5.0"}
Jul 15 00:48:45 QEMU-Virtual-Machine etcd[31415]: {"level":"info","ts":"2025-07-15T00:48:45.953290+0900","caller":"schema/migration.go:65","msg":"updated storage version","new-storage-version":"3.6.0"}

 

#etcd Cluster 상태 확인
etcdctl --endpoints=127.0.0.1:2379,127.0.0.1:2479,127.0.0.1:2579 endpoint status --write-out=table

#etcdctl 확인
etcdctl --endpoints=127.0.0.1:2379,127.0.0.1:2479,127.0.0.1:2579 endpoint health
127.0.0.1:2479 is healthy: successfully committed proposal: took = 5.062031ms
127.0.0.1:2379 is healthy: successfully committed proposal: took = 4.988364ms
127.0.0.1:2579 is healthy: successfully committed proposal: took = 5.038531ms

etcd cluster 설정 확인

 

 

Patroni 설치

 

patroni/postgres0.yml at master · patroni/patroni

A template for PostgreSQL High Availability with Etcd, Consul, ZooKeeper, or Kubernetes - patroni/patroni

github.com

 

  • 기본적인 설치 내용들
# Patroni prerequisites — use apt-get consistently and pass -y everywhere
# so the install does not stop at an interactive prompt.
sudo apt-get install -y python3-pip
sudo apt-get install -y python3-psycopg2
sudo apt-get install -y patroni

 

  • 각 node 들의 yml 정보
root@QEMU-Virtual-Machine:/data# cat /etc/patroni1.yml
# Patroni configuration for cluster member node1 (cluster scope 'pg-cluster').
scope: pg-cluster                     # cluster name — must match on every member
name: node1                           # unique name of this member

restapi:
  listen: 0.0.0.0:8008                # REST API bind address (unique port per member on this single host)
  connect_address: 127.0.0.1:8008     # address other members / patronictl use to reach this API

# 'etcd3' selects the etcd v3 API; a plain 'etcd:' key targets the v2 API,
# which etcd 3.6 no longer serves (see Case 1 in the troubleshooting section).
etcd3:
  hosts: 127.0.0.1:2379,127.0.0.1:2479,127.0.0.1:2579   # client URLs of the 3-node etcd cluster

bootstrap:
  # dcs settings are written into etcd only when the cluster is first bootstrapped
  dcs:
    ttl: 30                           # leader lease TTL in seconds
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576  # max lag (bytes) a replica may have and still become leader
    postgresql:
      use_pg_rewind: true
      parameters:
        wal_level: replica
        hot_standby: "on"
        max_wal_senders: 10
        max_replication_slots: 10
        wal_keep_size: 512MB

  initdb:                             # options passed to initdb at first bootstrap
    - encoding: UTF8
    - data-checksums

postgresql:
  listen: 127.0.0.1:5432              # unique PostgreSQL port per member on this single host
  connect_address: 127.0.0.1:5432
  data_dir: /data/pg1                 # must be owned by 'postgres', mode 0700 (see Case 3)
  bin_dir: /usr/lib/postgresql/17/bin
  config_dir: /data/pg1
  authentication:
    superuser:
      username: postgres
      password: supersecret1          # NOTE(review): plain-text demo password — replace for real use
    replication:
      username: replicator
      password: replpass1
  parameters:
    unix_socket_directories: '/tmp'
######################################################################
root@QEMU-Virtual-Machine:/data# cat /etc/patroni2.yml
# Patroni configuration for cluster member node2 — identical in structure to
# node1's config, differing only in: name, REST API port (8018), PostgreSQL
# port (5433), data/config dir (/data/pg2), and passwords.
scope: pg-cluster                     # cluster name — must match on every member
name: node2                           # unique name of this member

restapi:
  listen: 0.0.0.0:8018
  connect_address: 127.0.0.1:8018

# 'etcd3' selects the etcd v3 API (see Case 1 in the troubleshooting section).
etcd3:
  hosts: 127.0.0.1:2379,127.0.0.1:2479,127.0.0.1:2579

bootstrap:
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    postgresql:
      use_pg_rewind: true
      parameters:
        wal_level: replica
        hot_standby: "on"
        max_wal_senders: 10
        max_replication_slots: 10
        wal_keep_size: 512MB

  initdb:
    - encoding: UTF8
    - data-checksums

postgresql:
  listen: 127.0.0.1:5433
  connect_address: 127.0.0.1:5433
  data_dir: /data/pg2                 # must be owned by 'postgres', mode 0700
  bin_dir: /usr/lib/postgresql/17/bin
  config_dir: /data/pg2
  authentication:
    superuser:
      username: postgres
      password: supersecret2          # NOTE(review): plain-text demo password — replace for real use
    replication:
      username: replicator
      password: replpass2
  parameters:
    unix_socket_directories: '/tmp'

######################################################################
root@QEMU-Virtual-Machine:/data# cat /etc/patroni3.yml
# Patroni configuration for cluster member node3 — identical in structure to
# node1's config, differing only in: name, REST API port (8028), PostgreSQL
# port (5434), data/config dir (/data/pg3), and passwords.
scope: pg-cluster                     # cluster name — must match on every member
name: node3                           # unique name of this member

restapi:
  listen: 0.0.0.0:8028
  connect_address: 127.0.0.1:8028

# 'etcd3' selects the etcd v3 API (see Case 1 in the troubleshooting section).
etcd3:
  hosts: 127.0.0.1:2379,127.0.0.1:2479,127.0.0.1:2579

bootstrap:
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    postgresql:
      use_pg_rewind: true
      parameters:
        wal_level: replica
        hot_standby: "on"
        max_wal_senders: 10
        max_replication_slots: 10
        wal_keep_size: 512MB

  initdb:
    - encoding: UTF8
    - data-checksums

postgresql:
  listen: 127.0.0.1:5434
  connect_address: 127.0.0.1:5434
  data_dir: /data/pg3                 # must be owned by 'postgres', mode 0700 (see Case 3)
  bin_dir: /usr/lib/postgresql/17/bin
  config_dir: /data/pg3
  authentication:
    superuser:
      username: postgres
      password: supersecret3          # NOTE(review): plain-text demo password — replace for real use
    replication:
      username: replicator
      password: replpass3
  parameters:
    unix_socket_directories: '/tmp'

 

  • Patroni 실행 진행 (서비스로 구성은 추후에 시도 예정)
    • 반드시 screen 으로 실행하는 것을 추천 (가급적이면 서비스로 구성하여 로그들 확인)
# primary 로 정상적으로 로그가 발생하면 나머지 2,3 번 replica 들을 실행하는 것을 추천
root@QEMU-Virtual-Machine:/tmp/etcd-download-test# sudo -u postgres patroni /etc/patroni1.yml

root@QEMU-Virtual-Machine:/tmp/etcd-download-test# sudo -u postgres patroni /etc/patroni2.yml
root@QEMU-Virtual-Machine:/tmp/etcd-download-test# sudo -u postgres patroni /etc/patroni3.yml

#정상적으로 올라왔는지 확인하는 명령어
root@QEMU-Virtual-Machine:/data# patronictl -c /etc/patroni1.yml list
+ Cluster: pg-cluster (7527206046275954701) ----+----+-----------+
| Member | Host           | Role    | State     | TL | Lag in MB |
+--------+----------------+---------+-----------+----+-----------+
| node1  | 127.0.0.1:5432 | Leader  | running   |  1 |           |
| node2  | 127.0.0.1:5433 | Replica | streaming |  1 |         0 |
| node3  | 127.0.0.1:5434 | Replica | streaming |  1 |         0 |
+--------+----------------+---------+-----------+----+-----------+
  • 정상적으로 실행된 로그들
root@QEMU-Virtual-Machine:/etc/systemd/system# sudo -u postgres patroni /etc/patroni1.yml
2025-07-15 15:34:06,108 INFO: Selected new etcd server http://127.0.0.1:2579
2025-07-15 15:34:06,152 INFO: No PostgreSQL configuration items changed, nothing to reload.
2025-07-15 15:34:06,195 INFO: Lock owner: None; I am node1
2025-07-15 15:34:06,281 INFO: trying to bootstrap a new cluster
The files belonging to this database system will be owned by user "postgres".
This user must also own the server process.

The database cluster will be initialized with locale "en_US.UTF-8".
The default text search configuration will be set to "english".

Data page checksums are enabled.

creating directory /data/pg1 ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default "max_connections" ... 100
selecting default "shared_buffers" ... 128MB
selecting default time zone ... Asia/Seoul
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok

initdb: warning: enabling "trust" authentication for local connections
initdb: hint: You can change this by editing pg_hba.conf or using the option -A, or --auth-local and --auth-host, the next time you run initdb.

Success. You can now start the database server using:

    /usr/lib/postgresql/17/bin/pg_ctl -D /data/pg1 -l logfile start

2025-07-15 15:34:06.872 KST [36610] LOG:  starting PostgreSQL 17.5 (Ubuntu 17.5-0ubuntu0.25.04.1) on aarch64-unknown-linux-gnu, compiled by gcc (Ubuntu 14.2.0-19ubuntu2) 14.2.0, 64-bit
2025-07-15 15:34:06.872 KST [36610] LOG:  listening on IPv4 address "127.0.0.1", port 5433
2025-07-15 15:34:06.873 KST [36610] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.5433"
2025-07-15 15:34:06.876 KST [36613] LOG:  database system was shut down at 2025-07-15 15:34:06 KST
2025-07-15 15:34:06,880 INFO: postmaster pid=36610
2025-07-15 15:34:06.880 KST [36610] LOG:  database system is ready to accept connections
localhost:5433 - accepting connections
localhost:5433 - accepting connections
2025-07-15 15:34:06,895 INFO: establishing a new patroni heartbeat connection to postgres
2025-07-15 15:34:06,942 INFO: running post_bootstrap
2025-07-15 15:34:06,950 WARNING: Could not activate Linux watchdog device: Can't open watchdog device: [Errno 2] No such file or directory: '/dev/watchdog'
2025-07-15 15:34:07.096 KST [36611] LOG:  checkpoint starting: force wait
2025-07-15 15:34:07,125 INFO: initialized a new cluster
2025-07-15 15:34:07.541 KST [36611] LOG:  checkpoint complete: wrote 7 buffers (0.0%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.408 s, sync=0.008 s, total=0.445 s; sync files=6, longest=0.005 s, average=0.002 s; distance=8984 kB, estimate=8984 kB; lsn=0/2000080, redo lsn=0/2000028
2025-07-15 15:34:07.541 KST [36611] LOG:  checkpoint starting: force wait
2025-07-15 15:34:07.545 KST [36611] LOG:  checkpoint complete: wrote 0 buffers (0.0%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.001 s, sync=0.001 s, total=0.005 s; sync files=0, longest=0.000 s, average=0.000 s; distance=0 kB, estimate=8085 kB; lsn=0/2000150, redo lsn=0/20000F8


#Replica node log들
root@QEMU-Virtual-Machine:/tmp/etcd-download-test# sudo -u postgres patroni /etc/patroni3.yml
2025-07-15 16:53:53,208 INFO: Selected new etcd server http://127.0.0.1:2379
2025-07-15 16:53:53,253 INFO: No PostgreSQL configuration items changed, nothing to reload.
2025-07-15 16:53:53,296 INFO: Lock owner: node1; I am node3
2025-07-15 16:53:53,339 INFO: trying to bootstrap from leader 'node1'
2025-07-15 16:53:53,548 INFO: replica has been created using basebackup
2025-07-15 16:53:53,549 INFO: bootstrapped from leader 'node1'
2025-07-15 16:53:53,741 INFO: postmaster pid=39826
localhost:5434 - no response
2025-07-15 16:53:53.772 KST [39826] LOG:  starting PostgreSQL 17.5 (Ubuntu 17.5-0ubuntu0.25.04.1) on aarch64-unknown-linux-gnu, compiled by gcc (Ubuntu 14.2.0-19ubuntu2) 14.2.0, 64-bit
2025-07-15 16:53:53.772 KST [39826] LOG:  listening on IPv4 address "127.0.0.1", port 5434
2025-07-15 16:53:53.773 KST [39826] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5434"
2025-07-15 16:53:53.775 KST [39830] LOG:  database system was interrupted; last known up at 2025-07-15 16:53:53 KST
2025-07-15 16:53:53.857 KST [39830] LOG:  starting backup recovery with redo LSN 0/8000028, checkpoint LSN 0/8000080, on timeline ID 1
2025-07-15 16:53:53.858 KST [39830] LOG:  entering standby mode
2025-07-15 16:53:53.860 KST [39830] LOG:  redo starts at 0/8000028
2025-07-15 16:53:53.861 KST [39830] LOG:  completed backup recovery with redo LSN 0/8000028 and end LSN 0/8000120
2025-07-15 16:53:53.861 KST [39830] LOG:  consistent recovery state reached at 0/8000120
2025-07-15 16:53:53.861 KST [39826] LOG:  database system is ready to accept read-only connections
2025-07-15 16:53:53.867 KST [39831] LOG:  started streaming WAL from primary at 0/9000000 on timeline 1
localhost:5434 - accepting connections
localhost:5434 - accepting connections
2025-07-15 16:53:54,794 INFO: Lock owner: node1; I am node3
2025-07-15 16:53:54,794 INFO: establishing a new patroni heartbeat connection to postgres
2025-07-15 16:53:54,867 INFO: no action. I am (node3), a secondary, and following a leader (node1)
2025-07-15 16:53:59,529 INFO: no action. I am (node3), a secondary, and following a leader (node1)
2025-07-15 16:54:10,074 INFO: no action. I am (node3), a secondary, and following a leader (node1)
2025-07-15 16:54:20,030 INFO: no action. I am (node3), a secondary, and following a leader (node1)
2025-07-15 16:54:30,074 INFO: no action. I am (node3), a secondary, and following a leader (node1)

 

 

Patroni Trouble shooting

  • Patroni 구성하면서 발생한 문제들에 대해 정리
  • 자료가 많지 않아, 구성 중에 설정값 누락도 많이 있었으며, 권한에 대한 이슈도 존재한 것을 작성

Case 1.

root@QEMU-Virtual-Machine:/etc/systemd/system# patroni /etc/patroni1.yml
2025-07-15 15:32:49,636 ERROR: Failed to get list of machines from http://127.0.0.1:2479/v2: EtcdException('Bad response : 404 page not found\n')
2025-07-15 15:32:49,637 ERROR: Failed to get list of machines from http://127.0.0.1:2379/v2: EtcdException('Bad response : 404 page not found\n')
2025-07-15 15:32:49,638 ERROR: Failed to get list of machines from http://127.0.0.1:2579/v2: EtcdException('Bad response : 404 page not found\n')
2025-07-15 15:32:49,638 INFO: waiting on etcd
2025-07-15 15:32:54,641 ERROR: Failed to get list of machines from http://127.0.0.1:2479/v2: EtcdException('Bad response : 404 page not found\n')
2025-07-15 15:32:54,645 ERROR: Failed to get list of machines from http://127.0.0.1:2379/v2: EtcdException('Bad response : 404 page not found\n')
2025-07-15 15:32:54,649 ERROR: Failed to get list of machines from http://127.0.0.1:2579/v2: EtcdException('Bad response : 404 page not found\n')
  • 기존 가이드들 보면, 다음과 같이 `etcd` 라고 명칭하지만 이것을 `etcd3` 로 변경하면 해결
root@QEMU-Virtual-Machine:/data# cat /etc/patroni1.yml
scope: pg-cluster
name: node1

restapi:
  listen: 0.0.0.0:8008
  connect_address: 127.0.0.1:8008

etcd3:  #<----- 일반 문서에는 etcd: 로 되어 있음
  hosts: 127.0.0.1:2379,127.0.0.1:2479,127.0.0.1:2579

 

Case 2.

  • root 에서 patroni 실행 시 정상 실행이 안되는 경우
  • postgres 계정으로 전환하여 진행
실행 명령어 및 로그 내용
root@QEMU-Virtual-Machine:/tmp/etcd-download-test# patroni /etc/patroni3.yml
2025-07-15 01:57:32,711 INFO: Lock owner: None; I am node3

--> 계속 이러한 로그만 나오고 정상적으로 실행되는 어떠한 내용도 확인이 되지 않음

# 아래 처럼 postgres 계정으로 서비스 시작 진행
root@QEMU-Virtual-Machine:/tmp/etcd-download-test# sudo -u postgres patroni /etc/patroni3.yml

 

Case 3.

  • Patroni node 3번이 실행 도중 다음과 같은 에러가 발생하며 문제를 발생
2025-07-15 16:31:47,698 INFO: doing crash recovery in a single user mode
2025-07-15 16:31:47,713 ERROR: Error when reading postmaster.opts
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/patroni/postgresql/rewind.py", line 545, in read_postmaster_opts
with open(os.path.join(self._postgresql.data_dir, 'postmaster.opts')) as f:
~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/data/pg3/postmaster.opts'
2025-07-15 16:31:47,719 ERROR: Crash recovery finished with code=1
2025-07-15 16:31:47,719 INFO: stdout=
2025-07-15 16:31:47,719 INFO: stderr=2025-07-15 16:31:47.718 KST [39181] FATAL: data directory "/data/pg3" has invalid permissions
2025-07-15 16:31:47.718 KST [39181] DETAIL: Permissions should be u=rwx (0700) or u=rwx,g=rx (0750).

 

  • 권한 이슈로 다음과 같이 실행 후 다시 진행하여 해결 완료
    • DB가 시작되지 못해서 발생
# 소유권 변경 (root로 실행)
chown -R postgres:postgres /data/pg3

# 권한 변경
chmod 700 /data/pg3


root@QEMU-Virtual-Machine:/data# patronictl -c /etc/patroni1.yml list
+ Cluster: pg-cluster (7527206046275954701) -------+----+-----------+
| Member | Host           | Role    | State        | TL | Lag in MB |
+--------+----------------+---------+--------------+----+-----------+
| node1  | 127.0.0.1:5432 | Leader  | running      |  1 |           |
| node2  | 127.0.0.1:5433 | Replica | streaming    |  1 |         0 |
| node3  | 127.0.0.1:5434 | Replica | start failed |    |   unknown |
+--------+----------------+---------+--------------+----+-----------+

## 이슈 해결
root@QEMU-Virtual-Machine:/data# screen -R patroni3
[detached from 33456.patroni3]
root@QEMU-Virtual-Machine:/data# patronictl -c /etc/patroni1.yml list
+ Cluster: pg-cluster (7527206046275954701) ----+----+-----------+
| Member | Host           | Role    | State     | TL | Lag in MB |
+--------+----------------+---------+-----------+----+-----------+
| node1  | 127.0.0.1:5432 | Leader  | running   |  1 |           |
| node2  | 127.0.0.1:5433 | Replica | streaming |  1 |         0 |
| node3  | 127.0.0.1:5434 | Replica | streaming |  1 |         0 |
+--------+----------------+---------+-----------+----+-----------+

 

Case 4.

  • log 상에서 다음과 같은 에러가 보인다면, 해당 secondary node 를 재 구축 진행
  • 운영에서는 최신 백업으로 복구 후에 진행하는 것이 시간 단축에 도움
  • 초기 과정에서는 해당 노드를 삭제하고 다시 구성하는 것이 빠르고, 깔끔한 방법
## Error log (replica node)
2025-07-15 16:50:13.401 KST [39717] FATAL:  could not receive data from WAL stream: ERROR:  requested starting point 0/6000000 is ahead of the WAL flush position of this server 0/5050660

## 이 오류 메시지는 PostgreSQL 스트리밍 복제가 시작될 때 레플리카 노드가 요청한 WAL 시점이 프라이머리 노드가 아직 생성하지 않은 LSN(Log Sequence Number)에 있다는 것을 의미(제대로 복제 데이터를 가져오지 못한 현상)
################################################################################
## 삭제 후 재구축
# 레플리카 노드에서 실행 (중요: primary가 정상 상태여야 함)
# patroni 를 강제 중지 또는
sudo systemctl stop patroni3  # 프로세스 정지

# 데이터 디렉토리 정리 (중요: 실수로 primary에서 하지 않도록 주의)
rm -rf /data/pg3/*

# 또는 수동으로 초기화된 basebackup
sudo -u postgres pg_basebackup -h <primary_ip> -D /data/pg3 -U replicator -P -R

# 퍼미션 재확인
chown -R postgres:postgres /data/pg3
chmod 700 /data/pg3

# Patroni 재시작
sudo systemctl start patroni3   # 서비스로 구성한 경우, 또는 아래처럼 직접 실행
root@QEMU-Virtual-Machine:/tmp/etcd-download-test# sudo -u postgres patroni /etc/patroni3.yml

 

반응형

+ Recent posts