- Kubernetes cluster backup and restore using the etcdctl tool
- Kubernetes cluster backup and restore using Velero
#Log into the Control Plane Node and move into the demo directory.
ssh aen@c1-cp1
cd ~/content/course/02/demo

#Note: this restore process is for a locally hosted etcd running in a static pod.

#Check out some of the key etcd configuration information:
#container image and tag, command, --data-dir, and the mounts and volumes for both etcd-certs and etcd-data.
kubectl describe pod etcd-c1-cp1 -n kube-system

#The configuration for etcd comes from the static pod manifest; check out the listen-client-urls, data-dir, volumeMounts, volumes.
sudo more /etc/kubernetes/manifests/etcd.yaml

#You can get the runtime values from ps -aux
ps -aux | grep etcd

#Let's get etcdctl on our local system here...by downloading it from GitHub.
#TODO: Update RELEASE to match your release version!!!
#We can find out the version of etcd we're running by using etcd --version inside the etcd pod.
kubectl exec -it etcd-c1-cp1 -n kube-system -- /bin/sh -c 'ETCDCTL_API=3 /usr/local/bin/etcd --version' | head
export RELEASE="3.5.1"
wget "https://github.com/etcd-io/etcd/releases/download/v${RELEASE}/etcd-v${RELEASE}-linux-amd64.tar.gz"
tar -zxvf "etcd-v${RELEASE}-linux-amd64.tar.gz"
cd "etcd-v${RELEASE}-linux-amd64"
sudo cp etcdctl /usr/local/bin

#Quick check to see if we have etcdctl...
ETCDCTL_API=3 etcdctl --help | head
#First, let's create a secret that we're going to delete and then get back when we run the restore.
kubectl create secret generic test-secret \
    --from-literal=username='svcaccount' \
    --from-literal=password='S0mthingS0Str0ng!'

#Define a variable for the endpoint to etcd
ENDPOINT=https://127.0.0.1:2379

#Verify we're connecting to the right cluster...define your endpoints and keys
sudo ETCDCTL_API=3 etcdctl --endpoints="$ENDPOINT" \
    --cacert=/etc/kubernetes/pki/etcd/ca.crt \
    --cert=/etc/kubernetes/pki/etcd/server.crt \
    --key=/etc/kubernetes/pki/etcd/server.key \
    member list

#Take the backup, saving it to /var/lib/dat-backup.db...
#Be sure to copy that to remote storage when doing this for real.
sudo ETCDCTL_API=3 etcdctl --endpoints="$ENDPOINT" \
    --cacert=/etc/kubernetes/pki/etcd/ca.crt \
    --cert=/etc/kubernetes/pki/etcd/server.crt \
    --key=/etc/kubernetes/pki/etcd/server.key \
    snapshot save /var/lib/dat-backup.db

#Read the metadata from the backup/snapshot to print out the snapshot's status
sudo ETCDCTL_API=3 etcdctl --write-out=table snapshot status /var/lib/dat-backup.db
#Now let's delete an object and then run a restore to get it back.
kubectl delete secret test-secret

#Run the restore...with no --data-dir this restores into ./default.etcd in the current directory.
sudo ETCDCTL_API=3 etcdctl snapshot restore /var/lib/dat-backup.db

#Confirm our data is in the restore directory
sudo ls -l

#Move the old etcd data to a safe location
sudo mv /var/lib/etcd /var/lib/etcd.OLD

#Restart the static pod for etcd...
#if you kubectl delete it will NOT restart the static pod as it's managed by the kubelet, not a controller or the control plane.
sudo crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps | grep etcd
CONTAINER_ID=$(sudo crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps | grep etcd | awk '{ print $1 }')
echo "$CONTAINER_ID"

#Stop the etcd container for our etcd pod and move our restored data into place
sudo crictl --runtime-endpoint unix:///run/containerd/containerd.sock stop "$CONTAINER_ID"
sudo mv ./default.etcd /var/lib/etcd

#Wait for etcd, the scheduler and controller manager to recreate
sudo crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps

#Is our secret back? This may take a minute or two to come back due to caching.
kubectl get secret test-secret
#Another common restore method is to update the data dir to the restored data path in the static pod manifest.
#The kubelet will restart the pod due to the configuration change.
#Let's delete an object again, then run a restore to get it back.
kubectl delete secret test-secret

#Using the same backup from earlier,
#run the restore into a defined --data-dir, rather than the current working directory.
sudo ETCDCTL_API=3 etcdctl snapshot restore /var/lib/dat-backup.db --data-dir=/var/lib/etcd-restore

#Update the static pod manifest to point to /var/lib/etcd-restore...in three places:
#Update
# - --data-dir=/var/lib/etcd-restore
#...
#    volumeMounts:
#    - mountPath: /var/lib/etcd-restore
#...
#  volumes:
#  - hostPath:
#      name: etcd-data
#      path: /var/lib/etcd-restore
#Keep a copy of the original manifest in the current directory so it can be restored later.
sudo cp /etc/kubernetes/manifests/etcd.yaml .
sudo vi /etc/kubernetes/manifests/etcd.yaml

#This will cause the control plane pods to restart...let's check it at the container runtime level
sudo crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps

#Is our secret back?
kubectl get secret test-secret
#Clean up: remove etcdctl from the Control Plane Node if you want,
#put back the original etcd.yaml, and delete the test objects and backup artifacts.
kubectl delete secret test-secret
sudo cp etcd.yaml /etc/kubernetes/manifests/
sudo rm /var/lib/dat-backup.db
sudo rm /usr/local/bin/etcdctl
sudo rm -rf /var/lib/etcd.OLD
sudo rm -rf /var/lib/etcd-restore
#Use $HOME rather than ~ so the path still expands when quoted.
rm "$HOME/content/course/02/demo/etcd-v${RELEASE}-linux-amd64.tar.gz"
# Static pod manifest for etcd as generated by kubeadm on node c1-cp1.
# For the backup/restore demo, the relevant settings are --data-dir and the
# etcd-data hostPath volume (both /var/lib/etcd by default).
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/etcd.advertise-client-urls: https://172.16.94.10:2379
  creationTimestamp: null
  labels:
    component: etcd
    tier: control-plane
  name: etcd
  namespace: kube-system
spec:
  containers:
  - command:
    - etcd
    - --advertise-client-urls=https://172.16.94.10:2379
    - --cert-file=/etc/kubernetes/pki/etcd/server.crt
    - --client-cert-auth=true
    # Where etcd keeps its data on disk; change this (plus the volumeMount and
    # hostPath below) when restoring a snapshot to an alternate directory.
    - --data-dir=/var/lib/etcd
    - --initial-advertise-peer-urls=https://172.16.94.10:2380
    - --initial-cluster=c1-cp1=https://172.16.94.10:2380
    - --key-file=/etc/kubernetes/pki/etcd/server.key
    - --listen-client-urls=https://127.0.0.1:2379,https://172.16.94.10:2379
    - --listen-metrics-urls=http://127.0.0.1:2381
    - --listen-peer-urls=https://172.16.94.10:2380
    - --name=c1-cp1
    - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
    - --peer-client-cert-auth=true
    - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
    - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --snapshot-count=10000
    - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    image: k8s.gcr.io/etcd:3.5.1-0
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /health
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: etcd
    resources:
      requests:
        cpu: 100m
        memory: 100Mi
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /health
        port: 2381
        scheme: HTTP
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /var/lib/etcd
      name: etcd-data
    - mountPath: /etc/kubernetes/pki/etcd
      name: etcd-certs
  hostNetwork: true
  priorityClassName: system-node-critical
  securityContext:
    seccompProfile:
      type: RuntimeDefault
  volumes:
  - hostPath:
      path: /etc/kubernetes/pki/etcd
      type: DirectoryOrCreate
    name: etcd-certs
  - hostPath:
      path: /var/lib/etcd
      type: DirectoryOrCreate
    name: etcd-data
status: {}















I’m a DevOps/SRE/DevSecOps/Cloud expert passionate about sharing knowledge and experiences. I work at Cotocus. I blog tech insights at DevOps School, travel stories at Holiday Landmark, stock market tips at Stocks Mantra, health and fitness guidance at My Medic Plus, product reviews at I Reviewed, and SEO strategies at Wizbrand.
Please find my social handles below:
Rajesh Kumar Personal Website
Rajesh Kumar at YOUTUBE
Rajesh Kumar at INSTAGRAM
Rajesh Kumar at X
Rajesh Kumar at FACEBOOK
Rajesh Kumar at LINKEDIN
Rajesh Kumar at PINTEREST
Rajesh Kumar at QUORA
Rajesh Kumar at WIZBRAND