Tigera Calico Troubleshooting and Diagnostics
#!/bin/bash
#
# Collects Calico / Tigera operator diagnostics through kubectl and packs
# them into a tar.gz bundle inside a fresh temporary directory.
#
# Usage:
#   <script> diags [--since=<duration>]
#
#   --since=<duration>  Only collect logs newer than the given relative
#                       duration: digits followed by s, m or h (10s, 5m, 1h).
#                       Defaults to 0s, which returns all logs.
#
# Collection is best-effort: a failing kubectl query leaves an empty or
# partial file behind and the run carries on with the next item.
# Exits 1 on bad arguments, when kubectl is missing or not working, or when
# the staging directory or the bundle cannot be created.

if [[ "$1" != "diags" ]]; then
  echo "Unsupported command: $1" >&2
  exit 1
fi
shift 1

SINCE=0s # Only return logs newer than a relative duration such as 5s, 2m, or 3h. Defaults to all logs.

# Parse optional flag(s):
#   --since=relative_duration (e.g. 10s, 5m)
while (( $# )); do
  case "$1" in
    --since=*)
      SINCE="${1#*=}" # everything after the first '='
      if [[ ! "$SINCE" =~ ^[0-9]+[smh]$ ]]; then
        echo "invalid relative duration, try 10s, 5m, or 1h" >&2
        exit 1
      fi
      shift 1
      ;;
    -*) # unsupported flags
      echo "Error: Unsupported flag $1" >&2
      exit 1
      ;;
    *) # unsupported argument
      echo "Error: Unsupported argument $1" >&2
      exit 1
      ;;
  esac
done

# Check pre-requisites, like a functioning kubectl.
if ! command -v kubectl >/dev/null 2>&1; then
  echo "Unable to locate kubectl in PATH" >&2
  exit 1
fi
# Only stdout is discarded here: kubectl's own error text stays visible so the
# user can see why the probe failed.
if ! kubectl get ns >/dev/null; then
  echo "kubectl does not appear to be functioning" >&2
  exit 1
fi

# Make a tmp dir. Nothing below can work without the staging directory, so
# these two steps are fatal on failure (everything after is best-effort).
mtmp=$(mktemp -d) || { echo "Unable to create a temporary directory" >&2; exit 1; }
tmp="${mtmp}/calico-diagnostics"
mkdir "$tmp" || exit 1

# Prints the names of the calico-system pods matching label selector $1,
# separated by spaces.
pod_names() {
  kubectl get pods -n calico-system -l "$1" -o go-template \
    --template="{{range .items}}{{.metadata.name}} {{end}}"
}

echo "==== Begin collecting diagnostics. ===="

# Get some basic cluster state.
echo "Collecting basic cluster state..."
kubectl get ns > "${tmp}/namespaces.txt"
kubectl get all -n calico-system -o wide > "${tmp}/calico-system.txt"
kubectl get all -n tigera-operator -o wide > "${tmp}/tigera-operator.txt"

# One YAML dump per resource type, written to operator.tigera.io/<type>.yaml.
mkdir -p "${tmp}/operator.tigera.io"
for resource in installations apiservers compliances intrusiondetections \
  managers logcollectors logstorages managementclusterconnections; do
  kubectl get "$resource" -o yaml > "${tmp}/operator.tigera.io/${resource}.yaml"
done

# Get tigera status.
echo "Collecting TigeraStatus details..."
kubectl get tigerastatus > "${tmp}/tigerastatus.txt"
kubectl get tigerastatus -o yaml > "${tmp}/tigerastatus-yaml.txt"

# Get nodes.
echo "Collecting Node details..."
kubectl get nodes -o wide > "${tmp}/nodes.txt"
kubectl get nodes -o yaml > "${tmp}/nodes-yaml.txt"

# Get IPAM information.
echo "Collecting IPAM diagnostics..."
mkdir -p "${tmp}/ipam"
kubectl get ipamblocks -o yaml > "${tmp}/ipam/ipamblocks.txt"
kubectl get blockaffinities -o yaml > "${tmp}/ipam/blockaffinities.txt"
kubectl get ipamhandles -o yaml > "${tmp}/ipam/ipamhandles.txt"

# Get operator logs.
# --tail=-1 is required: when a label selector is used, kubectl logs only
# returns the last 10 lines per pod unless told otherwise.
echo "Collecting tigera-operator logs..."
kubectl logs --since="$SINCE" --tail=-1 -n tigera-operator -l k8s-app=tigera-operator \
  > "${tmp}/tigera-operator.logs"

# Get typha logs.
echo "Collecting calico/typha logs..."
mkdir -p "${tmp}/typhas"
read -r -a typha_pods <<< "$(pod_names k8s-app=calico-typha)"
for typha in "${typha_pods[@]}"; do
  kubectl logs --since="$SINCE" -n calico-system "$typha" > "${tmp}/typhas/${typha}.log"
done

# Get per-node logs and network information. "node" here is the name of a
# calico-node pod; the iptables and route dumps are taken inside that pod.
mkdir -p "${tmp}/nodes"
read -r -a node_pods <<< "$(pod_names k8s-app=calico-node)"
for node in "${node_pods[@]}"; do
  echo "Collecting logs for node: $node"
  mkdir -p "${tmp}/nodes/${node}"
  kubectl logs --since="$SINCE" -n calico-system "$node" > "${tmp}/nodes/${node}/${node}.log"
  kubectl exec -n calico-system -t "$node" -- iptables-save -c > "${tmp}/nodes/${node}/iptables-save.txt"
  kubectl exec -n calico-system -t "$node" -- ip route > "${tmp}/nodes/${node}/iproute.txt"
done

# Tar it all up for easy sharing.
# The archive goes into the freshly created ${mtmp}, so there is no stale
# bundle to delete first and the caller's working directory is never touched.
echo ""
echo "==== Producing a diagnostics bundle. ===="
if ! tar cfz "${mtmp}/calico-diagnostics.tar.gz" -C "$mtmp" calico-diagnostics; then
  echo "Failed to create ${mtmp}/calico-diagnostics.tar.gz" >&2
  exit 1
fi
echo ""
echo "Diagnostic bundle produced at ${mtmp}/calico-diagnostics.tar.gz"
#!/bin/bash
#
# Collects Calico / Tigera operator diagnostics through kubectl and packs
# them into a tar.gz bundle inside a fresh temporary directory.
#
# Usage:
#   <script> diags [--since=<duration>]
#
#   --since=<duration>  Only collect logs newer than the given relative
#                       duration: digits followed by s, m or h (10s, 5m, 1h).
#                       Defaults to 0s, which returns all logs.
#
# Collection is best-effort: a failing kubectl query leaves an empty or
# partial file behind and the run carries on with the next item.
# Exits 1 on bad arguments, when kubectl is missing or not working, or when
# the staging directory or the bundle cannot be created.

if [[ "$1" != "diags" ]]; then
  echo "Unsupported command: $1" >&2
  exit 1
fi
shift 1

SINCE=0s # Only return logs newer than a relative duration such as 5s, 2m, or 3h. Defaults to all logs.

# Parse optional flag(s):
#   --since=relative_duration (e.g. 10s, 5m)
while (( $# )); do
  case "$1" in
    --since=*)
      SINCE="${1#*=}" # everything after the first '='
      if [[ ! "$SINCE" =~ ^[0-9]+[smh]$ ]]; then
        echo "invalid relative duration, try 10s, 5m, or 1h" >&2
        exit 1
      fi
      shift 1
      ;;
    -*) # unsupported flags
      echo "Error: Unsupported flag $1" >&2
      exit 1
      ;;
    *) # unsupported argument
      echo "Error: Unsupported argument $1" >&2
      exit 1
      ;;
  esac
done

# Check pre-requisites, like a functioning kubectl.
if ! command -v kubectl >/dev/null 2>&1; then
  echo "Unable to locate kubectl in PATH" >&2
  exit 1
fi
# Only stdout is discarded here: kubectl's own error text stays visible so the
# user can see why the probe failed.
if ! kubectl get ns >/dev/null; then
  echo "kubectl does not appear to be functioning" >&2
  exit 1
fi

# Make a tmp dir. Nothing below can work without the staging directory, so
# these two steps are fatal on failure (everything after is best-effort).
mtmp=$(mktemp -d) || { echo "Unable to create a temporary directory" >&2; exit 1; }
tmp="${mtmp}/calico-diagnostics"
mkdir "$tmp" || exit 1

# Prints the names of the calico-system pods matching label selector $1,
# separated by spaces.
pod_names() {
  kubectl get pods -n calico-system -l "$1" -o go-template \
    --template="{{range .items}}{{.metadata.name}} {{end}}"
}

echo "==== Begin collecting diagnostics. ===="

# Get some basic cluster state.
echo "Collecting basic cluster state..."
kubectl get ns > "${tmp}/namespaces.txt"
kubectl get all -n calico-system -o wide > "${tmp}/calico-system.txt"
kubectl get all -n tigera-operator -o wide > "${tmp}/tigera-operator.txt"

# One YAML dump per resource type, written to operator.tigera.io/<type>.yaml.
mkdir -p "${tmp}/operator.tigera.io"
for resource in installations apiservers compliances intrusiondetections \
  managers logcollectors logstorages managementclusterconnections; do
  kubectl get "$resource" -o yaml > "${tmp}/operator.tigera.io/${resource}.yaml"
done

# Get tigera status.
echo "Collecting TigeraStatus details..."
kubectl get tigerastatus > "${tmp}/tigerastatus.txt"
kubectl get tigerastatus -o yaml > "${tmp}/tigerastatus-yaml.txt"

# Get nodes.
echo "Collecting Node details..."
kubectl get nodes -o wide > "${tmp}/nodes.txt"
kubectl get nodes -o yaml > "${tmp}/nodes-yaml.txt"

# Get IPAM information.
echo "Collecting IPAM diagnostics..."
mkdir -p "${tmp}/ipam"
kubectl get ipamblocks -o yaml > "${tmp}/ipam/ipamblocks.txt"
kubectl get blockaffinities -o yaml > "${tmp}/ipam/blockaffinities.txt"
kubectl get ipamhandles -o yaml > "${tmp}/ipam/ipamhandles.txt"

# Get operator logs.
# --tail=-1 is required: when a label selector is used, kubectl logs only
# returns the last 10 lines per pod unless told otherwise.
echo "Collecting tigera-operator logs..."
kubectl logs --since="$SINCE" --tail=-1 -n tigera-operator -l k8s-app=tigera-operator \
  > "${tmp}/tigera-operator.logs"

# Get typha logs.
echo "Collecting calico/typha logs..."
mkdir -p "${tmp}/typhas"
read -r -a typha_pods <<< "$(pod_names k8s-app=calico-typha)"
for typha in "${typha_pods[@]}"; do
  kubectl logs --since="$SINCE" -n calico-system "$typha" > "${tmp}/typhas/${typha}.log"
done

# Get per-node logs and network information. "node" here is the name of a
# calico-node pod; the iptables and route dumps are taken inside that pod.
mkdir -p "${tmp}/nodes"
read -r -a node_pods <<< "$(pod_names k8s-app=calico-node)"
for node in "${node_pods[@]}"; do
  echo "Collecting logs for node: $node"
  mkdir -p "${tmp}/nodes/${node}"
  kubectl logs --since="$SINCE" -n calico-system "$node" > "${tmp}/nodes/${node}/${node}.log"
  kubectl exec -n calico-system -t "$node" -- iptables-save -c > "${tmp}/nodes/${node}/iptables-save.txt"
  kubectl exec -n calico-system -t "$node" -- ip route > "${tmp}/nodes/${node}/iproute.txt"
done

# Tar it all up for easy sharing.
# The archive goes into the freshly created ${mtmp}, so there is no stale
# bundle to delete first and the caller's working directory is never touched.
echo ""
echo "==== Producing a diagnostics bundle. ===="
if ! tar cfz "${mtmp}/calico-diagnostics.tar.gz" -C "$mtmp" calico-diagnostics; then
  echo "Failed to create ${mtmp}/calico-diagnostics.tar.gz" >&2
  exit 1
fi
echo ""
echo "Diagnostic bundle produced at ${mtmp}/calico-diagnostics.tar.gz"
I’m a DevOps/SRE/DevSecOps/Cloud Expert passionate about sharing knowledge and experiences. I am working at Cotocus. I blog tech insights at DevOps School, travel stories at Holiday Landmark, stock market tips at Stocks Mantra, health and fitness guidance at My Medic Plus, product reviews at I reviewed, and SEO strategies at Wizbrand.
Please find my social handles below:
Rajesh Kumar Personal Website
Rajesh Kumar at YOUTUBE
Rajesh Kumar at INSTAGRAM
Rajesh Kumar at X
Rajesh Kumar at FACEBOOK
Rajesh Kumar at LINKEDIN
Rajesh Kumar at PINTEREST
Rajesh Kumar at QUORA
Rajesh Kumar at WIZBRAND