解决 kubeasz 安装k8s集群跨节点pod 无法使用cluster ip通讯问题

问题描述

使用kubeasz搭建k8s集群后使用的配置文件

# 'etcd' cluster should have odd member(s) (1,3,5,...)
[etcd]
192.168.xx.22

# master node(s)
[kube_master]
192.168.xx.22

# work node(s)
[kube_node]
192.168.xx.9
192.168.xx.22

# [optional] harbor server, a private docker registry
# 'NEW_INSTALL': 'true' to install a harbor server; 'false' to integrate with existed one
[harbor]
#192.168.1.8 NEW_INSTALL=false

# [optional] loadbalance for accessing k8s from outside
[ex_lb]
#192.168.1.6 LB_ROLE=backup EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443
#192.168.1.7 LB_ROLE=master EX_APISERVER_VIP=192.168.1.250 EX_APISERVER_PORT=8443

# [optional] ntp server for the cluster
[chrony]
#192.168.1.1

[all:vars]
# --------- Main Variables ---------------
# Secure port for apiservers
SECURE_PORT="6443"

# Cluster container-runtime supported: docker, containerd
# if k8s version >= 1.24, docker is not supported
CONTAINER_RUNTIME="docker"

# Network plugins supported: calico, flannel, kube-router, cilium, kube-ovn
CLUSTER_NETWORK="calico"

# Service proxy mode of kube-proxy: 'iptables' or 'ipvs'
PROXY_MODE="ipvs"

# K8S Service CIDR, not overlap with node(host) networking
SERVICE_CIDR="10.68.0.0/16"

# Cluster CIDR (Pod CIDR), not overlap with node(host) networking
CLUSTER_CIDR="172.20.0.0/16"

# NodePort Range
NODE_PORT_RANGE="30000-32767"

# Cluster DNS Domain
CLUSTER_DNS_DOMAIN="cluster.local"

# -------- Additional Variables (don't change the default value right now) ---
# Binaries Directory
bin_dir="/opt/kube/bin"

# Deploy Directory (kubeasz workspace)
base_dir="/etc/kubeasz"

# Directory for a specific cluster
cluster_dir="{{ base_dir }}/clusters/k8s"

k get node 看到所有集群节点

[root@feiteng ~]# k get node
NAME STATUS ROLES AGE VERSION
192.168.xx.9 Ready worker 14d v1.23.16
192.168.xx.22 Ready master 40d v1.23.16

在多节点部署前端服务时，发现 nginx pod 里的跨节点请求报错：connect to 10.68.94.12 port 39000 failed: No route to host。其中 10.68.94.12 是 ClusterIP，DNS 解析是正确的。

问题排查

排查使用命令查看docker subnet

docker inspect bridge |grep Subnet

结果在集群两台机器上是相同的，说明集群部署的网络插件 calico 没有正确地完成所需的配置。默认情况下每个节点上部署的 docker 都会使用 172.17.0.0/16 这个子网给容器使用，但是在集群环境下，每个节点需要分别使用不同的子网，否则就会冲突。

集群默认是使用 calico 进行配置的，但与 calico 相比我更熟悉 flannel，所以还是更换回 flannel

解决问题

1 把calico相关的pods停掉

部署上kube-flannel的时候需要看一下当前环境中kube-controller-manager服务中配置的cluster-cidr参数指定的子网是什么，需要在kube-flannel的部署yaml中将子网也指定为这个子网。

kubectl get nodes -o jsonpath='{.items[*].spec.podCIDR}'

kubectl get nodes -o jsonpath='{.items[*].spec.podCIDR}'
172.20.0.0/24

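查看到的 172.20.0.0/24 是单个节点的 podCIDR；整个集群的 Pod 网段是配置文件中 CLUSTER_CIDR（即 kube-controller-manager 的 cluster-cidr）指定的 172.20.0.0/16。flannel 的 Network 必须覆盖所有节点的 podCIDR，因此把集群网段 172.20.0.0/16 修改到下面flannel的部署文件这个位置上

net-conf.json: |
    {
      "Network": "172.20.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }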

部署yaml文件

# Dedicated namespace that holds every namespaced flannel resource in this manifest
# (ServiceAccount, ConfigMap, DaemonSet).
apiVersion: v1
kind: Namespace
metadata:
  labels:
    k8s-app: flannel
    # Enforce the "privileged" Pod Security level for this namespace.
    # NOTE(review): presumably required because the flannel DaemonSet uses
    # hostNetwork and hostPath volumes — confirm against the cluster's policy.
    pod-security.kubernetes.io/enforce: privileged
  name: kube-flannel
---
# Identity used by the flannel pods; the DaemonSet references it via
# `serviceAccountName: flannel`, and the ClusterRoleBinding grants it permissions.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
# Cluster-wide permissions for flannel: read pods, read/watch nodes, and patch
# node status. All rules target the core ("") API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
# Read individual pods (get only; no list/watch).
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
# Read and watch Node objects.
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
# Patch the status subresource of Node objects.
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
# Binds the "flannel" ClusterRole to the "flannel" ServiceAccount in the
# kube-flannel namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
# flannel configuration; the DaemonSet mounts this ConfigMap at /etc/kube-flannel/.
apiVersion: v1
data:
  # CNI config list. The install-cni init container copies it to
  # /etc/cni/net.d/10-flannel.conflist on every node.
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  # flannel network settings. "Network" must be the cluster-wide Pod CIDR
  # (kubeasz CLUSTER_CIDR / kube-controller-manager --cluster-cidr), which is
  # 172.20.0.0/16 for this cluster. Do not leave it at 10.244.0.0/16 and do not
  # use a single node's /24 podCIDR, otherwise node subnets fall outside the
  # flannel network and cross-node pod traffic breaks.
  net-conf.json: |
    {
      "Network": "172.20.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }
kind: ConfigMap
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-cfg
  namespace: kube-flannel
---
# DaemonSet that runs the flanneld agent on the cluster's Linux nodes.
# Two init containers first install the flannel CNI plugin binary and the CNI
# config list onto the host, then the main container starts flanneld.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-ds
  namespace: kube-flannel
spec:
  selector:
    matchLabels:
      app: flannel
      k8s-app: flannel
  template:
    metadata:
      labels:
        app: flannel
        k8s-app: flannel
        tier: node
    spec:
      # Schedule only onto nodes labelled kubernetes.io/os=linux.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      containers:
      # NOTE(review): per the flannel docs, --ip-masq enables masquerading for
      # traffic leaving the pod network and --kube-subnet-mgr makes flanneld use
      # the Kubernetes API for subnet assignment — confirm for v0.26.4.
      - args:
        - --ip-masq
        - --kube-subnet-mgr
        command:
        - /opt/bin/flanneld
        env:
        # Pod name and namespace are injected from the pod's own metadata.
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        image: ghcr.io/flannel-io/flannel:v0.26.4
        name: kube-flannel
        resources:
          requests:
            cpu: 100m
            memory: 50Mi
        # Not a privileged container; only the two network capabilities are added.
        securityContext:
          capabilities:
            add:
            - NET_ADMIN
            - NET_RAW
          privileged: false
        volumeMounts:
        # Host directory /run/flannel (the article later inspects
        # /var/run/flannel/subnet.env on each node).
        - mountPath: /run/flannel
          name: run
        # ConfigMap kube-flannel-cfg (cni-conf.json, net-conf.json).
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
        - mountPath: /run/xtables.lock
          name: xtables-lock
      # The pod uses the node's network namespace.
      hostNetwork: true
      initContainers:
      # Copies the /flannel binary from the image into the host's /opt/cni/bin.
      - args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        command:
        - cp
        image: ghcr.io/flannel-io/flannel-cni-plugin:v1.6.2-flannel1
        name: install-cni-plugin
        volumeMounts:
        - mountPath: /opt/cni/bin
          name: cni-plugin
      # Copies cni-conf.json from the ConfigMap to the host as
      # /etc/cni/net.d/10-flannel.conflist.
      - args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        command:
        - cp
        image: ghcr.io/flannel-io/flannel:v0.26.4
        name: install-cni
        volumeMounts:
        - mountPath: /etc/cni/net.d
          name: cni
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
      priorityClassName: system-node-critical
      serviceAccountName: flannel
      # Tolerate every taint whose effect is NoSchedule (no key restriction).
      tolerations:
      - effect: NoSchedule
        operator: Exists
      volumes:
      # Host paths shared with the node.
      - hostPath:
          path: /run/flannel
        name: run
      - hostPath:
          path: /opt/cni/bin
        name: cni-plugin
      - hostPath:
          path: /etc/cni/net.d
        name: cni
      # flannel configuration from the kube-flannel-cfg ConfigMap.
      - configMap:
          name: kube-flannel-cfg
        name: flannel-cfg
      # Created as an empty file on the host if it does not exist yet.
      - hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
        name: xtables-lock

kube-flannel-ds 部署成功，副本在所有节点上

在2个节点的/var/run/flannel/subnet.env文件中会显示flannel自动将上面那个子网中的一部分划分到每个节点上，修改 /var/run/flannel/subnet.env

一台机器上

FLANNEL_NETWORK=172.20.0.0/16
FLANNEL_SUBNET=172.20.0.1/24
FLANNEL_MTU=1450
FLANNEL_IPMASQ=true

另一台机器上

FLANNEL_NETWORK=172.20.0.0/16
FLANNEL_SUBNET=172.20.1.1/24
FLANNEL_MTU=1450
FLANNEL_IPMASQ=true

不同节点配置不同FLANNEL_SUBNET

各参数的作用

FLANNEL_NETWORK=172.20.0.0/16

含义：定义 Flannel 的整个 Pod 网络范围（即 cluster-cidr），所有节点的 Pod IP 都会从这个 CIDR 分配。
作用：将 Flannel 的 Pod 网络设置为 172.20.0.0/16，范围从 172.20.0.0 到 172.20.255.255，总共可容纳 65,536 个 IP 地址。这与 kube-controller-manager 的 --cluster-cidr 或 Flannel 的 net-conf.json 中的 "Network" 值对应。
修改影响：

如果之前是其他值（例如 10.244.0.0/16），Pod IP 将从新的范围分配。
需要确保与集群的 cluster-cidr 一致，否则会导致网络不可用。