1

嗨,我正在 k8s pod 中部署一个 etcd 容器,其中 data_dir 映射到为 pod 创建的持久卷声明。第一次创建 pvc 和 pod 时,etcd 服务启动并运行,一切都按预期工作。

但是,当我删除 k8s Deployment 并重新创建之后,etcd 在引导过程中确实识别出自己此前已初始化为成员,并尝试以现有成员的身份重新启动(可能是因为 data_dir 非空),但 etcd 服务无法启动,并报出 "unexpected fault address" 错误(SIGBUS: bus error)。

我们目前使用的是单节点 etcd 集群配置,这足以满足我们在单个 pod 中同时运行服务和 etcd 数据库的需求。另外,使用 PersistentVolume 的目的是确保 pod 重启前后不会丢失数据。

etcd 版本:3.3.11

PVC.yml

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: sample-etcd-db-pvc
  annotations:
    volume.beta.kubernetes.io/storage-class: glusterfs-storage
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 1Gi

deployment.yml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: sample-etcd-db-service
spec:
  replicas: 1
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
  selector:
    matchLabels:
      app: sample-etcd-db-service
      version: 0.1.0-rc.29
  template:
    metadata:
      labels:
        app: sample-etcd-db-service
        version: 0.1.0-rc.29
    spec:
      containers:
      - name: sample-etcd-db
        image: quay.io/coreos/etcd:v3.3.11
        imagePullPolicy: IfNotPresent
        command:
        - etcd
        - --name=sample-etcd-db-new
        - --listen-client-urls=http://0.0.0.0:2379
        - --advertise-client-urls=http://0.0.0.0:2379
        - --data-dir=/var/etcd/data
        volumeMounts:
        - mountPath: /var/etcd/data
          name: sample-etcd-db-pvc
        ports:
        - containerPort: 2379
      volumes:
      - name: sample-etcd-db-pvc
        persistentVolumeClaim:
          claimName: sample-etcd-db-pvc

首次在 k8s 集群中创建 PVC 和 Deployment

[root@centos-vm etcd_bug]# kubectl create -f pvc.yml
[root@centos-vm etcd_bug]# kubectl create -f deployment.yml

--- 尝试通过删除并再次创建 k8s 部署来重新启动 etcd ---

[root@centos-vm etcd_bug]# kubectl delete deployment sample-etcd-db-service
[root@centos-vm etcd_bug]# kubectl create -f deployment.yml

附上该 pod 的日志。
[root@centos-vm etcd_bug]# kubectl logs sample-etcd-db-service-98f4f9459-4s27f -c sample-etcd-db
2019-02-19 20:25:22.344488 I | etcdmain: etcd Version: 3.3.11
2019-02-19 20:25:22.344636 I | etcdmain: Git SHA: 2cf9e51
2019-02-19 20:25:22.344645 I | etcdmain: Go Version: go1.10.7
2019-02-19 20:25:22.344651 I | etcdmain: Go OS/Arch: linux/amd64
2019-02-19 20:25:22.344659 I | etcdmain: setting maximum number of CPUs to 8, total number of available CPUs is 8
2019-02-19 20:25:22.348118 N | etcdmain: the server is already initialized as member before, starting as etcd member...
2019-02-19 20:25:22.348516 I | embed: listening for peers on http://0.0.0.0:2380
2019-02-19 20:25:22.349140 I | embed: listening for client requests on 0.0.0.0:2379
2019-02-19 20:25:22.391321 I | etcdserver: name = sample-etcd-db-new
2019-02-19 20:25:22.391362 I | etcdserver: data dir = /var/etcd/data
2019-02-19 20:25:22.391379 I | etcdserver: member dir = /var/etcd/data/member
2019-02-19 20:25:22.391387 I | etcdserver: heartbeat = 100ms
2019-02-19 20:25:22.391394 I | etcdserver: election = 1000ms
2019-02-19 20:25:22.391401 I | etcdserver: snapshot count = 100000
2019-02-19 20:25:22.391423 I | etcdserver: advertise client URLs = http://0.0.0.0:2379
2019-02-19 20:25:22.407858 I | etcdserver: restarting member 1c70f9bbb41018f in cluster a0d2de0531db7884 at commit index 4
2019-02-19 20:25:22.407995 I | raft: 1c70f9bbb41018f became follower at term 2
2019-02-19 20:25:22.408039 I | raft: newRaft 1c70f9bbb41018f [peers: [], term: 2, commit: 4, applied: 0, lastindex: 4, lastterm: 2]
unexpected fault address 0x7f43819ee000
fatal error: fault
[signal SIGBUS: bus error code=0x2 addr=0x7f43819ee000 pc=0x8808fd]

goroutine 1 [running]:
runtime.throw(0xfc556e, 0x5)
/usr/local/go/src/runtime/panic.go:616 +0x81 fp=0xc420222ed0 sp=0xc420222eb0 pc=0x42ade1
runtime.sigpanic()
/usr/local/go/src/runtime/signal_unix.go:385 +0x273 fp=0xc420222f20 sp=0xc420222ed0 pc=0x4405b3
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*DB).page(...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/db.go:859
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Tx).page(...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/tx.go:599
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Bucket).pageNode(0xc4202b00f8, 0x2, 0x18, 0xc4201b78e0)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/bucket.go:724 +0xad fp=0xc420222f98 sp=0xc420222f20 pc=0x8808fd
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Cursor).search(0xc420223138, 0x16062d0, 0x7, 0x7, 0x2)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/cursor.go:254 +0x50 fp=0xc420223050 sp=0xc420222f98 pc=0x881d90
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Cursor).seek(0xc420223138, 0x16062d0, 0x7, 0x7, 0x0, 0x0, 0x4, 0xc4201bc8c0, 0x1, 0x1, ...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/cursor.go:159 +0xa5 fp=0xc4202230a0 sp=0xc420223050 pc=0x881695
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Bucket).CreateBucket(0xc4202b00f8, 0x16062d0, 0x7, 0x7, 0x0, 0x0, 0x0)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/bucket.go:165 +0xfa fp=0xc4202231a0 sp=0xc4202230a0 pc=0x87dc6a
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Tx).CreateBucket(0xc4202b00e0, 0x16062d0, 0x7, 0x7, 0x2, 0x1c70f9bbb41018f, 0x4)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/tx.go:108 +0x4f fp=0xc4202231e8 sp=0xc4202231a0 pc=0x88cbcf
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend.(*batchTx).UnsafeCreateBucket(0xc4201be6f0, 0x16062d0, 0x7, 0x7)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend/batch_tx.go:48 +0x6b fp=0xc420223280 sp=0xc4202231e8 pc=0x8de51b
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership.mustCreateBackendBuckets(0x10b3700, 0xc4201bb9d0)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership/store.go:166 +0xb7 fp=0xc4202232c0 sp=0xc420223280 pc=0x957827
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership.(*RaftCluster).SetBackend(0xc4201ce720, 0x10b3700, 0xc4201bb9d0)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership/cluster.go:203 +0x54 fp=0xc4202232e0 sp=0xc4202232c0 pc=0x9525f4
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver.NewServer(0x7ffee163af6a, 0x12, 0x0, 0x0, 0x0, 0x0, 0xc4201a1200, 0x1, 0x1, 0xc4201a1100, ...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/server.go:396 +0x921 fp=0xc420223ab0 sp=0xc4202232e0 pc=0xb762e1
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/embed.StartEtcd(0xc420294000, 0xc420294480, 0x0, 0x0)
.....

[root@centos-vm etcd_bug]# kubectl logs sample-etcd-db-service-98f4f9459-4s27f -c sample-etcd-db
2019-02-19 20:36:20.383231 I | etcdmain: etcd Version: 3.3.11
2019-02-19 20:36:20.383404 I | etcdmain: Git SHA: 2cf9e51
2019-02-19 20:36:20.383413 I | etcdmain: Go Version: go1.10.7
2019-02-19 20:36:20.383419 I | etcdmain: Go OS/Arch: linux/amd64
2019-02-19 20:36:20.383434 I | etcdmain: setting maximum number of CPUs to 8, total number of available CPUs is 8
2019-02-19 20:36:20.386048 N | etcdmain: the server is already initialized as member before, starting as etcd member...
2019-02-19 20:36:20.386987 I | embed: listening for peers on http://localhost:2380
2019-02-19 20:36:20.388330 I | embed: listening for client requests on 0.0.0.0:2379
2019-02-19 20:36:20.437097 I | etcdserver: name = sample-etcd-db-new
2019-02-19 20:36:20.437177 I | etcdserver: data dir = /var/etcd/data
2019-02-19 20:36:20.437198 I | etcdserver: member dir = /var/etcd/data/member
2019-02-19 20:36:20.437211 I | etcdserver: heartbeat = 100ms
2019-02-19 20:36:20.437222 I | etcdserver: election = 1000ms
2019-02-19 20:36:20.437233 I | etcdserver: snapshot count = 100000
2019-02-19 20:36:20.437284 I | etcdserver: advertise client URLs = http://0.0.0.0:2379
2019-02-19 20:36:20.456385 I | etcdserver: restarting member 1c70f9bbb41018f in cluster a0d2de0531db7884 at commit index 4
2019-02-19 20:36:20.456489 I | raft: 1c70f9bbb41018f became follower at term 2
2019-02-19 20:36:20.456520 I | raft: newRaft 1c70f9bbb41018f [peers: [], term: 2, commit: 4, applied: 0, lastindex: 4, lastterm: 2]
unexpected fault address 0x7efabfc3e000
fatal error: fault
[signal SIGBUS: bus error code=0x2 addr=0x7efabfc3e000 pc=0x8808fd]

goroutine 1 [running]:
runtime.throw(0xfc556e, 0x5)
/usr/local/go/src/runtime/panic.go:616 +0x81 fp=0xc420258ed0 sp=0xc420258eb0 pc=0x42ade1
runtime.sigpanic()
/usr/local/go/src/runtime/signal_unix.go:385 +0x273 fp=0xc420258f20 sp=0xc420258ed0 pc=0x4405b3
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*DB).page(...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/db.go:859
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Tx).page(...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/tx.go:599
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Bucket).pageNode(0xc4203220f8, 0x2, 0x18, 0xc420227580)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/bucket.go:724 +0xad fp=0xc420258f98 sp=0xc420258f20 pc=0x8808fd
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Cursor).search(0xc420259138, 0x16062d0, 0x7, 0x7, 0x2)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/cursor.go:254 +0x50 fp=0xc420259050 sp=0xc420258f98 pc=0x881d90
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Cursor).seek(0xc420259138, 0x16062d0, 0x7, 0x7, 0x0, 0x0, 0x4, 0xc4202287f0, 0x1, 0x1, ...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/cursor.go:159 +0xa5 fp=0xc4202590a0 sp=0xc420259050 pc=0x881695
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Bucket).CreateBucket(0xc4203220f8, 0x16062d0, 0x7, 0x7, 0x0, 0x0, 0x0)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/bucket.go:165 +0xfa fp=0xc4202591a0 sp=0xc4202590a0 pc=0x87dc6a
github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt.(*Tx).CreateBucket(0xc4203220e0, 0x16062d0, 0x7, 0x7, 0x2, 0x1c70f9bbb41018f, 0x4)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/bbolt/tx.go:108 +0x4f fp=0xc4202591e8 sp=0xc4202591a0 pc=0x88cbcf
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend.(*batchTx).UnsafeCreateBucket(0xc420240b40, 0x16062d0, 0x7, 0x7)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend/batch_tx.go:48 +0x6b fp=0xc420259280 sp=0xc4202591e8 pc=0x8de51b
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership.mustCreateBackendBuckets(0x10b3700, 0xc4202c8620)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership/store.go:166 +0xb7 fp=0xc4202592c0 sp=0xc420259280 pc=0x957827
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership.(*RaftCluster).SetBackend(0xc42022a960, 0x10b3700, 0xc4202c8620)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/membership/cluster.go:203 +0x54 fp=0xc4202592e0 sp=0xc4202592c0 pc=0x9525f4
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver.NewServer(0x7fff206f1fa8, 0x12, 0x0, 0x0, 0x0, 0x0, 0xc420280a00, 0x1, 0x1, 0xc420280700, ...)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver/server.go:396 +0x921 fp=0xc420259ab0 sp=0xc4202592e0 pc=0xb762e1
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/embed.StartEtcd(0xc4202da000, 0xc4202da480, 0x0, 0x0)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/embed/etcd.go:179 +0x811 fp=0xc42025a6e8 sp=0xc420259ab0 pc=0xcb6361
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdmain.startEtcd(0xc4202da000, 0xfc6677, 0x6, 0xc42025ad01, 0x2)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdmain/etcd.go:181 +0x40 fp=0xc42025a7b0 sp=0xc42025a6e8 pc=0xd263c0
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdmain.startEtcdOrProxyV2()
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdmain/etcd.go:102 +0x1369 fp=0xc42025bf08 sp=0xc42025a7b0 pc=0xd25d79
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdmain.Main()
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdmain/main.go:46 +0x3f fp=0xc42025bf78 sp=0xc42025bf08 pc=0xd2c81f
main.main()
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/etcd/main.go:28 +0x20 fp=0xc42025bf88 sp=0xc42025bf78 pc=0xd2e910
runtime.main()
/usr/local/go/src/runtime/proc.go:198 +0x212 fp=0xc42025bfe0 sp=0xc42025bf88 pc=0x42c652
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:2361 +0x1 fp=0xc42025bfe8 sp=0xc42025bfe0 pc=0x459f81

goroutine 51 [chan receive]:
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil.(*MergeLogger).outputLoop(0xc4201c3720)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil/merge_logger.go:174 +0x40d
created by github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil.NewMergeLogger
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil/merge_logger.go:92 +0x85

goroutine 104 [chan receive]:
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil.(*MergeLogger).outputLoop(0xc420262680)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil/merge_logger.go:174 +0x40d
created by github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil.NewMergeLogger
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil/merge_logger.go:92 +0x85

goroutine 72 [chan receive]:
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil.(*MergeLogger).outputLoop(0xc420224860)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil/merge_logger.go:174 +0x40d
created by github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil.NewMergeLogger
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/logutil/merge_logger.go:92 +0x85

goroutine 116 [syscall]:
os/signal.signal_recv(0x0)
/usr/local/go/src/runtime/sigqueue.go:139 +0xa6
os/signal.loop()
/usr/local/go/src/os/signal/signal_unix.go:22 +0x22
created by os/signal.init.0
/usr/local/go/src/os/signal/signal_unix.go:28 +0x41

goroutine 91 [select]:
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/wal.(*filePipeline).run(0xc42023fb00)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/wal/file_pipeline.go:89 +0x139
created by github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/wal.newFilePipeline
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/wal/file_pipeline.go:47 +0x11a

goroutine 90 [select]:
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend.(*backend).run(0xc4202c8620)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend/backend.go:267 +0x180
created by github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend.newBackend
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/mvcc/backend/backend.go:161 +0x2ea

goroutine 92 [select]:
github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/raft.(*node).run(0xc42022a9c0, 0xc4202d4100)
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/raft/node.go:313 +0x5f8
created by github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/raft.RestartNode
/tmp/etcd-release-3.3.11/etcd/release/etcd/gopath/src/github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/raft/node.go:223 +0x321
4

0 回答 0