我正在按照本指南设置 cAdvisor + Prometheus + Grafana 来监控 Docker 容器。一切看起来都很好,只是我找不到容器级别的指标(主机指标显示正常)。例如,如果我在 Prometheus 中查询“container_memory_cache”,它会返回以下结果:
container_memory_cache{id="/",instance="cadvisor:8080",job="cadvisor"}
“id”维度为空,并且没有“name”维度。进一步排查时,我尝试调用 cAdvisor 的 REST API,通过容器 ID/名称检索容器信息,但仍然没有成功:
[root@ip-172-31-56-244 dockprom]# docker ps -a
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
02e385dc1bf1 grafana/grafana "/run.sh" 18 minutes ago Up 18 minutes 0.0.0.0:3000->3000/tcp grafana
30ae6527facb prom/node-exporter "/bin/node_exporte..." 18 minutes ago Up 18 minutes 9100/tcp nodeexporter
927f0144f8c5 google/cadvisor:v0.27.1 "/usr/bin/cadvisor..." 18 minutes ago Up 18 minutes 0.0.0.0:8080->8080/tcp cadvisor
4c277db6d796 prom/prometheus "/bin/prometheus -..." 18 minutes ago Up 18 minutes 0.0.0.0:9090->9090/tcp prometheus
b169c96dad3e prom/alertmanager "/bin/alertmanager..." 18 minutes ago Up 18 minutes 0.0.0.0:9093->9093/tcp alertmanager
[root@ip-172-31-56-244 dockprom]# curl http://127.0.0.1:8080/api/v1.3/containers/02e385dc1bf1
failed to get container "/02e385dc1bf1" with error: unknown container "/02e385dc1bf1"
[root@ip-172-31-56-244 dockprom]# curl http://127.0.0.1:8080/api/v1.3/events/grafana
[]
[root@ip-172-31-56-244 dockprom]#
[root@ip-172-31-56-244 dockprom]# curl http://127.0.0.1:8080/api/v1.3/containers/grafana
failed to get container "/grafana" with error: unknown container "/grafana"
[root@ip-172-31-56-244 dockprom]# curl http://127.0.0.1:8080/api/v2.1/
Supported request types: "appmetrics,attributes,events,machine,machinestats,ps,spec,stats,storage,summary,version"
[root@ip-172-31-56-244 dockprom]# curl http://127.0.0.1:8080/api/v2.1/events/
[]
这是来自 docker-compose.yml 的 cadvisor 配置:
cadvisor:
  image: google/cadvisor:v0.27.1
  container_name: cadvisor
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:rw
    - /sys:/sys:ro
    - /var/lib/docker/:/var/lib/docker:ro
  restart: unless-stopped
  expose:
    - 8080
  ports:
    - 8080:8080
  networks:
    - monitor-net
  labels:
    org.label-schema.group: "monitoring"
......
cAdvisor 日志:
[root@ip-172-31-56-244 dockprom]# docker logs cadvisor
I1009 09:28:39.904951 1 storagedriver.go:50] Caching stats in memory for 2m0s
I1009 09:28:39.905178 1 manager.go:149] cAdvisor running in container: "/sys/fs/cgroup/cpu"
W1009 09:28:39.933801 1 manager.go:157] unable to connect to Rkt api service: rkt: cannot tcp Dial rkt api service: dial tcp [::1]:15441: getsockopt: connection refused
W1009 09:28:39.933995 1 manager.go:166] unable to connect to CRI-O api service: Get http://%2Fvar%2Frun%2Fcrio.sock/info: dial unix /var/run/crio.sock: connect: no such file or directory
I1009 09:28:39.965342 1 fs.go:139] Filesystem UUIDs: map[]
I1009 09:28:39.965362 1 fs.go:140] Filesystem partitions: map[tmpfs:{mountpoint:/dev major:0 minor:47 fsType:tmpfs blockSize:0} /dev/xvda1:{mountpoint:/var/lib/docker/overlay2 major:202 minor:1 fsType:ext4 blockSize:0} shm:{mountpoint:/rootfs/var/lib/docker/containers/663cfebcdcb5c83740441d225a90546e4368789d676d6b6086d59fdcb5bdfe1f/shm major:0 minor:31 fsType:tmpfs blockSize:0}]
I1009 09:28:39.971298 1 info.go:51] Couldn't collect info from any of the files in "/rootfs/etc/machine-id,/var/lib/dbus/machine-id"
I1009 09:28:39.971390 1 manager.go:216] Machine: {NumCores:2 CpuFrequency:2394312 MemoryCapacity:8373006336 HugePages:[{PageSize:2048 NumPages:0}] MachineID: SystemUUID:EC20823C-306D-BF65-BC34-A40C692B43A6 BootID:2db18d19-659b-493f-b3b5-7980fd48d301 Filesystems:[{Device:/dev/xvda1 DeviceMajor:202 DeviceMinor:1 Capacity:105553080320 Type:vfs Inodes:6553600 HasInodes:true} {Device:shm DeviceMajor:0 DeviceMinor:31 Capacity:67108864 Type:vfs Inodes:1022095 HasInodes:true} {Device:overlay DeviceMajor:0 DeviceMinor:30 Capacity:105553080320 Type:vfs Inodes:6553600 HasInodes:true} {Device:tmpfs DeviceMajor:0 DeviceMinor:47 Capacity:4186501120 Type:vfs Inodes:1022095 HasInodes:true}] DiskMap:map[202:0:{Name:xvda Major:202 Minor:0 Size:107374182400 Scheduler:noop}] NetworkDevices:[{Name:br-169495c710b6 MacAddress:02:42:7d:01:a4:0d Speed:0 Mtu:1500} {Name:eth0 MacAddress:12:79:be:fa:a6:d2 Speed:0 Mtu:9001}] Topology:[{Id:0 Memory:8373006336 Cores:[{Id:0 Threads:[0] Caches:[{Size:32768 Type:Data Level:1} {Size:32768 Type:Instruction Level:1} {Size:262144 Type:Unified Level:2}]} {Id:1 Threads:[1] Caches:[{Size:32768 Type:Data Level:1} {Size:32768 Type:Instruction Level:1} {Size:262144 Type:Unified Level:2}]}] Caches:[{Size:31457280 Type:Unified Level:3}]}] CloudProvider:AWS InstanceType:t2.large InstanceID:i-0e3ac492ad4c0b5f6}
I1009 09:28:39.972406 1 manager.go:222] Version: {KernelVersion:4.9.43-17.39.amzn1.x86_64 ContainerOsVersion:Alpine Linux v3.4 DockerVersion:17.03.2-ce DockerAPIVersion:1.27 CadvisorVersion:v0.27.1 CadvisorRevision:cda62a4}
I1009 09:28:39.984490 1 factory.go:355] Registering Docker factory
W1009 09:28:39.984547 1 manager.go:265] Registration of the rkt container factory failed: unable to communicate with Rkt api service: rkt: cannot tcp Dial rkt api service: dial tcp [::1]:15441: getsockopt: connection refused
W1009 09:28:39.984704 1 manager.go:276] Registration of the crio container factory failed: Get http://%2Fvar%2Frun%2Fcrio.sock/info: dial unix /var/run/crio.sock: connect: no such file or directory
I1009 09:28:39.984735 1 factory.go:54] Registering systemd factory
I1009 09:28:39.985581 1 factory.go:86] Registering Raw factory
I1009 09:28:39.986558 1 manager.go:1140] Started watching for new ooms in manager
I1009 09:28:39.987201 1 manager.go:311] Starting recovery of all containers
I1009 09:28:39.989175 1 manager.go:316] Recovery completed
I1009 09:28:39.994765 1 cadvisor.go:159] Starting cAdvisor version: v0.27.1-cda62a4 on port 8080