1. 创建一个namespace[kube-ops.yaml]
---
# Namespace used by the Prometheus and node-exporter manifests in these notes.
apiVersion: v1
kind: Namespace
metadata:
  name: kube-ops
2. 设置配置文件[kube-config.yaml]
---
# ConfigMap holding the Prometheus server configuration.
# The single key `prometheus.yml` is the full Prometheus config file; every
# scrape job below uses Kubernetes service discovery (`kubernetes_sd_configs`).
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-ops
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    scrape_configs:

      # API server: discover all endpoints, then keep only the `kubernetes`
      # service in the `default` namespace on its `https` port.
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https

      # Nodes: rewrite the discovered <node>:10250 address to <node>:9100
      # and copy node labels onto the scraped series.
      - job_name: 'kubernetes-nodes'
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:9100'
            target_label: __address__
            action: replace
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # Kubelet: rewrite the discovered <node>:10250 address to <node>:10255.
      # NOTE(review): 10255 is presumably the kubelet read-only port, which
      # may be disabled on the target cluster - confirm before relying on it.
      - job_name: 'kubernetes-kubelet'
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:10255'
            target_label: __address__
            action: replace
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # cAdvisor: scraped through the API server proxy. The target address is
      # replaced with kubernetes.default.svc:443 and the metrics path is built
      # from the node name.
      - job_name: 'kubernetes-cadvisor'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      # Service endpoints: only services annotated `prometheus.io/scrape: true`
      # are kept. The optional `prometheus.io/scheme`, `prometheus.io/path`
      # and `prometheus.io/port` annotations override scheme, path and port.
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name

      # Services: probed through a blackbox exporter. The service address
      # becomes the `target` URL parameter and the scrape itself goes to the
      # exporter's /probe endpoint with the http_2xx module.
      # NOTE(review): blackbox-exporter.example.com looks like a placeholder;
      # replace it with the real exporter address.
      - job_name: 'kubernetes-services'
        kubernetes_sd_configs:
          - role: service
        metrics_path: /probe
        params:
          module: [http_2xx]
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.example.com:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      # Ingresses: probed through the same blackbox exporter. The probe URL is
      # assembled as <scheme>://<address><path>.
      # metrics_path/params are required here: without them the scrape would
      # hit the exporter's own /metrics instead of /probe?target=...
      - job_name: 'kubernetes-ingresses'
        kubernetes_sd_configs:
          - role: ingress
        metrics_path: /probe
        params:
          module: [http_2xx]
        relabel_configs:
          - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
            regex: (.+);(.+);(.+)
            replacement: ${1}://${2}${3}
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.example.com:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_ingress_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_ingress_name]
            target_label: kubernetes_name

      # Pods: only pods annotated `prometheus.io/scrape: true` are kept. The
      # optional `prometheus.io/path` and `prometheus.io/port` annotations
      # override the metrics path and port.
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
3. 设置 Prometheus 的 RBAC 权限 [prom-rbac.yaml]
---
# ServiceAccount the Prometheus pods run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: kube-ops
---
# Cluster-wide read permissions for Prometheus service discovery and scraping.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Objects enumerated by the node/service/endpoints/pod discovery roles,
  # plus nodes/proxy for scraping through the API server proxy.
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  # Needed by the `role: ingress` discovery used in the Prometheus config;
  # without it the ingress job cannot list or watch Ingress objects.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
---
# Binds the ClusterRole above to the `prometheus` ServiceAccount in kube-ops.
# Uses rbac.authorization.k8s.io/v1 (same as the ClusterRole); the v1beta1
# RBAC API was removed in Kubernetes 1.22.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-ops
4. 部署 Prometheus Deployment [prom-deploy.yaml]
---
# Prometheus server Deployment.
# - Runs as the `prometheus` ServiceAccount.
# - Mounts the `prometheus-config` ConfigMap at /etc/prometheus, which is
#   where --config.file expects prometheus.yml.
# - Stores TSDB data in an emptyDir, so data does not survive pod deletion.
# apps/v1 replaces apps/v1beta2, which was removed in Kubernetes 1.16.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: prometheus-deployment
  name: prometheus
  namespace: kube-ops
spec:
  # NOTE(review): each replica keeps its own independent emptyDir TSDB, so the
  # two pods may return slightly different data - confirm 2 is intended.
  replicas: 2
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      containers:
        - image: prom/prometheus:latest
          name: prometheus
          command:
            - "/bin/prometheus"
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus"
            # --storage.tsdb.retention is deprecated in favour of
            # --storage.tsdb.retention.time.
            - "--storage.tsdb.retention.time=24h"
          ports:
            - containerPort: 9090
              protocol: TCP
          volumeMounts:
            - mountPath: "/prometheus"
              name: data
            - mountPath: "/etc/prometheus"
              name: config-volume
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
            limits:
              cpu: 500m
              memory: 2500Mi
      serviceAccountName: prometheus
      volumes:
        - name: data
          emptyDir: {}
        - name: config-volume
          configMap:
            name: prometheus-config
5. 部署 Prometheus Service [prom-svc.yaml]
---
# NodePort Service in front of the pods labelled `app: prometheus`.
# Port 9090 is exposed on every node at nodePort 30003.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: kube-ops
  labels:
    app: prometheus
spec:
  type: NodePort
  selector:
    app: prometheus
  ports:
    - port: 9090
      targetPort: 9090
      nodePort: 30003
6. 部署 node-exporter 收集节点信息 [node-ex.yaml]
---
# node-exporter DaemonSet: one pod per node, sharing the host's PID, IPC and
# network namespaces. With hostNetwork, container port 9100 is served on the
# node's own address.
# apps/v1 replaces extensions/v1beta1, which stopped serving DaemonSet in
# Kubernetes 1.16; apps/v1 requires an explicit spec.selector.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: kube-ops
  labels:
    k8s-app: node-exporter
spec:
  # Must match the pod template labels below.
  selector:
    matchLabels:
      k8s-app: node-exporter
  template:
    metadata:
      labels:
        k8s-app: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      hostNetwork: true
      # Keeps cluster DNS resolution working while on the host network.
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - image: prom/node-exporter:latest
          name: node-exporter
          ports:
            - containerPort: 9100
              protocol: TCP
              name: http
7. 部署 kube-state-metrics [各个文件在下面分别列出]
# File: kube-state-metrics-cluster-role-binding.yaml
---
# Grants the cluster-wide `kube-state-metrics` ClusterRole to the
# `kube-state-metrics` ServiceAccount in kube-system.
# Kubernetes versions before 1.8.0 should use
# rbac.authorization.k8s.io/v1beta1.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics

# File: kube-state-metrics-role-binding.yaml
---
# Grants the namespaced `kube-state-metrics-resizer` Role to the same
# ServiceAccount.
# Kubernetes versions before 1.8.0 should use
# rbac.authorization.k8s.io/v1beta1.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kube-state-metrics
  namespace: kube-system
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kube-state-metrics-resizer

# File: kube-state-metrics-service.yaml
---
# Service exposing the two named container ports of kube-state-metrics.
# It carries the `prometheus.io/scrape` annotation set to the string "true".
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
  annotations:
    prometheus.io/scrape: "true"
spec:
  selector:
    k8s-app: kube-state-metrics
  ports:
    - name: http-metrics
      port: 8080
      targetPort: http-metrics
      protocol: TCP
    - name: telemetry
      port: 8081
      targetPort: telemetry
      protocol: TCP

# File: kube-state-metrics-cluster-role.yaml
---
# Read-only (list/watch) access to the object kinds listed below.
# Kubernetes versions before 1.8.0 should use
# rbac.authorization.k8s.io/v1beta1.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs:
      - list
      - watch
  - apiGroups:
      - extensions
    resources:
      - daemonsets
      - deployments
      - replicasets
      - ingresses
    verbs:
      - list
      - watch
  - apiGroups:
      - apps
    resources:
      - daemonsets
      - deployments
      - replicasets
      - statefulsets
    verbs:
      - list
      - watch
  - apiGroups:
      - batch
    resources:
      - cronjobs
      - jobs
    verbs:
      - list
      - watch
  - apiGroups:
      - autoscaling
    resources:
      - horizontalpodautoscalers
    verbs:
      - list
      - watch
  - apiGroups:
      - policy
    resources:
      - poddisruptionbudgets
    verbs:
      - list
      - watch
  - apiGroups:
      - certificates.k8s.io
    resources:
      - certificatesigningrequests
    verbs:
      - list
      - watch

# File: kube-state-metrics-role.yaml
---
# Namespaced Role: `get` on pods, and `get`/`update` on the Deployment named
# `kube-state-metrics` in both the `apps` and `extensions` API groups.
# Kubernetes versions before 1.8.0 should use
# rbac.authorization.k8s.io/v1beta1.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kube-state-metrics-resizer
  namespace: kube-system
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
  - apiGroups:
      - apps
    resources:
      - deployments
    resourceNames:
      - kube-state-metrics
    verbs:
      - get
      - update
  - apiGroups:
      - extensions
    resources:
      - deployments
    resourceNames:
      - kube-state-metrics
    verbs:
      - get
      - update

# File: kube-state-metrics-deployment.yaml
---
# Single-replica kube-state-metrics Deployment.
# Kubernetes version 1.8.x should use apps/v1beta2.
# Kubernetes versions before 1.8.0 should use apps/v1beta1 or
# extensions/v1beta1.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
  template:
    metadata:
      labels:
        k8s-app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: quay.io/coreos/kube-state-metrics:v1.6.0
          ports:
            - name: http-metrics
              containerPort: 8080
            - name: telemetry
              containerPort: 8081
          # Pod is marked ready once /healthz on port 8080 answers.
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5

# File: kube-state-metrics-service-account.yaml
---
# Identity referenced by the Deployment and by both bindings above.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: kube-system