
Kubernetes Component Monitoring in Practice

by thur
kubernetes metrics collecting

1 Practice

1.1 Roles and permissions

The Role and ServiceAccount are both prefixed with talk-test, and the namespace is flashtalk.
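
Everything below is created in the flashtalk namespace, so that namespace has to exist first. A minimal manifest, assuming it has not been created yet:

apiVersion: v1
kind: Namespace
metadata:
  name: flashtalk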

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test-role
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
    verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test-serviceaccount
  namespace: flashtalk
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: talk-test-role
subjects:
- kind: ServiceAccount
  name: talk-test-serviceaccount
  namespace: flashtalk

1.2 Monitoring kube-apiserver

1.2.1 Create the configuration

apiVersion: v1
data:
  config.toml: |
    [global]
    # whether print configs
    print_configs = true

    # add label(agent_hostname) to series
    # "" -> auto detect hostname
    # "xx" -> use specified string xx
    # "$hostname" -> auto detect hostname
    # "$ip" -> auto detect ip
    # "$hostname-$ip" -> auto detect hostname and ip to replace the vars
    hostname = "$HOSTNAME"

    # will not add label(agent_hostname) if true
    omit_hostname = false

    # s | ms
    precision = "ms"

    # global collect interval
    interval = 15

    # [global.labels]
    # region = "shanghai"
    # env = "localhost"

    [writer_opt]
    # default: 2000
    batch = 2000
    # channel(as queue) size
    chan_size = 10000

    [[writers]]
    url = "http://172.31.62.213/prometheus/v1/write"

    # Basic auth username
    basic_auth_user = ""

    # Basic auth password
    basic_auth_pass = ""

    # timeout settings, unit: ms
    timeout = 5000
    dial_timeout = 2500
    max_idle_conns_per_host = 100
  prometheus.toml: |
    [prometheus]
    enable = true
    scrape_config_file="/opt/categraf/scrape/talk_test_in.yaml"
    ## log level, debug warn info error
    log_level="debug"
    ## wal reserve time duration, default value is 2 hour
    # wal_min_duration=2
kind: ConfigMap
metadata:
  name: talk-test-config
  namespace: flashtalk

Scrape configuration

apiVersion: v1
data:
  talk_test_in.yaml: |
    global:
      scrape_interval: 15s
      external_labels:
        scraper: talk-test
      #  cluster: test
      #  replica: 0

    scrape_configs:
      - job_name: "apiserver"
        metrics_path: "/metrics"
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        authorization:
          credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels:
              [
                __meta_kubernetes_namespace,
                __meta_kubernetes_service_name,
                __meta_kubernetes_endpoint_port_name,
              ]
            action: keep
            regex: default;kubernetes;https

    remote_write:
      - url: 'http://172.31.62.213/prometheus/v1/write'
kind: ConfigMap
metadata:
  name: talk-scrape-config
  namespace: flashtalk

1.2.2 Create the Deployment

apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: n9e
    component: categraf
    release: deployment
  name: talk-test-scraper
  namespace: flashtalk
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: n9e
      component: categraf
      release: deployment
  template:
    metadata:
      labels:
        app: n9e
        component: categraf
        release: deployment
    spec:
      containers:
      - args:
        - -configs
        - /opt/categraf/conf
        command:
        - /usr/bin/categraf
        env:
        - name: TZ
          value: Asia/Shanghai
        - name: HOSTNAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        - name: HOSTIP
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: status.hostIP
        image: flashcatcloud/categraf:v0.2.11
        imagePullPolicy: IfNotPresent
        name: categraf
        resources: {}
        volumeMounts:
        - mountPath: /opt/categraf/conf
          name: talk-test-config
        - mountPath: /opt/categraf/scrape
          name: talk-scrape-config
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccountName: talk-test-serviceaccount
      terminationGracePeriodSeconds: 30
      tolerations:
      - effect: NoSchedule
        operator: Exists
      volumes:
      - configMap:
          defaultMode: 420
          name: talk-test-config
        name: talk-test-config
      - configMap:
          defaultMode: 420
          name: talk-scrape-config
        name: talk-scrape-config

1.2.3 Monitoring from outside the cluster

Enable collection (enable = true) and point scrape_config_file at the scrape configuration file:

[prometheus]
  enable=true # set to true
  scrape_config_file="/home/kongfei/flashtalk/categraf/scrape/talk_apiserver_out.yaml"
  ## log level, debug warn info error
  log_level="info"
  ## wal file storage path ,default ./data-agent
  # wal_storage_path="/path/to/storage"
  ## wal reserve time duration, default value is 2 hour
  # wal_min_duration=2

Configure the labels (cluster=talk-test, job=talk-test-apiserver-out) and specify the apiserver address and the certificate paths:

global:
  scrape_interval: 15s
  external_labels:
    scraper: talk-test

scrape_configs:
  - job_name: "talk-test-apiserver-out"
    metrics_path: "/metrics"
    kubernetes_sd_configs:
      - role: endpoints
        api_server: "https://172.31.0.1:443"
        tls_config:
          ca_file: /etc/kubernetes/pki/ca.crt
          cert_file: /etc/kubernetes/pki/apiserver-kubelet-client.crt
          key_file: /etc/kubernetes/pki/apiserver-kubelet-client.key
          insecure_skip_verify: true
    scheme: https
    tls_config:
      ca_file: /etc/kubernetes/pki/ca.crt
      cert_file: /etc/kubernetes/pki/apiserver-kubelet-client.crt
      key_file: /etc/kubernetes/pki/apiserver-kubelet-client.key
      insecure_skip_verify: true
    relabel_configs:
      - source_labels:
          [
            __meta_kubernetes_namespace,
            __meta_kubernetes_service_name,
            __meta_kubernetes_endpoint_port_name,
          ]
        action: keep
        regex: default;kubernetes;https

remote_write:
  - url: 'http://172.31.62.213/prometheus/v1/write'

1.3 Monitoring kube-controller-manager

In /etc/kubernetes/manifests, set the kube-controller-manager's bind address to --bind-address=0.0.0.0.
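
On a kubeadm-managed control plane this is the static pod manifest /etc/kubernetes/manifests/kube-controller-manager.yaml. A sketch of the relevant part of its command list (all other flags stay as they are; the kubelet recreates the static pod automatically once the file is saved):

spec:
  containers:
  - command:
    - kube-controller-manager
    - --bind-address=0.0.0.0
    # ... remaining flags unchanged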

1.3.1 Create the Service

apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-controller-manager
  labels:
    k8s-app: kube-controller-manager
spec:
  selector:
    component: kube-controller-manager
  type: ClusterIP
  clusterIP: None
  ports:
  - name: https
    port: 10257
    targetPort: 10257

1.3.2 Scrape configuration

The role, permissions, and main configuration are reused from the apiserver setup; only talk-scrape-config.yaml needs to be changed to scrape the controller-manager:

apiVersion: v1
data:
  talk_test_in.yaml: |
    global:
      scrape_interval: 15s
      external_labels:
        scraper: talk-test
      #  cluster: test
      #  replica: 0

    scrape_configs:  
      - job_name: "talk-test-controller-in"
        metrics_path: "/metrics"
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          insecure_skip_verify: true
        authorization:
          credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels:
              [
                __meta_kubernetes_namespace,
                __meta_kubernetes_service_name,
                __meta_kubernetes_endpoint_port_name,
              ]
            action: keep
            regex: kube-system;kube-controller-manager;https

    remote_write:
      - url: 'http://172.31.62.213/prometheus/v1/write'
kind: ConfigMap
metadata:
  name: talk-scrape-config
  namespace: flashtalk

1.4 Monitoring kube-scheduler

1.4.1 Expose the port

In /etc/kubernetes/manifests, set the kube-scheduler's bind address to --bind-address=0.0.0.0.
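
Likewise for the scheduler, assuming a kubeadm layout, the file is /etc/kubernetes/manifests/kube-scheduler.yaml; a sketch of the relevant excerpt:

spec:
  containers:
  - command:
    - kube-scheduler
    - --bind-address=0.0.0.0
    # ... remaining flags unchanged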

1.4.2 Create the Service

apiVersion: v1
kind: Service
metadata:
  name: scheduler-service
  namespace: kube-system
spec:
  ports:
    - name: https
      protocol: TCP
      port: 10259
      targetPort: 10259

1.4.3 Create the Endpoints

If the component is deployed on a physical machine, the Endpoints object will not be created automatically when the Service is created, so it has to be created manually. Note: the key point is keeping the Endpoints name identical to the Service name.

apiVersion: v1
kind: Endpoints
metadata:
  name: scheduler-service # must match the Service name
  namespace: kube-system
subsets:
  - addresses:
      - ip: 10.206.0.16 # the target IP
    ports:
      - name: https # must match the port name in the Service
        port: 10259 # the target port

1.4.4 Scrape configuration

Place the token under the /path/to/token directory.

apiVersion: v1
data:
  talk_test_in.yaml: |
    global:
      scrape_interval: 15s
      external_labels:
        scraper: talk-test
      #  cluster: test
      #  replica: 0

    scrape_configs:  
      - job_name: "talk-test-scheduler-in"
        metrics_path: "/metrics"
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          insecure_skip_verify: true
        authorization:
          credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels:
              [
                __meta_kubernetes_namespace,
                __meta_kubernetes_service_name,
                __meta_kubernetes_endpoint_port_name,
              ]
            action: keep
            regex: kube-system;scheduler-service;https

    remote_write:
      - url: 'http://172.31.62.213/prometheus/v1/write'
kind: ConfigMap
metadata:
  name: talk-scrape-config
  namespace: flashtalk

1.5 Monitoring etcd

---
kind: ConfigMap
metadata:
  name: etcd-pki
apiVersion: v1
data:
  ca.crt: |
    -----BEGIN CERTIFICATE-----
    xxxx
    -----END CERTIFICATE-----
  client.crt: |
    -----BEGIN CERTIFICATE-----
    xxxx
    -----END CERTIFICATE-----
  client.key: |
    -----BEGIN RSA PRIVATE KEY-----
    xxxxx
    -----END RSA PRIVATE KEY-----
---

Note: if you use a Secret instead, the certificate contents must first be base64-encoded (or, to save the trouble, use the stringData field).
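
For reference, a minimal sketch of the Secret variant using stringData, mirroring the ConfigMap above (same etcd-pki name and key names; the contents are still placeholders):

apiVersion: v1
kind: Secret
metadata:
  name: etcd-pki
type: Opaque
stringData:
  ca.crt: |
    -----BEGIN CERTIFICATE-----
    xxxx
    -----END CERTIFICATE-----
  client.crt: |
    -----BEGIN CERTIFICATE-----
    xxxx
    -----END CERTIFICATE-----
  client.key: |
    -----BEGIN RSA PRIVATE KEY-----
    xxxxx
    -----END RSA PRIVATE KEY-----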

1.6 Monitoring the kubelet

1.6.1 Centralized scraping

Suitable for smaller clusters.

1.6.1.1 Create the role

The Role is largely the same as before; note the two additional resources (the full rules block is sketched after this list):

      - nodes/stats # note: the stats endpoint returns JSON and needs converting
      - nodes/proxy # note: used for serverless
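
For completeness, the resources list of the ClusterRole from 1.1 would then read as follows (a sketch; only the two entries above are new):

rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/metrics
      - nodes/stats
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]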

1.6.1.2 Create the Deployment

1.6.1.3 Scrape configuration

Not recommended for large clusters.

global:
  scrape_interval: 15s
  external_labels:
    scraper: node-test-ca
scrape_configs:
  - job_name: "kube-node"
    metrics_path: "/metrics"
    kubernetes_sd_configs:
      - role: node # note: the role here is node, not endpoints
        api_server: "https://172.31.0.1:443"
        tls_config:
          ca_file: /etc/kubernetes/pki/ca.crt
          cert_file: /etc/kubernetes/pki/apiserver-kubelet-client.crt
          key_file: /etc/kubernetes/pki/apiserver-kubelet-client.key
          insecure_skip_verify: true
          # using a token instead is recommended
    scheme: https
    tls_config:
      ca_file: /etc/kubernetes/pki/ca.crt
      cert_file: /etc/kubernetes/pki/apiserver-kubelet-client.crt
      key_file: /etc/kubernetes/pki/apiserver-kubelet-client.key
      insecure_skip_verify: true
    relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)

remote_write:
  - url: 'http://172.31.62.213/prometheus/v1/write'

1.6.2 DaemonSet-based scraping

The recommended mode, suitable for larger clusters.

1.6.2.1 Create the role

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test-role
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/metrics 
      - nodes/proxy # note: used for serverless
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
    verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test-serviceaccount
  namespace: flashtalk
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: talk-test-role
subjects:
- kind: ServiceAccount
  name: talk-test-serviceaccount
  namespace: flashtalk

1.6.2.2 Create the configurations

---
kind: ConfigMap
metadata:
  name: categraf-config
apiVersion: v1
data:
  config.toml: |
    [global]
    # whether print configs
    print_configs = false
    # add label(agent_hostname) to series
    # "" -> auto detect hostname
    # "xx" -> use specified string xx
    # "$hostname" -> auto detect hostname
    # "$ip" -> auto detect ip
    # "$hostname-$ip" -> auto detect hostname and ip to replace the vars
    hostname = "$HOSTNAME"
    # will not add label(agent_hostname) if true
    omit_hostname = false
    # s | ms
    precision = "ms"
    # global collect interval
    interval = 15
    # [global.labels]
    # region = "shanghai"
    # env = "localhost"
    [writer_opt]
    # default: 2000
    batch = 2000
    # channel(as queue) size
    chan_size = 10000
    [[writers]]
    url = "http://172.31.62.213/prometheus/v1/write"
    # Basic auth username
    basic_auth_user = ""
    # Basic auth password
    basic_auth_pass = ""
    # timeout settings, unit: ms
    timeout = 5000
    dial_timeout = 2500
    max_idle_conns_per_host = 100
 
---
kind: ConfigMap
metadata:
  name: input-cpu
apiVersion: v1
data:
  cpu.toml: |
    # # collect interval
    # interval = 15
    # # whether collect per cpu
    # collect_per_cpu = false
---
kind: ConfigMap
metadata:
  name: input-disk
apiVersion: v1
data:
  disk.toml: |
    # # collect interval
    # interval = 15
    # # By default stats will be gathered for all mount points.
    # # Set mount_points will restrict the stats to only the specified mount points.
    # mount_points = ["/"]
    # Ignore mount points by filesystem type.
    ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
    ignore_mount_points = ["/boot"]
---
kind: ConfigMap
metadata:
  name: input-diskio
apiVersion: v1
data:
  diskio.toml: |-
    # # collect interval
    # interval = 15
    # # By default, categraf will gather stats for all devices including disk partitions.
    # # Setting devices will restrict the stats to the specified devices.
    # devices = ["sda", "sdb", "vd*"]
---
kind: ConfigMap
metadata:
  name: input-docker
apiVersion: v1
data:
  docker.toml: |
    # # collect interval
    # interval = 15
    [[instances]]
    # # append some labels for series
    # labels = { region="cloud", product="n9e" }
    # # interval = global.interval * interval_times
    # interval_times = 1
    ## Docker Endpoint
    ##   To use TCP, set endpoint = "tcp://[ip]:[port]"
    ##   To use environment variables (ie, docker-machine), set endpoint = "ENV"
    endpoint = "unix:///var/run/docker.sock"
    ## Set to true to collect Swarm metrics(desired_replicas, running_replicas)
    gather_services = false
    gather_extend_memstats = false
    container_id_label_enable = true
    container_id_label_short_style = false
    ## Containers to include and exclude. Globs accepted.
    ## Note that an empty array for both will include all containers
    container_name_include = []
    container_name_exclude = []
    ## Container states to include and exclude. Globs accepted.
    ## When empty only containers in the "running" state will be captured.
    ## example: container_state_include = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
    ## example: container_state_exclude = ["created", "restarting", "running", "removing", "paused", "exited", "dead"]
    # container_state_include = []
    # container_state_exclude = []
    ## Timeout for docker list, info, and stats commands
    timeout = "5s"
    ## Specifies for which classes a per-device metric should be issued
    ## Possible values are 'cpu' (cpu0, cpu1, ...), 'blkio' (8:0, 8:1, ...) and 'network' (eth0, eth1, ...)
    ## Please note that this setting has no effect if 'perdevice' is set to 'true'
    perdevice_include = []
    ## Specifies for which classes a total metric should be issued. Total is an aggregated of the 'perdevice' values.
    ## Possible values are 'cpu', 'blkio' and 'network'
    ## Total 'cpu' is reported directly by Docker daemon, and 'network' and 'blkio' totals are aggregated by this plugin.
    ## Please note that this setting has no effect if 'total' is set to 'false'
    total_include = ["cpu", "blkio", "network"]
    ## Which environment variables should we use as a tag
    ##tag_env = ["JAVA_HOME", "HEAP_SIZE"]
    ## docker labels to include and exclude as tags. Globs accepted.
    ## Note that an empty array for both will include all labels as tags
    docker_label_include = []
    docker_label_exclude = ["annotation*", "io.kubernetes*", "*description*", "*maintainer*", "*hash", "*author*"]
    ## Optional TLS Config
    # use_tls = false
    # tls_ca = "/etc/telegraf/ca.pem"
    # tls_cert = "/etc/telegraf/cert.pem"
    # tls_key = "/etc/telegraf/key.pem"
    ## Use TLS but skip chain & host verification
    # insecure_skip_verify = false
---
kind: ConfigMap
metadata:
  name: input-kubelet-metrics
apiVersion: v1
data:
  prometheus.toml: |
    # # collect interval
    # interval = 15
    [[instances]]
    # kubelet metrics & cadvisor
    urls = ["https://127.0.0.1:10250/metrics","https://127.0.0.1:10250/metrics/cadvisor"]
    bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
    use_tls = true
    insecure_skip_verify = true
    url_label_key = "instance"
    url_label_value = "{{.Host}}"
    # if you use dashboards, do not delete this label
    labels = {job="talk-scraper"}
---
kind: ConfigMap
metadata:
  name: input-kernel
apiVersion: v1
data:
  kernel.toml: |
    # # collect interval
    # interval = 15
---
kind: ConfigMap
metadata:
  name: input-kernel-vmstat
apiVersion: v1
data:
  kernel_vmstat.toml: |
    # # collect interval
    # interval = 15
    # file: /proc/vmstat
    [white_list]
    oom_kill = 1
    nr_free_pages = 0
    nr_alloc_batch = 0
    nr_inactive_anon = 0
    nr_active_anon = 0
    nr_inactive_file = 0
    nr_active_file = 0
    nr_unevictable = 0
    nr_mlock = 0
    nr_anon_pages = 0
    nr_mapped = 0
    nr_file_pages = 0
    nr_dirty = 0
    nr_writeback = 0
    nr_slab_reclaimable = 0
    nr_slab_unreclaimable = 0
    nr_page_table_pages = 0
    nr_kernel_stack = 0
    nr_unstable = 0
    nr_bounce = 0
    nr_vmscan_write = 0
    nr_vmscan_immediate_reclaim = 0
    nr_writeback_temp = 0
    nr_isolated_anon = 0
    nr_isolated_file = 0
    nr_shmem = 0
    nr_dirtied = 0
    nr_written = 0
    numa_hit = 0
    numa_miss = 0
    numa_foreign = 0
    numa_interleave = 0
    numa_local = 0
    numa_other = 0
    workingset_refault = 0
    workingset_activate = 0
    workingset_nodereclaim = 0
    nr_anon_transparent_hugepages = 0
    nr_free_cma = 0
    nr_dirty_threshold = 0
    nr_dirty_background_threshold = 0
    pgpgin = 0
    pgpgout = 0
    pswpin = 0
    pswpout = 0
    pgalloc_dma = 0
    pgalloc_dma32 = 0
    pgalloc_normal = 0
    pgalloc_movable = 0
    pgfree = 0
    pgactivate = 0
    pgdeactivate = 0
    pgfault = 0
    pgmajfault = 0
    pglazyfreed = 0
    pgrefill_dma = 0
    pgrefill_dma32 = 0
    pgrefill_normal = 0
    pgrefill_movable = 0
    pgsteal_kswapd_dma = 0
    pgsteal_kswapd_dma32 = 0
    pgsteal_kswapd_normal = 0
    pgsteal_kswapd_movable = 0
    pgsteal_direct_dma = 0
    pgsteal_direct_dma32 = 0
    pgsteal_direct_normal = 0
    pgsteal_direct_movable = 0
    pgscan_kswapd_dma = 0
    pgscan_kswapd_dma32 = 0
    pgscan_kswapd_normal = 0
    pgscan_kswapd_movable = 0
    pgscan_direct_dma = 0
    pgscan_direct_dma32 = 0
    pgscan_direct_normal = 0
    pgscan_direct_movable = 0
    pgscan_direct_throttle = 0
    zone_reclaim_failed = 0
    pginodesteal = 0
    slabs_scanned = 0
    kswapd_inodesteal = 0
    kswapd_low_wmark_hit_quickly = 0
    kswapd_high_wmark_hit_quickly = 0
    pageoutrun = 0
    allocstall = 0
    pgrotated = 0
    drop_pagecache = 0
    drop_slab = 0
    numa_pte_updates = 0
    numa_huge_pte_updates = 0
    numa_hint_faults = 0
    numa_hint_faults_local = 0
    numa_pages_migrated = 0
    pgmigrate_success = 0
    pgmigrate_fail = 0
    compact_migrate_scanned = 0
    compact_free_scanned = 0
    compact_isolated = 0
    compact_stall = 0
    compact_fail = 0
    compact_success = 0
    htlb_buddy_alloc_success = 0
    htlb_buddy_alloc_fail = 0
    unevictable_pgs_culled = 0
    unevictable_pgs_scanned = 0
    unevictable_pgs_rescued = 0
    unevictable_pgs_mlocked = 0
    unevictable_pgs_munlocked = 0
    unevictable_pgs_cleared = 0
    unevictable_pgs_stranded = 0
    thp_fault_alloc = 0
    thp_fault_fallback = 0
    thp_collapse_alloc = 0
    thp_collapse_alloc_failed = 0
    thp_split = 0
    thp_zero_page_alloc = 0
    thp_zero_page_alloc_failed = 0
    balloon_inflate = 0
    balloon_deflate = 0
    balloon_migrate = 0
---
kind: ConfigMap
metadata:
  name: input-sysctl-fs
apiVersion: v1
data:
  linux_sysctl_fs.toml: |
    # # collect interval
    # interval = 15
---
kind: ConfigMap
metadata:
  name: input-mem
apiVersion: v1
data:
  mem.toml: |
    # # collect interval
    # interval = 15
    # # whether collect platform specified metrics
    collect_platform_fields = true
---
kind: ConfigMap
metadata:
  name: input-net
apiVersion: v1
data:
  net.toml: |-
    # # collect interval
    # interval = 15
    # # whether collect protocol stats on Linux
    # collect_protocol_stats = false
    # # setting interfaces will tell categraf to gather these explicit interfaces
    # interfaces = ["eth0"]
---
kind: ConfigMap
metadata:
  name: input-netstat
apiVersion: v1
data:
  netstat.toml: |
    # # collect interval
    # interval = 15
---
kind: ConfigMap
metadata:
  name: input-processes
apiVersion: v1
data:
  processes.toml: |-
    # # collect interval
    # interval = 15
    # # force use ps command to gather
    # force_ps = false
    # # force use /proc to gather
    # force_proc = false
---
kind: ConfigMap
metadata:
  name: input-system
apiVersion: v1
data:
  system.toml: |
    # # collect interval
    # interval = 15
    # # whether collect metric: system_n_users
    # collect_user_number = false
---

1.6.2.3 Create the DaemonSet

apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: n9e
    component: categraf
    release: daemonset
  name: talk-test
  namespace: flashtalk
spec:
  selector:
    matchLabels:
      app: n9e
      component: categraf
      release: daemonset
  template:
    metadata:
      labels:
        app: n9e
        component: categraf
        release: daemonset
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      containers:
      - env:
        - name: TZ
          value: Asia/Shanghai
        - name: HOSTNAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        - name: HOSTIP
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: status.hostIP
        - name: HOST_PROC
          value: /hostfs/proc
        - name: HOST_SYS
          value: /hostfs/sys
        - name: HOST_MOUNT_PREFIX
          value: /hostfs
        image: flashcatcloud/categraf:latest
        imagePullPolicy: IfNotPresent
        name: categraf
        resources: {}
        volumeMounts:
        - mountPath: /etc/categraf/conf/config.toml
          name: categraf-config
          subPath: config.toml
        - mountPath: /etc/categraf/conf/input.cpu
          name: input-cpu
        - mountPath: /etc/categraf/conf/input.mem
          name: input-mem
        - mountPath: /etc/categraf/conf/input.disk
          name: input-disk
        - mountPath: /etc/categraf/conf/input.diskio
          name: input-diskio
        - mountPath: /etc/categraf/conf/input.net
          name: input-net
        - mountPath: /etc/categraf/conf/input.netstat
          name: input-netstat
        - mountPath: /etc/categraf/conf/input.docker
          name: input-docker
        - mountPath: /etc/categraf/conf/input.prometheus
          name: input-kubelet-metrics
        - mountPath: /etc/categraf/conf/input.kernel
          name: input-kernel
        - mountPath: /etc/categraf/conf/input.kernel_vmstat
          name: input-kernel-vmstat
        - mountPath: /etc/categraf/conf/input.linux_sysctl_fs
          name: input-sysctl-fs
        - mountPath: /etc/categraf/conf/input.processes
          name: input-processes
        - mountPath: /etc/categraf/conf/input.system
          name: input-system
        - mountPath: /var/run/utmp
          name: hostroutmp
          readOnly: true
        - mountPath: /hostfs
          name: hostrofs
          readOnly: true
        - mountPath: /var/run/docker.sock
          name: docker-socket
      dnsPolicy: ClusterFirstWithHostNet
      hostNetwork: true
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: talk-test-serviceaccount # deprecated alias of serviceAccountName
      serviceAccountName: talk-test-serviceaccount # must match the ServiceAccount created above
      tolerations:
      - effect: NoSchedule
        operator: Exists
      volumes:
      - configMap:
          defaultMode: 420
          name: categraf-config
        name: categraf-config
      - configMap:
          defaultMode: 420
          name: input-cpu
        name: input-cpu
      - configMap:
          defaultMode: 420
          name: input-mem
        name: input-mem
      - configMap:
          defaultMode: 420
          name: input-disk
        name: input-disk
      - configMap:
          defaultMode: 420
          name: input-diskio
        name: input-diskio
      - configMap:
          defaultMode: 420
          name: input-net
        name: input-net
      - configMap:
          defaultMode: 420
          name: input-netstat
        name: input-netstat
      - configMap:
          defaultMode: 420
          name: input-docker
        name: input-docker
      - configMap:
          defaultMode: 420
          name: input-kubelet-metrics
        name: input-kubelet-metrics
      - configMap:
          defaultMode: 420
          name: input-kernel
        name: input-kernel
      - configMap:
          defaultMode: 420
          name: input-kernel-vmstat
        name: input-kernel-vmstat
      - configMap:
          defaultMode: 420
          name: input-sysctl-fs
        name: input-sysctl-fs
      - configMap:
          defaultMode: 420
          name: input-processes
        name: input-processes
      - configMap:
          defaultMode: 420
          name: input-system
        name: input-system
      - hostPath:
          path: /
          type: ""
        name: hostrofs
      - hostPath:
          path: /var/run/utmp
          type: ""
        name: hostroutmp
      - hostPath:
          path: /var/run/docker.sock
          type: Socket
        name: docker-socket

1.7 Scraping kube-state-metrics

1.7.1 Create the configuration

---
apiVersion: v1
data:
  config.toml: |
    [global]
    # whether print configs
    print_configs = true

    # add label(agent_hostname) to series
    # "" -> auto detect hostname
    # "xx" -> use specified string xx
    # "$hostname" -> auto detect hostname
    # "$ip" -> auto detect ip
    # "$hostname-$ip" -> auto detect hostname and ip to replace the vars
    hostname = "$HOSTNAME"

    # will not add label(agent_hostname) if true
    omit_hostname = false

    # s | ms
    precision = "ms"

    # global collect interval
    interval = 15

    # [global.labels]
    # region = "shanghai"
    # env = "localhost"

    [writer_opt]
    # default: 2000
    batch = 2000
    # channel(as queue) size
    chan_size = 10000

    [[writers]]
    url = "http://172.31.62.213/prometheus/v1/write"

    # Basic auth username
    basic_auth_user = ""

    # Basic auth password
    basic_auth_pass = ""

    # timeout settings, unit: ms
    timeout = 5000
    dial_timeout = 2500
    max_idle_conns_per_host = 100
  prometheus.toml: |
    [prometheus]
    enable = true
    scrape_config_file="/opt/categraf/scrape/ksm.yaml"
    ## log level, debug warn info error
    log_level="debug"
    ## wal reserve time duration, default value is 2 hour
    # wal_min_duration=2
kind: ConfigMap
metadata:
  name: talk-test-config
  namespace: flashtalk
---
apiVersion: v1
data:
  ksm.yaml: |
    global:
      scrape_interval: 15s
      external_labels:
        scraper: talk-test
        cluster: test
      #  cluster: test
      #  replica: 0

    scrape_configs:
      - job_name: "flashtalk-test-ksm-in"
        metrics_path: "/metrics"
        kubernetes_sd_configs:
          - role: endpoints
        scheme: http
        relabel_configs:
          - source_labels:
              [
                __meta_kubernetes_namespace,
                __meta_kubernetes_service_name,
                __meta_kubernetes_endpoint_port_name,
              ]
            action: keep
            regex: kube-system;kube-state-metrics;http-metrics

    remote_write:
      - url: 'http://172.31.62.213/prometheus/v1/write'
kind: ConfigMap
metadata:
  name: talk-scrape-config
  namespace: flashtalk

1.7.2 Create the Deployment

Since the ConfigMap names (talk-test-config and talk-scrape-config) and mount paths are unchanged, the Deployment from 1.2.2 can be reused as-is.

1.8 Collecting node metrics in a serverless environment

In a public cloud, serverless nodes can usually reach the apiserver without restriction, but communication with the nodes themselves (for example, to collect pod resource metrics) can differ between environments. The following takes /metrics/cadvisor as an example and walks through a generally applicable approach: proxying the node metrics through the apiserver.

1.8.1 Create the role and permissions

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test
  namespace: flashtalk
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/metrics
      - nodes/stats
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test
  namespace: flashtalk
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  annotations: {}
  labels:
    app: n9e
    component: categraf
  name: talk-test
  namespace: flashtalk
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: talk-test
subjects:
- kind: ServiceAccount
  name: talk-test
  namespace: flashtalk

1.8.2 Create the configuration


kind: ConfigMap
metadata:
  name: categraf-config
apiVersion: v1
data:
  config.toml: |
    [global]
    # whether print configs
    print_configs = true

    # add label(agent_hostname) to series
    # "" -> auto detect hostname
    # "xx" -> use specified string xx
    # "" -> auto detect hostname
    # "" -> auto detect ip
    # "-" -> auto detect hostname and ip to replace the vars
    hostname = "tt-fc-dev01.nj"

    # will not add label(agent_hostname) if true
    omit_hostname = false

    # s | ms
    precision = "ms"

    # global collect interval
    interval = 15

    # [global.labels]
    # region = "shanghai"
    # env = "localhost"

    [writer_opt]
    # default: 2000
    batch = 2000
    # channel(as queue) size
    chan_size = 10000

    [[writers]]
    url = "http://172.31.62.213/prometheus/v1/write"

    # Basic auth username
    basic_auth_user = ""

    # Basic auth password
    basic_auth_pass = ""

    # timeout settings, unit: ms
    timeout = 5000
    dial_timeout = 2500
    max_idle_conns_per_host = 100
    [http]
    enable = false
    address = ":9100"
    print_access = false
    run_mode = "release"
  prometheus.toml: |
    [prometheus]
    enable = true
    scrape_config_file="/opt/categraf/scrape/in_cluster_scrape.yaml"
    ## log level, debug warn info error
    log_level="info"
    ## wal reserve time duration, default value is 2 hour
    # wal_min_duration=2
---
kind: ConfigMap
metadata:
  name: scrape-config
apiVersion: v1
data:
  in_cluster_scrape.yaml: |
    global:
      scrape_interval: 15s
      #external_labels:
      #  cluster: test
      #  replica: 0
    scrape_configs:
      - job_name: serverless-resource
        scrape_interval: 10s
        scrape_timeout: 10s
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          insecure_skip_verify: true
        authorization:
          credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

    remote_write:
      - url: 'http://172.31.62.213/prometheus/v1/write'

1.8.3 Create the Deployment

apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: n9e
    component: categraf
    release: deployment
  name: nightingale-categraf
spec:
  replicas: 1
  selector:
    matchLabels:
      app: n9e
      component: categraf
      release: deployment
  template:
    metadata:
      labels:
        app: n9e
        component: categraf
        release: deployment
    spec:
      containers:
      - env:
        - name: TZ
          value: Asia/Shanghai
        - name: HOSTNAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        - name: HOSTIP
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: status.hostIP
        image: flashcatcloud/categraf:latest
        imagePullPolicy: IfNotPresent
        name: categraf
        command: ["/usr/bin/categraf"]
        args: ["-configs", "/opt/categraf/conf"]
        resources: {}
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /opt/categraf/conf
          name: categraf-config
        - mountPath: /opt/categraf/scrape
          name: scrape-config
      dnsPolicy: ClusterFirst
      hostNetwork: false
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccountName: talk-test
      terminationGracePeriodSeconds: 30
      tolerations:
      - effect: NoSchedule
        operator: Exists
      volumes:
      - configMap:
          defaultMode: 420
          name: categraf-config
        name: categraf-config
      - configMap:
          defaultMode: 420
          name: scrape-config
        name: scrape-config

All rights reserved. Please credit the author and the source when reposting.
