Monitoring Kubernetes with an external Prometheus

Create the RBAC objects

cat prometheus-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-system

Get ca.crt and the token

kubectl get sa prometheus -o yaml -n kube-system
kubectl get secrets prometheus-token-tn6ww -n kube-system -o yaml
# base64-decode the ca.crt and token fields from the secret
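A minimal sketch of that step, assuming the token secret is named prometheus-token-tn6ww as above (the suffix is random per cluster):
kubectl -n kube-system get secret prometheus-token-tn6ww -o jsonpath='{.data.ca\.crt}' | base64 -d > ca.crt
kubectl -n kube-system get secret prometheus-token-tn6ww -o jsonpath='{.data.token}' | base64 -d > token
# copy both files to /opt/prometheus/etc/ on the external Prometheus host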

Configure the Prometheus job

- job_name: 'k8s-test-cadvisor'
  scheme: https
  scrape_interval: 10s
  tls_config:
    ca_file: /opt/prometheus/etc/ca.crt   # CA certificate
    insecure_skip_verify: true
  bearer_token_file: /opt/prometheus/etc/token   # bearer token
  metrics_path: /metrics/cadvisor
  file_sd_configs:
    - refresh_interval: 30s   # how often to re-read the target files
      files:
        - /opt/prometheus/etc/targets/target_k8s.json

Configure confd

prometheus_discovery_k8s.tmpl
[
{{- range $index, $info := getvs "/prometheus/discovery/k8s/*" -}}
{{- $data := json $info -}}
{{- if ne $index 0 }},{{- end }}
{
"targets": [
"{{$data.address}}"
],
"labels":{
"node": "{{$data.name}}"
{{- if $data.labels -}}
{{- range $data.labels -}}
,"{{.key}}": "{{.val}}"
{{- end}}
{{- end}}
}
}{{- end }}
]

prometheus_discovery_k8s.toml
[template]
src = "prometheus_discovery_k8s.tmpl"
dest = "/opt/prometheus/etc/targets/target_k8s.json"
mode = "0777"
keys = [
"/prometheus/discovery/k8s",
]
reload_cmd = "curl -XPOST 'http://127.0.0.1:9090/-/reload'"

Simulate service discovery

etcdctl put /prometheus/discovery/k8s/node01 '{"name":"node01","address":"10.200.1.205:10250","labels":[{"key":"label1","val":"test1"},{"key":"label2","val":"test2"}]}'
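With that key in etcd, prometheus_discovery_k8s.tmpl renders target_k8s.json roughly as follows (whitespace aside):
[
  {
    "targets": [
      "10.200.1.205:10250"
    ],
    "labels": {
      "node": "node01",
      "label1": "test1",
      "label2": "test2"
    }
  }
]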

Deploy the kube-state-metrics service

Fetch the Kubernetes manifests from https://github.com/kubernetes/kube-state-metrics/tree/master/examples/standard
Kubernetes v1.20.4, kube-state-metrics v2.0.0
dockerhub.codoon.com/kube-state-metrics/kube-state-metrics:v2.0.0
Remember to replace the image and change the Service type to NodePort
Configure the Prometheus job:
- job_name: 'k8s-test-kube-state'
  scrape_interval: 10s
  static_configs:
    - targets:
        - '10.200.1.205:8879'

Notes

1. Pod-level metrics are exposed by cAdvisor, which is built into the kubelet.
2. Metrics for other Kubernetes resources come from the kube-state-metrics service.
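For example, one metric from each source (standard metric names exposed by cAdvisor and kube-state-metrics; shown purely as an illustration):
# container memory, scraped from the kubelet's cAdvisor endpoint
container_memory_working_set_bytes{namespace="kube-system"}
# pod status, scraped from kube-state-metrics
kube_pod_status_phase{phase="Running"}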

Prometheus + confd + etcd auto-discovery

  • Architecture

    1. All Prometheus configuration files are generated by confd from data stored in etcd
    2. Collection is done by node-exporter, kafka-exporter, mysql-exporter, etc.; on startup each exporter calls the cmdb API to register itself in etcd
    3. codoon-alert talks to etcd to manage rules, alert silencing, and related configuration
  • Main configuration file

    global:
      scrape_interval: 10s       # scrape interval
      scrape_timeout: 10s        # scrape timeout
      evaluation_interval: 15s   # rule evaluation interval
    alerting:
      alertmanagers:
        - scheme: http
          timeout: 10s
          api_version: v1
          static_configs:
            - targets:
                - 127.0.0.1:9093
    rule_files:
      - /codoon/prometheus/etc/rules/rule_*.yml
    scrape_configs:
      - job_name: prometheus
        honor_timestamps: true
        scrape_interval: 10s
        scrape_timeout: 10s
        metrics_path: /metrics
        scheme: http
        static_configs:
          - targets:
              - 127.0.0.1:9090
      - job_name: codoon_ops
        honor_timestamps: true
        scrape_interval: 10s
        scrape_timeout: 10s
        metrics_path: /metrics
        scheme: http
        file_sd_configs:
          - files:
              - /codoon/prometheus/etc/targets/target_*.json
            refresh_interval: 20s   # how often to re-read the target files
  • Prometheus startup commands

    /codoon/prometheus/prometheus --web.enable-lifecycle --config.file=/codoon/prometheus/etc/prometheus.yml --storage.tsdb.path=/codoon/prometheus

    nohup ./prometheus --web.enable-lifecycle --config.file=./etc/prometheus.yml --storage.tsdb.path=/codoon/prometheus --web.external-url=xxx.com/ 2>&1 > prometheus.log &
  • confd configuration files

    # Service discovery
    # conf.d/discovery_host.toml
    [template]
    src = "discovery_host.tmpl"
    dest = "/codoon/prometheus/etc/targets/target_host.json"
    mode = "0777"
    keys = [
        "/prometheus/discovery/host",
    ]
    reload_cmd = "curl -XPOST 'http://127.0.0.1:9090/-/reload'"

    # templates/discovery_host.tmpl
    [
    {{- range $index, $info := getvs "/prometheus/discovery/host/*" -}}
    {{- $data := json $info -}}
    {{- if ne $index 0 }},{{- end }}
      {
        "targets": [
          "{{$data.address}}"
        ],
        "labels": {
          "instance": "{{$data.name}}"
          {{- if $data.labels -}}
          {{- range $data.labels -}}
          ,"{{.key}}": "{{.val}}"
          {{- end}}
          {{- end}}
        }
      }{{- end }}
    ]

    # Rule distribution
    # conf.d/rule_host.toml
    [template]
    src = "rule_host.tmpl"
    dest = "/codoon/prometheus/etc/rules/rule_host.yml"
    mode = "0777"
    keys = [
        "/prometheus/rule/host",
    ]
    reload_cmd = "curl -XPOST 'http://127.0.0.1:9090/-/reload'"

    # templates/rule_host.tmpl
    groups:
    - name: host
      rules:
    {{- range $info := getvs "/prometheus/rule/host/*"}}
    {{- $data := json $info}}
    {{- if $data.status}}
      - alert: {{$data.alert}}
        expr: {{$data.expr}}
        for: {{$data.for}}
        {{- if $data.labels}}
        labels:
        {{- range $data.labels}}
          {{.key}}: {{.val}}
        {{- end}}
        {{- end}}
        annotations:
        {{- if $data.summary}}
          summary: "{{$data.summary}}"
        {{- end}}
        {{- if $data.description}}
          description: "{{$data.description}}"
        {{- end}}
    {{- end }}
    {{- end }}
  • confd startup commands

    /codoon/prometheus/confd-0.16.0-linux-amd64 -confdir /codoon/prometheus/confd/ -backend etcdv3  -watch -node http://127.0.0.1:2379

    nohup ./confd-0.16.0-linux-amd64 -confdir ./confd/ -backend etcdv3 -watch -node http://127.0.0.1:2379 2>&1 > confd.log &
  • Simulate service discovery

    # by default the target only gets the label instance: name
    etcdctl put /prometheus/discovery/host/test1 '{"name":"test1","address":"10.12.10.1:9091"}'
    # with custom labels
    etcdctl put /prometheus/discovery/host/test2 '{"name":"test2","address":"10.12.10.1:9092","labels":[{"key":"label1","val":"test1"},{"key":"label2","val":"test2"}]}'
  • Simulate rule distribution (the rendered rule file is shown after the commands)

    etcdctl put /prometheus/rule/host/test1 '{"alert":"test1 is down","expr":"up == 0","for":"30s","summary":"s1","description":"d1"}'
    # with custom labels
    etcdctl put /prometheus/rule/host/test2 '{"alert":"test2 is down","expr":"up == 0","for":"1m","summary":"s1","description":"d1","labels":[{"key":"label1","val":"test1"},{"key":"label2","val":"test2"}]}'
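    Note that rule_host.tmpl only renders entries whose status field is true, so the entries above also need "status":true to show up. With that set, the first entry renders into rule_host.yml roughly as:
    groups:
    - name: host
      rules:
      - alert: test1 is down
        expr: up == 0
        for: 30s
        annotations:
          summary: "s1"
          description: "d1"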
  • alertmanager

    nohup ./alertmanager-0.21.0.linux-amd64/alertmanager --config.file=alertmanager-0.21.0.linux-amd64/alertmanager.yml 2>&1 > alertmanager.log &
  • Common PromQL queries and rules

    /prometheus/rule/host/nodata
    # no data
    {"status":true,"alert":"no data","expr":"up == 0","for":"5m","summary":"no data","description":"{{$labels.instance}} no data for 5m, curr: {{ $value }}","labels":[{"key":"diyk","val":"diyv"}]}

    /prometheus/rule/host/availcpult20
    # CPU idle below 20%
    {"status":true,"alert":"avail cpu lt 20%","expr":"avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) by (type,instance,env,ip) < 0.2","for":"5m","summary":"avail cpu lt 20%","description":"avail cpu lt 20% for 5m, curr: {{ $value }}","labels":[{"key":"diyk","val":"diyv"}]}

    /prometheus/rule/host/availmemlt20
    # available memory below 20%
    {"status":true,"alert":"avail mem lt 20%","expr":"1-(node_memory_MemTotal_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_MemFree_bytes) /node_memory_MemTotal_bytes < 0.2","for":"5m","summary":"avail mem lt 20%","description":"avail mem lt 20% for 5m, curr: {{ $value }}","labels":[{"key":"diyk","val":"diyv"}]}

    /prometheus/rule/host/availdisklt20
    # available disk space below 20%
    {"status":true,"alert":"avail disk lt 20%","expr":"node_filesystem_avail_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*docker.*.*pod.*.*container.*kubelet\"} /node_filesystem_size_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*docker.*.*pod.*.*container.*kubelet\"} < 0.2","for":"5m","summary":"avail disk lt 20%","description":"mount: {{ $labels.mountpoint }} avail lt 20G for 5m, curr: {{ $value }}","labels":[{"key":"diyk","val":"diyv"}]}

    /prometheus/rule/host/load1toohigh
    # 1-minute load
    {"status":true,"alert":"load1 is too high","expr":"node_load1/2 > on(type,instance,env,ip) count(node_cpu_seconds_total{mode=\"system\"}) by (type,instance,env,ip)","for":"5m","summary":"load1 is too high","description":"load1 is too high for 5m, curr: {{ $value }}","labels":[{"key":"diyk","val":"diyv"}]}

    /prometheus/rule/host/useiopsgt80
    # disk IO utilisation above 80%
    {"status": true,"alert":"iops too high","expr":"rate(node_disk_io_time_seconds_total[5m]) > 0.8","for":"5m","summary":"iops too high","description":"iops too high for 5m, curr: {{ $value }}","labels":[{"key":"diyk","val":"diyv"}]}

    (1 - ((node_memory_MemFree_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"} + node_memory_Buffers_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"} + node_memory_Cached_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"}) / node_memory_MemTotal_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"})) * 100

    ((node_memory_MemTotal_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"} - node_memory_MemFree_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"} - node_memory_Buffers_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"} - node_memory_Cached_bytes) / (node_memory_MemTotal_bytes{origin_prometheus=~"$origin_prometheus",job=~"$job"} )) * 100

    # Alert rules summary
    1-minute load greater than the CPU core count, for 5m
    node_load1 > on(instance,ip) count(node_cpu_seconds_total{mode="system"}) by (instance,ip)

    CPU idle below 20%, for 5m
    avg(rate(node_cpu_seconds_total{mode="system"}[5m])) by (instance) *100
    avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance) *100
    avg(rate(node_cpu_seconds_total{mode="iowait"}[5m])) by (instance) *100
    avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance) *100

    Disk availability below 20% and free space below 20G, for 5m
    (node_filesystem_avail_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*pod.*.*docker-lib.*\"} / node_filesystem_size_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*pod.*.*docker-lib.*\"} < 0.2) and node_filesystem_avail_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*pod.*.*docker-lib.*\"} < 20*1024^3

    Memory usage above 80%, for 5m
    (node_memory_MemTotal_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_MemFree_bytes) /node_memory_MemTotal_bytes

    IOPS: writes above 300, reads above 2000, for 5m
    rate(node_disk_reads_completed_total[5m]) > 1000 or rate(node_disk_writes_completed_total[5m]) > 200

    NIC: 1-hour total traffic and 5-minute rate
    increase(node_network_receive_bytes_total[60m]) /1024/1024
    increase(node_network_transmit_bytes_total[60m]) /1024/1024
    rate(node_network_receive_bytes_total[5m])*8
    rate(node_network_transmit_bytes_total[5m])*8
  • temp

    {"status": true,"alert":"rw iops too high","expr":"rate(node_disk_io_time_seconds_total[5m]) > 0.8","for":"5m","summary":"iops too high","description":"iops too high for 5m, curr: {{ $value }}","labels":[{"key":"receiver","val":"xxxx,xxxx,xxx"}

    etcdctl put /prometheus/discovery/host/codoon-istio-master01 '{"name":"codoon-istio-master01","address":"10.10.16.73:9100","labels": [{"key":"type","val":"host"},{"key":"ip","val":"10.10.16.73"}]}'

    etcdctl put /prometheus/rule/host/cpuavail20 '{"alert":"cpu avail less 20","expr":"avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) by (instance) < 0.2","for":"5m","summary":"avail less 20","description":"cpu avail less 20 for 5m, curr: {{ $value }}","labels":[{"key":"receiver","val":"xxx"}]}'

    etcdctl put /prometheus/rule/host/memuse80 '{"alert":"mem use gt 80","expr":"(node_memory_MemTotal_bytes - node_memory_Cached_bytes - node_memory_Buffers_bytes - node_memory_MemFree_bytes) /node_memory_MemTotal_bytes > 0.8","for":"5m","summary":"use gt 80","description":"mem use gt 80 for 5m, curr: {{ $value }}","labels":[{"key":"receiver","val":"xxx"}]}'

    etcdctl put /prometheus/rule/host/iopsth '{"alert":"rw iops too high","expr":"rate(node_disk_reads_completed_total[5m]) > 1000 or rate(node_disk_writes_completed_total[5m]) > 200","for":"5m","summary":"iops too high","description":"iops too high for 5m, curr: {{ $value }}","labels":[{"key":"receiver","val":"xxxx"}]}'

    {
      "status": true,
      "alert": "avail disk lt 20%",
      "expr": "node_filesystem_avail_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*docker.*.*pod.*.*container.*kubelet\"} /node_filesystem_size_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*docker.*.*pod.*.*container.*kubelet\"} < 0.2 and node_filesystem_avail_bytes{fstype=~\"ext.*xfs\",mountpoint!~\".*docker.*.*pod.*.*container.*kubelet\"} < 50*1024^3",
      "for": "2m",
      "summary": "avail disk lt 20%",
      "description": "mount: {{ $labels.mountpoint }} avail lt 20% for 2m, curr: {{ $value }}",
      "labels": [{
        "key": "severity",
        "val": "warnning"
      }]
    }

    etcdctl put /prometheus/rule/host/load1too2high '{"status":true,"alert":"load1 is too2 high","expr":"node_load1 > on(type,instance,env,ip) count(node_cpu_seconds_total{mode=\"system\"}) by (type,instance,env,ip) /1.5","for":"2m","summary":"load1 is too2 high","description":"load1 is too2 high for 2m, curr: {{ $value }}","labels":[{"key":"severity","val":"critical"}]}'
  • Startup script (systemd)

    vim /usr/lib/systemd/system/prometheus.service
    [Unit]
    Description=prometheus
    Documentation=codoon_ops
    After=network.target
    [Service]
    EnvironmentFile=-/etc/sysconfig/prometheus
    User=prometheus
    ExecStart=/usr/local/prometheus/prometheus \
    --web.enable-lifecycle \
    --storage.tsdb.path=/codoon/prometheus/data \
    --config.file=/codoon/prometheus/etc/prometheus.yml \
    --web.listen-address=0.0.0.0:9090 \
    --web.external-url= $PROM_EXTRA_ARGS \
    --log.level=debug
    Restart=on-failure
    StartLimitInterval=1
    RestartSec=3
    [Install]
    WantedBy=multi-user.target

    systemctl daemon-reload
    systemctl enable prometheus
  • docker

    docker run --name promconfd -d -v /codoon/prometheus/etc:/opt/prometheus/etc -v /codoon/prometheus/data:/opt/prometheus/data -v /codoon/prometheus/confd/etc:/opt/confd/etc -p 9090:9090 dockerhub.xxxx.com/prom/prometheus:v2.24.1
  • Deployment layout

    prometheus + confd run as docker containers on prom-monitor
    TSDB data path: /codoon/prometheus/data
    Prometheus config path: /codoon/prometheus/etc
    confd config path: /codoon/prometheus/confd/etc

    ops-etcd012
    Service auto-discovery keys:
    /prometheus/discovery/host/*
    /prometheus/discovery/db/*
    ...

    Automatic rule distribution keys:
    /prometheus/rule/host/*
    /prometheus/rule/host/*
    ...
  • Notification policy

    1. A warnning-level alert waits 1 minute before its first notification;
    if a critical alert of the same type fires in that window, the critical one is sent immediately and the warnning one is dropped.
    2. warnning-level alerts are re-sent every 20 minutes.
    3. critical-level alerts are re-sent every 10 minutes (a minimal sketch follows below).
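    A minimal Go sketch of these intervals, purely illustrative (the type and fields are invented for the example; this is not codoon-alert's actual code):

    package main

    import (
        "fmt"
        "time"
    )

    // alertState is a hypothetical record of one alert's notification history.
    type alertState struct {
        severity  string // "warnning" or "critical", matching the label values used above
        firstSeen time.Time
        lastSent  time.Time
    }

    // shouldSend applies the policy: criticals repeat every 10 minutes; warnings wait
    // 1 minute before the first send (so a same-type critical can supersede them) and
    // then repeat every 20 minutes.
    func shouldSend(a alertState, sameTypeCritical bool, now time.Time) bool {
        if a.severity == "critical" {
            return a.lastSent.IsZero() || now.Sub(a.lastSent) >= 10*time.Minute
        }
        if sameTypeCritical {
            return false // suppressed by a critical alert of the same type
        }
        if a.lastSent.IsZero() {
            return now.Sub(a.firstSeen) >= time.Minute
        }
        return now.Sub(a.lastSent) >= 20*time.Minute
    }

    func main() {
        now := time.Now()
        w := alertState{severity: "warnning", firstSeen: now.Add(-2 * time.Minute)}
        fmt.Println(shouldSend(w, false, now)) // true: past the 1-minute wait, no critical seen
        fmt.Println(shouldSend(w, true, now))  // false: a same-type critical takes over
    }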
  • Silence configuration

    Configured through opscenter; it filters by labels and picks the best match.
    Inhibition logic: for the same alert, the higher severity automatically inhibits the lower one; the inhibition is lifted once the higher-severity alert resolves.
    Silence configuration is stored in ops-etcd under /prometheus/silencev2

    Supports regex matching on alertname:instance:labels..., i.e. alert name, instance, IP, severity, etc.
    Add a silence (POST)
    curl -X POST -H 'Content-Type: application/json' -d '{"sc_key":"tidb","sc_val":"instance:severity:alertname:tidb-(nodessd-[0-9]+)warnning(load1.*avail cpu.*)"}' codoon-alert.in.xxx.com:8875/backend/codoon_alert/api/v1/silence
    Delete a silence (DELETE)
    curl -X DELETE codoon-alert.in.xxx.com:8875/backend/codoon_alert/api/v1/silence/tidb
    List silences (GET)
    curl codoon-alert.in.xx.com:8875/backend/codoon_alert/api/v1/silence

    View the alertconfig settings (GET)
    curl codoon-alert.in.xxx.com:8875/backend/codoon_alert/api/v1/alertconfig?cfg_key=noticewaitclearreslove

    {
      "data": {
        "apitmporcheckall": "instance:alertname:(nginx-api-tmpapicheck(-[0-9])?)(.*)",
        "intwarnall": "instance:severity:alertname:integrationwarnning(.*)",
        "istio": "instance:severity:alertname:(codoon[0-9]+istio)warnning(load1.*)",
        "monitor_roy": "instance:severity:alertname:monitor_roywarnning(load1.*)",
        "testall": "instance:alertname:testall(.*)",
        "tidb": "instance:severity:alertname:tidb-(nodessd-[0-9]+)warnning(load1.*avail cpu.*)"
      },
      "description": "ok",
      "status": "OK"
    }
  • Alert receiver configuration

    Works like the silence configuration: it filters by labels and picks the best match. Label matching
    checks = and != first, then the regex matchers =~ and !~ (a sketch follows below).
    Receiver configuration is stored in ops-etcd under /prometheus/receiver
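    A minimal Go sketch of that matching order, purely illustrative (matcher is an invented type, not codoon-alert's actual code); exact matchers are evaluated before regex matchers, so a mismatch short-circuits without running any regex:

    package main

    import (
        "fmt"
        "regexp"
    )

    // matcher is a hypothetical label matcher; op is one of "=", "!=", "=~", "!~".
    type matcher struct {
        label, op, value string
    }

    // matches evaluates =/!= matchers first, then the =~/!~ regex matchers.
    func matches(labels map[string]string, ms []matcher) bool {
        for _, exactPass := range []bool{true, false} {
            for _, m := range ms {
                isExact := m.op == "=" || m.op == "!="
                if isExact != exactPass {
                    continue
                }
                v := labels[m.label]
                switch m.op {
                case "=":
                    if v != m.value {
                        return false
                    }
                case "!=":
                    if v == m.value {
                        return false
                    }
                case "=~":
                    if ok, _ := regexp.MatchString(m.value, v); !ok {
                        return false
                    }
                case "!~":
                    if ok, _ := regexp.MatchString(m.value, v); ok {
                        return false
                    }
                }
            }
        }
        return true
    }

    func main() {
        labels := map[string]string{"severity": "warnning", "alertname": "load1 is too high"}
        ms := []matcher{{"severity", "=", "warnning"}, {"alertname", "=~", "load1.*"}}
        fmt.Println(matches(labels, ms)) // true
    }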
  • Alert templates

    Customised via opscenter; when more than 3 alerts fire they are automatically collapsed,
    and an extra email containing the full alert details is sent as well.
    Template configuration is stored in ops-etcd under /prometheus/template
  • Other notes

    With the label type=service, alert recipients are looked up from cmdb by service name (service=xxx)
    To stop receiving resolved notifications, set resolved=no in the labels
    Pod CPU/memory alerts (pprof_type=memory/cpu) also send a pprof profile
    Service error/panic alerts (log_type: ERRO/PANIC) fetch the details from Loki and include them
    servicemap maps log names to services; it watches err_check/service_map

Code review platform


sonarqube
# 1. Start the PostgreSQL container
docker run --name db -e POSTGRES_USER=sonar -e POSTGRES_PASSWORD=codoon.com -d postgres
# 2. Create the volumes
docker volume create sonarqube_data
docker volume create sonarqube_extensions
docker volume create sonarqube_logs
# 3. Start SonarQube
docker run -d --name sonarqube -p 9000:9000 --link db -e SONAR_JDBC_URL=jdbc:postgresql://db:5432/sonar -e SONAR_JDBC_USERNAME=sonar -e SONAR_JDBC_PASSWORD=codoon.com -v sonarqube_data:/opt/sonarqube/data -v sonarqube_extensions:/opt/sonarqube/extensions -v sonarqube_logs:/opt/sonarqube/logs sonarqube:8.9.3-community

Cache eviction strategies

Eviction strategies

  • FIFO (First In, First Out)

    Evict the oldest record in the cache: keep a queue, append new records at the tail, and when memory runs out evict the record at the head.

    In many workloads, however, some of the earliest-added records are still accessed frequently; they keep getting cached and then evicted, which lowers the hit rate.

  • LFU (Least Frequently Used)

    Evict the record with the lowest access frequency. LFU maintains a queue sorted by access count; every access increments the count and re-sorts the queue.

    When memory runs out, the least-accessed record is evicted. Tracking a count per record costs extra memory, and LFU adapts slowly when the access pattern changes because it is dominated by history: a record with a very high historical count that is no longer accessed can still take a long time to be evicted.

  • LRU (Least Recently Used)

    Keep a queue and move a record to the tail whenever it is accessed, so the head is always the least recently used record; when memory runs out, evict the record at the head.

Implementing LRU in Go

  • Map + doubly linked list (map and list.List)
package lru

import "container/list"

type Cache struct {
	maxBytes  int64                         // maximum capacity in bytes
	uBytes    int64                         // bytes currently used
	ll        *list.List                    // doubly linked list
	cache     map[string]*list.Element      // key -> list element
	OnRemoved func(key string, value Value) // callback invoked when a record is evicted
}

type Value interface {
	Len() int
}

type entry struct {
	key   string
	value Value
}

func New(maxBytes int64, onRemoved func(string, Value)) *Cache {
	return &Cache{
		maxBytes:  maxBytes,
		ll:        list.New(),
		cache:     make(map[string]*list.Element),
		OnRemoved: onRemoved,
	}
}
  • Adding, removing, and looking up cache entries (a Len helper used by the tests is sketched after this block)
// Add or update
func (c *Cache) Add(key string, value Value) {
	// If the key exists, update its value and move it to the back of the list
	// (the back is treated as the most recently used end).
	if ele, ok := c.cache[key]; ok {
		c.ll.MoveToBack(ele)
		kv := ele.Value.(*entry)
		c.uBytes += int64(value.Len()) - int64(kv.value.Len())
		kv.value = value
	} else {
		// Otherwise append a new node at the back, record the key -> node mapping,
		// and update the used capacity.
		ele := c.ll.PushBack(&entry{key: key, value: value})
		c.cache[key] = ele
		c.uBytes += int64(len(key)) + int64(value.Len())
	}
	// If a maximum capacity is set, evict the least recently used entries until we fit.
	for c.maxBytes != 0 && c.uBytes > c.maxBytes {
		c.RemoveOldEle()
	}
}

// Delete (evict the least recently used entry)
func (c *Cache) RemoveOldEle() {
	// Take the node at the front of the list.
	ele := c.ll.Front()
	if ele != nil {
		// Remove it from the list and drop its mapping from the cache.
		c.ll.Remove(ele)
		kv := ele.Value.(*entry)
		delete(c.cache, kv.key)
		// Update the used capacity.
		c.uBytes -= int64(len(kv.key)) + int64(kv.value.Len())
		// Invoke the eviction callback if one was set.
		if c.OnRemoved != nil {
			c.OnRemoved(kv.key, kv.value)
		}
	}
}

// Look up
func (c *Cache) Get(key string) (value Value, ok bool) {
	// Find the list node via the map and move it to the back (most recently used).
	if ele, ok := c.cache[key]; ok {
		c.ll.MoveToBack(ele)
		kv := ele.Value.(*entry)
		return kv.value, ok
	}
	return
}
  • Tests (a standalone usage sketch follows them)
package lru

import (
	"reflect"
	"testing"
)

type String string

func (s String) Len() int {
	return len(s)
}

func TestGet(t *testing.T) {
	c := New(0, nil)
	c.Add("key1", String("val1"))
	if v, ok := c.Get("key1"); !ok || string(v.(String)) != "val1" {
		t.Fatalf("cache hit key1=val1 failed")
	}
	if _, ok := c.Get("key2"); ok {
		t.Fatalf("cache miss key2 failed")
	}
}

func TestRemoveOldEle(t *testing.T) {
	k1, k2, k3 := "key1", "key2", "key3"
	v1, v2, v3 := "val1", "val2", "val3"
	maxBytes := len(k1 + k2 + v1 + v2)
	c := New(int64(maxBytes), nil)
	c.Add(k1, String(v1))
	c.Add(k2, String(v2))
	c.Add(k3, String(v3))

	if _, ok := c.Get("key1"); ok || c.Len() != 2 {
		t.Fatalf("removeoldele key1 failed")
	}
}

func TestOnRemoved(t *testing.T) {
	keys := make([]string, 0)
	callback := func(key string, value Value) {
		keys = append(keys, key)
	}
	c := New(int64(10), callback)
	c.Add("k1", String("v1"))
	c.Add("k2", String("v2"))
	c.Add("k3", String("v3"))
	c.Add("k4", String("k4"))

	expect := []string{"k1", "k2"}
	if !reflect.DeepEqual(expect, keys) {
		t.Fatalf("call onremoved failed, expect keys equals to %s, get %s", expect, keys)
	}
}
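A quick usage sketch outside the tests (the import path example.com/cache/lru is hypothetical; adjust it to wherever the package above lives):

package main

import (
	"fmt"

	"example.com/cache/lru" // hypothetical module path for the lru package above
)

// str implements lru.Value.
type str string

func (s str) Len() int { return len(s) }

func main() {
	// 16-byte cache; each entry below costs len(key)+len(value) = 4 bytes.
	c := lru.New(16, func(key string, value lru.Value) {
		fmt.Println("evicted:", key)
	})
	c.Add("k1", str("v1"))
	c.Add("k2", str("v2"))
	c.Add("k3", str("v3"))
	c.Add("k4", str("v4"))
	c.Add("k5", str("v5")) // exceeds 16 bytes, so the least recently used entry (k1) is evicted
	if v, ok := c.Get("k5"); ok {
		fmt.Println("k5 =", v)
	}
}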

Building an ops platform from scratch

Install JumpServer to make host management easier

  • Generate the secret keys

    if [ ! "$SECRET_KEY" ]; then
    SECRET_KEY=$(cat /dev/urandom tr -dc A-Za-z0-9 head -c 50)
    echo "SECRET_KEY=$SECRET_KEY" >>~/.bashrc
    echo $SECRET_KEY
    else
    echo $SECRET_KEY
    fi
    if [ ! "$BOOTSTRAP_TOKEN" ]; then
    BOOTSTRAP_TOKEN=$(cat /dev/urandom tr -dc A-Za-z0-9 head -c 16)
    echo "BOOTSTRAP_TOKEN=$BOOTSTRAP_TOKEN" >>~/.bashrc
    echo $BOOTSTRAP_TOKEN
    else
    echo $BOOTSTRAP_TOKEN
    fi
  • Create the MySQL database and credentials

    create database jumpserver default charset 'utf8' collate 'utf8_bin';
    grant all on jumpserver.* to 'jumpserver'@'%' identified by 'xxxxxxxxxx';
  • Start JumpServer via docker

    # xxx is a host directory, xxxx are the exposed ports, xxxx.com is a domain name
    docker run --name jms_all -d -v /xxx/jumpserver:/opt/jumpserver/data/media -p xxxx:80 -p xxxx:2222 -e SECRET_KEY=xxx -e BOOTSTRAP_TOKEN=xxx -e DB_HOST=1.1.1.1 -e DB_PORT=3306 -e DB_USER=jumpserver -e DB_PASSWORD=xxx -e DB_NAME=jumpserver -e REDIS_HOST=1.1.1.1 -e REDIS_PORT=6379 dockerhub.xxxx.com/jumpserver/jms_all:2.1.1
  • Configure JumpServer

    # Create the admin user xxx_root
    useradd xxx_root
    # Generate a key pair as prompted; setting a passphrase on the private key is recommended
    ssh-keygen
    # Recommended: bake the xxx_root admin user into the ECS image
    useradd xxx_root
    mkdir -p /home/xxx_root/.ssh
    chmod 700 /home/xxx_root/.ssh
    echo "sshkey" > /home/xxx_root/.ssh/authorized_keys
    chmod 600 /home/xxx_root/.ssh/authorized_keys
    chown xxx_root.xxx_root -R /home/xxx_root
    echo "xxx_root ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
    # Log in to JumpServer and configure as prompted:
    # System settings
    # Admin users (xxx_root)
    # System users (jump_root/backend/web/devops)
    # Asset list

Provision ACK (dedicated edition)

  • 1.20.4 / docker / flannel / ipvs

  • Initialisation

    # Three master nodes; adjust the apiserver service node-port range to 1024-65535
    # Any number of worker nodes
    Install node_exporter/dnsmasq/cron jobs (to clean up go_log logs and old docker images)
    Create namespaces and configure image pull secrets
    Recommended: label the nodes into groups, e.g. groupname=xxx_ops
  • Set up the asset management system (resources-collector/cmdb)

    # resources-collector mainly pulls resource data from the cloud providers
    Create the cmdb database
    # cmdb exposes internal APIs for resource and service information
  • Create k8s-manager

    Wraps the Kubernetes API for convenient internal use
  • Create configmgr

    Configuration centre
  • Create opscenter

    Unified authentication gateway
  • Create dbman

    Database operations service
  • Create ops-helper

    Wrapper around the ops assistant
  • Create ops-frontend-v2

    Front-end admin UI for the ops platform
  • Create publish-system-v2

    Release (deployment) system

Task scheduling demo

  • Simulates task scheduling
  • Synchronous (serial) tasks and asynchronous (parallel) tasks
  • A synchronous task must finish before the subsequent tasks run
package main

import (
"context"
"fmt"
"sync"
"time"
)

func test(wg *sync.WaitGroup, ctx context.Context, jobId int) {
defer wg.Done()
for i := 0; i < 5; i++ {
select {
case <-ctx.Done():
fmt.Printf("任务Id:%d,异常退出\n", jobId)
return
default:
fmt.Printf("任务Id:%d,执行第%d次\n", jobId, i)
if jobId > 2 {
time.Sleep(time.Second * 5)
} else {
time.Sleep(time.Second * 2)
}

}
}
}

func main() {
ctx, cancel := context.WithCancel(context.Background())
wg := new(sync.WaitGroup)
go func() {
time.Sleep(time.Second * 20)
cancel()
}()
for i := 0; i < 5; i++ {
wg.Add(1)
go test(wg, ctx, i)

if i < 1 {
wg.Wait()
select {
case <-ctx.Done():
fmt.Println("main1 异常退出")
return
default:
fmt.Println("1 select")
}
}
}
wg.Wait()
select {
case <-ctx.Done():
fmt.Println("main2 异常退出")
return
default:
fmt.Println("2 select")
}
// block here (to test select{} blocking)
select {}
}

Formatted string output in Python

# Option 1
# val = {"host": ip, "ttl": 60 }
# cmd_string = f"/bin/etcdctl put /coredns/{flag} '{json.dumps(val)}'"
# Option 2
# cmd_string = """/bin/etcdctl put /coredns/{flag} '{{"host": "{ip}","ttl": 60}}'""".format(flag=flag, ip=ip)
# Option 3
# cmd_string = """/bin/etcdctl put /coredns/{0} '{{"host": "{1}","ttl": 60}}'""".format(flag, ip)
# Option 4
# cmd_string = f"""/bin/etcdctl put /coredns/{flag} '{{"host": "{ip}","ttl": 60}}'"""
# print(cmd_string)

Building a DevOps environment on Kubernetes

Main contents:

  • Set up Kubernetes with kubeadm
  • Install the flannel network plugin
  • Set up an NFS server
  • Install the NFS provisioner
  • Install Helm
  • Install NGINX ingress
  • Install Jenkins
  • Install GitLab
  • Install Harbor

Detailed steps:

Install ansible and expect

# Batch SSH key distribution script
cat plssh.sh
#!/bin/bash
# Author: Ropon
# Blog: https://www.ropon.top
declare -A CserverLst
CserverLst=([s1]="192.168.8.151" [s2]="192.168.8.152")
cport="22"
cpasswd="ropon.top"
ansible_host="/etc/ansible/hosts"
tmpsshfile="/tmp/ssh.exp"
flag="k8snode"

yum install -y ansible expect
echo '#!/usr/bin/expect
spawn ssh-keygen
expect {
"*.ssh/id_rsa*" {exp_send "\r";exp_continue}
"*passphrase*" {exp_send "\r";exp_continue}
"*again*" {exp_send "\r"}
}' > $tmpsshfile
expect $tmpsshfile
sleep 1
echo "[$flag]" >> $ansible_host

for key in ${!CserverLst[*]}; do
cat > $tmpsshfile << EOF
#!/usr/bin/expect
spawn ssh-copy-id ${CserverLst[$key]} -p ${cport}
expect {
"*yes/no*" {exp_send "yes\r";exp_continue}
"*password*" {exp_send "${cpasswd}\r";exp_continue}
}
EOF
expect $tmpsshfile
echo "${CserverLst[$key]} ansible_ssh_port=${cport}" >> $ansible_host
done
ansible $flag -m ping

kubernetes

# Requirements: at least 2 machines with 2 cores and 4 GB RAM each
# Check the OS release and hostnames
# Run the following checks on both master and node:
cat /etc/redhat-release
lscpu | grep CPU
# Set the hostnames
hostnamectl set-hostname master01
hostnamectl set-hostname node01
hostnamectl set-hostname node02
# Verify the change
hostnamectl status
# Configure the hosts file
echo "127.0.0.1 $(hostname)" >> /etc/hosts
cat >> /etc/hosts << EOF
192.168.8.150 master01
192.168.8.151 node01
192.168.8.152 node02
EOF
# Disable the firewall and SELinux
systemctl disable firewalld
systemctl stop firewalld
systemctl disable iptables
systemctl stop iptables
sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
setenforce 0
# Disable swap
swapoff -a
sed -i.bak '/swap/s/^/#/' /etc/fstab

# Run on all master and node hosts in one batch
echo "
# Load the br_netfilter and ipvs modules
#!/bin/bash
# Author: Ropon
# Blog: https://www.ropon.top
cat > /etc/sysconfig/modules/br_netfilter_ipvs.modules << EOF
modprobe br_netfilter
modprobe ip_vs
modprobe ip_vs_rr
modprobe ip_vs_wrr
modprobe ip_vs_sh
modprobe nf_conntrack_ipv4
EOF
chmod 755 /etc/sysconfig/modules/br_netfilter_ipvs.modules
cat > /etc/rc.sysinit << EOF
#!/bin/bash
for file in /etc/sysconfig/modules/*.modules ; do
[ -x $file ] && \$file
done
EOF
# Tune kernel parameters
cat > /etc/sysctl.d/k8s.conf << EOF
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
modprobe br_netfilter
sysctl -p /etc/sysctl.d/k8s.conf
lsmod | grep br_netfilter" > netfilter.sh

ansible k8snode -m copy -a 'src=/root/netfilter.sh dest=/root/netfilter.sh mode=744'
ansible k8snode -m shell -a 'bash /root/netfilter.sh'
echo "#设置源
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
yum clean all
yum makecache fast -y" > yum.sh

ansible k8snode -m copy -a 'src=/root/yum.sh dest=/root/yum.sh mode=744'
ansible k8snode -m shell -a 'bash /root/yum.sh'

# Install docker
echo "yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install docker-ce-18.09.0 docker-ce-cli-18.09.0 containerd.io-1.2.13 -y
mkdir -p /etc/docker
# Kubernetes recommends the systemd cgroup driver, but docker starts with cgroupfs by default, hence the change below
# kubelet config file: /var/lib/kubelet/kubeadm-flags.env
tee /etc/docker/daemon.json <<-'EOF'
{
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors": ["https://xxx.mirror.aliyuncs.com"]
}
EOF
systemctl daemon-reload
systemctl start docker
systemctl enable docker" > docker.sh

ansible k8snode -m copy -a 'src=/root/docker.sh dest=/root/docker.sh mode=744'
ansible k8snode -m shell -a 'bash /root/docker.sh'

# Install the Kubernetes components
# On both master and node
echo "yum install -y kubelet-1.16.9 kubeadm-1.16.9 kubectl-1.16.9
systemctl enable kubelet" > k8s.sh

ansible k8snode -m copy -a 'src=/root/k8s.sh dest=/root/k8s.sh mode=744'
ansible k8snode -m shell -a 'bash /root/k8s.sh'

# Initialise the cluster on the master
kubeadm init --kubernetes-version=1.16.9 \
--apiserver-advertise-address=192.168.8.150 \
--image-repository registry.aliyuncs.com/google_containers \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16
# Install the flannel plugin
kubectl apply -f kube-flannel.yml
# Configure kubectl
mkdir -p /root/.kube
cp /etc/kubernetes/admin.conf /root/.kube/config
# Join the worker nodes to the cluster
kubeadm join 192.168.8.150:6443 --token xxxxxxxxxxxxxxxx \
--discovery-token-ca-cert-hash sha256:xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# Command completion
# Create an alias
alias k=kubectl
yum install -y bash-completion
source /usr/share/bash-completion/bash_completion
source <(kubectl completion bash)
cd ~;echo "source <(kubectl completion bash)" >> .bashrc
# Configure kubectl on the worker nodes
mkdir -p /root/.kube
cp /etc/kubernetes/admin.conf /root/.kube/config
scp -P 22 /root/.kube/config node01:/root/.kube/config
scp -P 22 /root/.kube/config node02:/root/.kube/config
# Test
kubectl get node -A
# Install NFS
yum install nfs-utils rpcbind -y
systemctl enable rpcbind.service
systemctl enable nfs.service
mkdir /home/k8sdata
chown nfsnobody.nfsnobody /home/k8sdata
echo "/home/k8sdata 192.168.8.150(rw,sync,root_squash) 192.168.8.151(rw,sync,root_squash) 192.168.8.152(rw,sync,root_squash)">>/etc/exports
systemctl start rpcbind
systemctl start nfs
showmount -e localhost
# Test
showmount -e 192.168.8.150
mkdir /test
mount 192.168.8.150:/home/k8sdata /test/
cd /test/
echo "ok" > test.txt
# Install the NFS provisioner
kubectl apply -f rbac.yaml
kubectl apply -f storageclass-nfs.yaml
# Remember to update the NFS server address
kubectl apply -f deployment-nfs.yaml
# Install Helm
wget http://panel.ropon.top/soft/helm-v3.2.4-linux-amd64.tar.gz
tar xf helm-v3.2.4-linux-amd64.tar.gz
mv linux-amd64/helm /usr/bin
helm version
rm -rf helm-v3.2.4-linux-amd64.tar.gz linux-amd64/
# Add a domestic chart repository
helm repo add stable https://kubernetes.oss-cn-hangzhou.aliyuncs.com/charts
helm repo update
# Install ingress
# Note: add --service-node-port-range=1-65535 to the apiserver flags in
/etc/kubernetes/manifests/kube-apiserver.yaml
systemctl daemon-reload
systemctl restart kubelet
kubectl apply -f ingress.yaml
kubectl apply -f ingress-svc.yaml
# Install Jenkins
helm search repo stable/jenkins
helm pull stable/jenkins
# Edit the values.yaml file
Image: "jenkinsci/blueocean"
ImageTag: "latest"
ImagePullPolicy: "IfNotPresent"
HostName: jenkins.ropon.top
AdminPassword: xxxxxx
# Set the Jenkins timezone
JavaOpts: >
-Djava.awt.headless=true
-Dorg.apache.commons.jelly.tags.fmt.timeZone=Asia/Shanghai
-Dfile.encoding=UTF-8
ServiceType: ClusterIP
#LoadBalancerSourceRanges:
#- 0.0.0.0/0
# Skip automatic plugin installation
InstallPlugins:
#- kubernetes:1.1
#- workflow-aggregator:2.5
#- workflow-job:2.15
#- credentials-binding:1.13
#- git:3.6.4
StorageClass: "managed-nfs-storage"
rbac:
install: true
helm install jenkins .
# Install Jenkins plugins
# Update the plugin source (change it in the web UI)
https://mirrors.tuna.tsinghua.edu.cn/jenkins/updates/update-center.json
cd /var/jenkins_home/updates
sed -i 's/http:\/\/updates.jenkins-ci.org\/download/https:\/\/mirrors.tuna.tsinghua.edu.cn\/jenkins/g' default.json && sed -i 's/http:\/\/www.google.com/https:\/\/www.baidu.com/g' default.json
# Manually install the following plugins
Chinese
pipeline
kubernetes
gitlab
# Install GitLab
cat > gitlab-setup.sh << EOF
#!/bin/bash
mkdir -p /home/gitlab
docker run --detach \\
--hostname xxxx.ropon.top \\
--env GITLAB_OMNIBUS_CONFIG="external_url 'http://xxxx.ropon.top/'; gitlab_rails['gitlab_shell_ssh_port'] = 6022;" \\
--publish 443:443 --publish 80:80 --publish 6022:22 \\
--name gitlab \\
--restart always \\
--volume /home/gitlab/config:/etc/gitlab \\
--volume /home/gitlab/logs:/var/log/gitlab \\
--volume /home/gitlab/data:/var/opt/gitlab \\
--cpus 2 \\
--memory 2048MB \\
gitlab/gitlab-ce:11.2.2-ce.0
EOF
sh gitlab-setup.sh
# Enable HTTPS
/etc/gitlab/gitlab.rb
nginx['redirect_http_to_https'] =true
nginx['ssl_certificate'] = "/etc/gitlab/ssl/server.crt"
nginx['ssl_certificate_key'] = "/etc/gitlab/ssl/server.key"
# Install Harbor
wget http://panel.ropon.top/k8s/harbor-offline-installer-v1.8.2.tgz
tar xf harbor-offline-installer-v1.8.2.tgz
# Edit the harbor.yml file
hostname: xxxx.ropon.top
# Enable HTTPS
port: 443
certificate: /home/harbor/ropon.top.crt
private_key: /home/harbor/ropon.top.key
# Download docker-compose
wget http://panel.ropon.top/soft/docker-compose-Linux-x86_64
mv docker-compose-Linux-x86_64 /usr/bin/docker-compose
./prepare
./install.sh
# Configure Kubernetes in Jenkins
https://kubernetes.default
default
http://jenkins.default:8080
jenkins-agent.default:50000
# Configure GitLab
# Create a job, edit the pipeline job, and tick "Build when a change is pushed to GitLab"
Admin area => Settings => Outbound requests: tick the checkbox
project => Settings => Integrations
# Create an image pull secret
kubectl create secret docker-registry hellogoregistrykey --docker-server=xxxx.ropon.top --docker-username=admin --docker-password=xxxxxx --docker-email=ropon@ropon.top
# kube-proxy previously used iptables; switch it to ipvs
kubectl -n kube-system edit cm kube-proxy
mode: "ipvs"
# Delete the old kube-proxy pods and wait for them to be recreated
kubectl get pod -n kube-system | grep kube-proxy | awk '{system("kubectl delete pod "$1" -n kube-system")}'

Pipeline

def gitlabUrl = "gitlab.ropon.top"
def harborUrl = "harbor.ropon.top"
def GroupName = "testgo"
def projectName = "hellogo"
def imageTag = "latest"
def kubectlImage = "lachlanevenson/k8s-kubectl:v1.16.9"
def branchName = "master"
def gitAuthName = "gitlab-auth-user"
def harborAuthName = "harbor-auth-user"
def sendmsgAuthName = "sendmsg-auth-user"
def msgText = "Build finished, please test"

podTemplate(cloud: 'kubernetes',containers: [
containerTemplate(name: 'docker', image: 'docker:stable', command: 'cat', ttyEnabled: true),
containerTemplate(name: 'kubectl', image: "${kubectlImage}", command: 'cat', ttyEnabled: true)
],
volumes: [
hostPathVolume(hostPath: '/var/run/docker.sock', mountPath: '/var/run/docker.sock'),
hostPathVolume(hostPath: '/root/.kube', mountPath: '/root/.kube')
]
)

{
node (POD_LABEL) {
stage('pull code') {
checkout([$class: 'GitSCM', branches: [[name: "*/${branchName}"]], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: "${gitAuthName}", url: "http://${gitlabUrl}/${GroupName}/${projectName}.git"]]])
}
container('docker') {
stage('docker-build') {
withCredentials([usernamePassword(credentialsId: "${harborAuthName}", passwordVariable: 'password', usernameVariable: 'username')]) {
sh "docker login -u $username -p $password $harborUrl"
}
sh "docker build -t ${projectName}:${imageTag} ."
def imageName = "${projectName}:${imageTag}"
def remoteImageName = "${harborUrl}/${GroupName}/${imageName}"
sh "docker tag $imageName $remoteImageName"
sh "docker push $remoteImageName"
sh "docker rmi $imageName"
sh "docker rmi $remoteImageName"
}
}
container('kubectl') {
stage('k8s deploy') {
sh "kubectl --kubeconfig=/root/.kube/config apply -f deployment.yaml"
}
}
stage('send msg') {
withCredentials([usernamePassword(credentialsId: "${sendmsgAuthName}", passwordVariable: 'password', usernameVariable: 'username')]) {
sh "wget http://panel.ropon.top/soft/sendmsg && chmod +x sendmsg && ./sendmsg $password $username $msgText"
}
}
}
}

Dockerfile

FROM golang:1.13-alpine3.10 as builder
ENV GO111MODULE=on \
CGO_ENABLED=0 \
GOOS=linux \
GOARCH=amd64 \
GOPROXY=https://goproxy.cn

COPY . /app/
RUN cd /app && go build -o hellogo .

FROM scratch
COPY --from=builder /app/hellogo /hellogo
ENTRYPOINT ["/hellogo"]

Deployment

apiVersion: v1   # note the apiVersion
kind: Service
metadata:
  name: myapp
spec:
  type: ClusterIP
  selector:          # selector
    app: hello
  ports:
    - name: http
      port: 9000
      targetPort: 9000
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: hellogo
spec:
  rules:
    - host: hellogo.ropon.top
      http:
        paths:
          - backend:
              serviceName: myapp
              servicePort: 9000
---
apiVersion: apps/v1          # this manifest follows the apps/v1 Kubernetes API
kind: Deployment             # resource type: Deployment
metadata:                    # resource metadata
  name: test-hello           # Deployment name
spec:                        # Deployment spec
  selector:
    matchLabels:
      app: hello
  replicas: 2                # number of replicas
  template:                  # Pod template
    metadata:                # Pod metadata
      labels:                # labels
        app: hello           # label key/value: app=hello
    spec:                    # Pod spec
      imagePullSecrets:
        - name: hellogoregistrykey
      containers:
        - name: hellogo      # container name
          image: harbor.ropon.top/testgo/hellogo:v4   # image used to create the container
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9000

k8s v1.20.11

# docker version
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install docker-ce-19.03.15 docker-ce-cli-19.03.15 containerd.io-1.2.13 -y
mkdir -p /etc/docker
tee /etc/docker/daemon.json <<-EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors": ["https://xxxxx.mirror.aliyuncs.com"]
}
EOF
systemctl daemon-reload
systemctl start docker
systemctl enable docker

# kubernetes version
yum install -y kubelet-1.20.11 kubeadm-1.20.11 kubectl-1.20.11
systemctl enable kubelet

# everything else is the same as above

Operating etcd from Go

package main

import (
"context"
"crypto/tls"
"crypto/x509"
"fmt"
"io/ioutil"
"log"
"time"

"go.etcd.io/etcd/clientv3"
)

// etcd client put/get demo
// use etcd/clientv3

func main() {
// connect to etcd over HTTPS
var etcdCert = "./etcd.pem"
var etcdCertKey = "./etcd-key.pem"
var etcdCa = "./ca.pem"

cert, err := tls.LoadX509KeyPair(etcdCert, etcdCertKey)
if err != nil {
return
}

caData, err := ioutil.ReadFile(etcdCa)
if err != nil {
return
}

pool := x509.NewCertPool()
pool.AppendCertsFromPEM(caData)

_tlsConfig := &tls.Config{
Certificates: []tls.Certificate{cert},
RootCAs: pool,
}
cli, err := clientv3.New(clientv3.Config{
Endpoints: []string{"https://192.168.7.150:2379"},
DialTimeout: 5 * time.Second,
TLS: _tlsConfig,
})
if err != nil {
// handle error!
fmt.Printf("connect to etcd failed, err:%v\n", err)
return
}
fmt.Println("connect to etcd success")
defer cli.Close()
//// put
//ctx, cancel := context.WithTimeout(context.Background(), time.Second)
//_, err = cli.Put(ctx, "ropon", "666")
//cancel()
//if err != nil {
// fmt.Printf("put to etcd failed, err:%v\n", err)
// return
//}
//// get
//ctx, cancel = context.WithTimeout(context.Background(), time.Second)
//resp, err := cli.Get(ctx, "ropon")
//cancel()
//if err != nil {
// fmt.Printf("get from etcd failed, err:%v\n", err)
// return
//}
//for _, ev := range resp.Kvs {
// fmt.Printf("%s:%s\n", ev.Key, ev.Value)
//}

//// watch the key "west" for changes
//rch := cli.Watch(context.Background(), "west") // <-chan WatchResponse
//for wresp := range rch {
// for _, ev := range wresp.Events {
// fmt.Printf("Type: %s Key:%s Value:%s\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
// }
//}

// create a lease with a 5-second TTL
resp, err := cli.Grant(context.TODO(), 5)
if err != nil {
log.Fatal(err)
}

// after 5 seconds the key /ropon/ would be removed (unless the lease is kept alive)
_, err = cli.Put(context.TODO(), "/ropon/", "8888", clientv3.WithLease(resp.ID))
if err != nil {
log.Fatal(err)
}

// keep the lease alive so the key is kept forever
ch, kaerr := cli.KeepAlive(context.TODO(), resp.ID)
if kaerr != nil {
log.Fatal(kaerr)
}
for {
ka := <-ch
fmt.Println("ttl:", ka.TTL)
}
}
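For completeness, a lease can also be revoked early, which immediately deletes the keys attached to it. A minimal sketch continuing the snippet above (assuming the KeepAlive loop has been stopped first):

// Revoke the lease; every key attached to it (here /ropon/) is deleted right away.
if _, err := cli.Revoke(context.TODO(), resp.ID); err != nil {
	log.Fatal(err)
}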