# os-upgrade-automation/playbook/roles/self_healing/tasks/main.yml
#
# 57 lines
# 2.2 KiB
# YAML

---
# Refresh the service inventory first: the restart condition below reads
# `services`, and nothing earlier in this task file populates it.
- name: 'Self-Healing: Ermittle aktuellen Dienststatus'
  service_facts:

# Restart each critical service that is known to the host but is not running.
# The list comes from `critical_services` (default: sshd and cron). Per-item
# outcomes are stored in `restart_result`. Failures are ignored on purpose so
# one broken unit does not abort the remaining self-healing steps.
# The task name is quoted because a plain scalar must not contain ": ".
- name: 'Self-Healing: Starte kritische Dienste neu, falls sie nicht laufen'
  service:
    name: "{{ item }}"
    state: restarted
  register: restart_result
  loop: "{{ critical_services | default(['sshd', 'cron']) }}"
  vars:
    # service_facts keys systemd units with their suffix ("sshd.service"),
    # so fall back to that form when the bare name is not a key.
    self_healing_unit: "{{ item if item in services else item ~ '.service' }}"
  when:
    - self_healing_unit in services
    - services[self_healing_unit].state not in ['running', 'started']
  ignore_errors: true
# Collect service facts again after the restart attempt so that the report
# task reads the post-restart state from `services`.
- name: 'Prüfe, ob Restart erfolgreich war'
  service_facts:
# Write a per-host self-healing report to `log_dir`. It contains the
# configured critical services, the registered restart results as JSON and
# the state of each critical service as currently recorded in `services`.
# The file is rewritten on every run; a write failure is ignored so that
# logging problems never block the remaining tasks.
- name: Logge Self-Healing-Resultate
  copy:
    content: |
      Self-Healing-Report für {{ inventory_hostname }}
      Zeit: {{ ansible_date_time.iso8601 }}
      Kritische Dienste: {{ critical_services | default(['sshd','cron']) }}
      Restart-Resultate: {{ restart_result.results | default(restart_result) | to_nice_json }}
      Service-Status nach Restart:
      {# service_facts keys systemd units as "<name>.service"; try that form too. #}
      {% set known_services = services | default({}) %}
      {% for svc in critical_services | default(['sshd','cron']) %}
      {% set unit = svc if svc in known_services else svc ~ '.service' %}
      - {{ svc }}: {{ (known_services[unit] | default({})).state | default('unbekannt') }}
      {% endfor %}
    dest: "{{ log_dir }}/self_healing_{{ inventory_hostname }}.log"
  ignore_errors: true
# Send an escalation mail through the local MTA (localhost:25) to
# `linux_admins_mail` when at least one restart attempt failed.
#
# The decision is based on the `failed` flag of the registered result rather
# than on a `state` value: loop items that were skipped carry no `state`
# attribute at all, so filtering on it cannot reliably tell failed restarts
# from skipped or successful ones.
# Mail delivery problems are ignored so they cannot fail the play.
- name: Eskaliere per Mail, wenn Restart fehlschlägt
  mail:
    host: localhost
    port: 25
    to: "{{ linux_admins_mail }}"
    subject: "[SELF-HEALING-FAIL] Dienst konnte nicht neu gestartet werden auf {{ inventory_hostname }}"
    body: |
      Self-Healing konnte einen oder mehrere kritische Dienste nicht erfolgreich neu starten!
      Siehe Log: {{ log_dir }}/self_healing_{{ inventory_hostname }}.log
      Zeit: {{ ansible_date_time.iso8601 }}
  # First branch: the registered result as a whole is marked failed.
  # Second branch: any individual loop item is marked failed.
  when: >-
    restart_result is defined and (
      restart_result is failed
      or (
        restart_result.results | default([])
        | selectattr('failed', 'defined')
        | selectattr('failed')
        | list | length > 0
      )
    )
  ignore_errors: true
# Free disk space by deleting everything matched by /tmp/*, /var/tmp/* and
# /var/log/alt/* when the root filesystem has less than 10 GiB available.
# WARNING: this is a destructive `rm -rf`; errors are ignored.
#
# The free space is looked up for the mount point "/" explicitly, because the
# first entry of `ansible_mounts` is not guaranteed to be the root filesystem
# and indexing an empty list would raise. If no "/" entry is reported, the
# value defaults to the threshold itself, so the cleanup is skipped.
# The task name is quoted because a plain scalar must not contain ": ".
- name: 'Self-Healing: Bereinige /tmp, /var/tmp, /var/log/alt bei wenig Speicherplatz'
  shell: rm -rf /tmp/* /var/tmp/* /var/log/alt/*
  vars:
    self_healing_root_free_bytes: >-
      {{ ansible_mounts | default([])
      | selectattr('mount', 'equalto', '/')
      | map(attribute='size_available')
      | first | default(10737418240) }}
  # 10737418240 bytes = 10 GiB
  when: (self_healing_root_free_bytes | int) < 10737418240
  ignore_errors: true
# Restart the service named `network` when the gathered facts contain no
# default IPv4 address (either `ansible_default_ipv4` is missing entirely or
# it has no `address` key). Errors are ignored, e.g. on hosts that have no
# service with that name.
# The task name is quoted because a plain scalar must not contain ": ".
- name: 'Self-Healing: Netzwerkdienst neu starten bei Netzwerkproblemen'
  service:
    name: network
    state: restarted
  when: >-
    ansible_default_ipv4 is not defined
    or ansible_default_ipv4['address'] is not defined
  ignore_errors: true