feat: execution failure reports, auto-printer for WMT, UTC timezone fix for all timestamps
This commit is contained in:
@@ -23,6 +23,7 @@ class AnsibleService:
|
||||
SETTINGS_FILE = Path("data/ansible_settings.json")
|
||||
DEFAULT_SETTINGS = {
|
||||
"ssh_fallback_password": "raspberry",
|
||||
"use_password_auth": False,
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
@@ -30,13 +31,16 @@ class AnsibleService:
|
||||
self.ansible_dir = Path("ansible")
|
||||
self.inventory_file = self.ansible_dir / "inventory" / "dynamic_inventory.yaml"
|
||||
self.playbook_dir = self.ansible_dir / "playbooks"
|
||||
self.ssh_key_path = Path.home() / ".ssh" / "ansible_key"
|
||||
self.ssh_keys_dir = self.ansible_dir / "ssh_keys"
|
||||
self.ssh_key_path = self.ssh_keys_dir / "app_key"
|
||||
self.ansible_cfg_path = self.ansible_dir / "ansible.cfg"
|
||||
|
||||
# Ensure directories exist
|
||||
self.ansible_dir.mkdir(exist_ok=True)
|
||||
(self.ansible_dir / "inventory").mkdir(exist_ok=True)
|
||||
(self.ansible_dir / "playbooks").mkdir(exist_ok=True)
|
||||
(self.ansible_dir / "roles").mkdir(exist_ok=True)
|
||||
self.ssh_keys_dir.mkdir(mode=0o700, exist_ok=True)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Settings helpers #
|
||||
@@ -136,12 +140,24 @@ class AnsibleService:
|
||||
'ansible_host': '127.0.0.1'
|
||||
}
|
||||
else:
|
||||
hvars = {
|
||||
'ansible_host': device.device_ip,
|
||||
'ansible_user': 'pi',
|
||||
'ansible_ssh_private_key_file': str(self.ssh_key_path),
|
||||
'ansible_ssh_common_args': '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
|
||||
}
|
||||
settings = self.load_settings()
|
||||
use_password = settings.get('use_password_auth', False)
|
||||
ssh_password = settings.get('ssh_fallback_password', '')
|
||||
if use_password and ssh_password:
|
||||
hvars = {
|
||||
'ansible_host': device.device_ip,
|
||||
'ansible_user': 'pi',
|
||||
'ansible_password': ssh_password,
|
||||
'ansible_become_password': ssh_password,
|
||||
'ansible_ssh_common_args': '-o PubkeyAuthentication=no -o PreferredAuthentications=password -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
|
||||
}
|
||||
else:
|
||||
hvars = {
|
||||
'ansible_host': device.device_ip,
|
||||
'ansible_user': 'pi',
|
||||
'ansible_ssh_private_key_file': str(self.ssh_key_path.resolve()),
|
||||
'ansible_ssh_common_args': '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
|
||||
}
|
||||
children['monitoring_devices']['hosts'][device.hostname] = hvars
|
||||
synced += 1
|
||||
self._write_inventory(data)
|
||||
@@ -249,7 +265,7 @@ class AnsibleService:
|
||||
'name': 'Update monitoring devices',
|
||||
'hosts': 'all',
|
||||
'become': True,
|
||||
'gather_facts': True,
|
||||
'gather_facts': False,
|
||||
'tasks': [
|
||||
{
|
||||
'name': 'Update apt cache',
|
||||
@@ -268,40 +284,24 @@ class AnsibleService:
|
||||
'register': 'upgrade_result'
|
||||
},
|
||||
{
|
||||
'name': 'Restart device if required',
|
||||
'reboot': {
|
||||
'reboot_timeout': 600
|
||||
},
|
||||
'when': 'upgrade_result.changed'
|
||||
},
|
||||
{
|
||||
'name': 'Check service status',
|
||||
'systemd': {
|
||||
'name': 'prezenta.service',
|
||||
'state': 'started'
|
||||
'name': 'Show upgrade result',
|
||||
'debug': {
|
||||
'msg': '{{ upgrade_result.stdout_lines }}'
|
||||
}
|
||||
},
|
||||
{
|
||||
'name': 'Report update completion',
|
||||
'uri': {
|
||||
'url': 'http://{{ ansible_controller_ip }}/api/update_complete',
|
||||
'method': 'POST',
|
||||
'body_format': 'json',
|
||||
'body': {
|
||||
'hostname': '{{ inventory_hostname }}',
|
||||
'device_ip': '{{ ansible_host }}',
|
||||
'status': 'completed',
|
||||
'packages_updated': '{{ upgrade_result.stdout_lines | length }}'
|
||||
}
|
||||
'name': 'Clean up apt cache',
|
||||
'apt': {
|
||||
'autoclean': True
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
playbook_path = self.playbook_dir / "update_devices.yml"
|
||||
with open(playbook_path, 'w') as f:
|
||||
yaml.dump([playbook_content], f, default_flow_style=False)
|
||||
|
||||
|
||||
return str(playbook_path)
|
||||
|
||||
def create_restart_service_playbook(self) -> str:
|
||||
@@ -390,6 +390,17 @@ class AnsibleService:
|
||||
# Add extra variables
|
||||
if extra_vars:
|
||||
cmd.extend(['--extra-vars', json.dumps(extra_vars)])
|
||||
|
||||
# Inject password auth vars if enabled (overrides per-host inventory vars)
|
||||
settings = self.load_settings()
|
||||
if settings.get('use_password_auth') and settings.get('ssh_fallback_password'):
|
||||
pwd = settings['ssh_fallback_password']
|
||||
cmd.extend(['--extra-vars', json.dumps({
|
||||
'ansible_password': pwd,
|
||||
'ansible_become_password': pwd,
|
||||
'ansible_ssh_private_key_file': '',
|
||||
'ansible_ssh_common_args': '-o PubkeyAuthentication=no -o PreferredAuthentications=password -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
|
||||
})])
|
||||
|
||||
# Create enhanced execution record using new model
|
||||
execution_id = str(uuid.uuid4())
|
||||
@@ -416,12 +427,19 @@ class AnsibleService:
|
||||
with tempfile.NamedTemporaryFile(mode='w+', suffix='.log', delete=False) as log_file:
|
||||
log_file_path = log_file.name
|
||||
|
||||
env = os.environ.copy()
|
||||
env['PYTHONUNBUFFERED'] = '1'
|
||||
env['ANSIBLE_FORCE_COLOR'] = '0'
|
||||
env['ANSIBLE_NOCOLOR'] = '1'
|
||||
env['ANSIBLE_CONFIG'] = str(self.ansible_cfg_path.resolve())
|
||||
|
||||
process = subprocess.Popen(
|
||||
cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
cwd=str(self.ansible_dir)
|
||||
cwd=str(self.ansible_dir),
|
||||
env=env,
|
||||
)
|
||||
|
||||
stdout, stderr = process.communicate()
|
||||
@@ -435,11 +453,12 @@ class AnsibleService:
|
||||
execution.stderr_log = stderr
|
||||
execution.ansible_log_file = log_file_path
|
||||
|
||||
# Always parse recap stats regardless of exit code —
|
||||
# Ansible exits non-zero when any host fails/is unreachable.
|
||||
self._parse_ansible_results_enhanced(execution, stdout)
|
||||
if process.returncode == 0:
|
||||
execution.status = 'completed'
|
||||
execution.summary_message = 'Playbook executed successfully'
|
||||
# Parse stdout for success/failure counts
|
||||
self._parse_ansible_results_enhanced(execution, stdout)
|
||||
else:
|
||||
execution.status = 'failed'
|
||||
execution.summary_message = f'Playbook failed with exit code {process.returncode}'
|
||||
@@ -474,10 +493,14 @@ class AnsibleService:
|
||||
|
||||
def execute_playbook_async(self, playbook_name: str, limit_hosts: List[str] = None,
|
||||
extra_vars: Dict = None, priority: int = 5,
|
||||
max_retries: int = 0) -> Dict:
|
||||
max_retries: int = 0,
|
||||
force_password_auth: bool = False) -> Dict:
|
||||
"""
|
||||
Start a playbook in a background thread.
|
||||
Returns immediately with the execution_id so the caller can poll /live.
|
||||
force_password_auth=True overrides the use_password_auth setting and always
|
||||
injects password vars — used by distribute_ssh_keys which must run before
|
||||
keys are deployed.
|
||||
"""
|
||||
try:
|
||||
self.generate_dynamic_inventory()
|
||||
@@ -498,6 +521,17 @@ class AnsibleService:
|
||||
# Pass all extra vars as a single JSON string to avoid value-quoting issues
|
||||
cmd.extend(['--extra-vars', json.dumps(extra_vars)])
|
||||
|
||||
# Inject password auth vars if enabled OR forced
|
||||
settings = self.load_settings()
|
||||
if (force_password_auth or settings.get('use_password_auth')) and settings.get('ssh_fallback_password'):
|
||||
pwd = settings['ssh_fallback_password']
|
||||
cmd.extend(['--extra-vars', json.dumps({
|
||||
'ansible_password': pwd,
|
||||
'ansible_become_password': pwd,
|
||||
'ansible_ssh_private_key_file': '',
|
||||
'ansible_ssh_common_args': '-o PubkeyAuthentication=no -o PreferredAuthentications=password -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
|
||||
})])
|
||||
|
||||
# Create a persistent log file (NOT deleted on close)
|
||||
log_fd, log_file_path = tempfile.mkstemp(suffix='.log', prefix='ansible_')
|
||||
os.close(log_fd)
|
||||
@@ -546,6 +580,7 @@ class AnsibleService:
|
||||
env['PYTHONUNBUFFERED'] = '1'
|
||||
env['ANSIBLE_FORCE_COLOR'] = '0'
|
||||
env['ANSIBLE_NOCOLOR'] = '1'
|
||||
env['ANSIBLE_CONFIG'] = str(self.ansible_cfg_path.resolve())
|
||||
|
||||
process = subprocess.Popen(
|
||||
cmd,
|
||||
@@ -586,10 +621,12 @@ class AnsibleService:
|
||||
execution.completed_at = datetime.utcnow()
|
||||
execution.exit_code = process.returncode
|
||||
execution.stdout_log = full_output
|
||||
# Always parse recap stats regardless of exit code —
|
||||
# Ansible exits non-zero when any host fails/is unreachable.
|
||||
self._parse_ansible_results_enhanced(execution, full_output)
|
||||
if process.returncode == 0:
|
||||
execution.status = 'completed'
|
||||
execution.summary_message = 'Playbook executed successfully'
|
||||
self._parse_ansible_results_enhanced(execution, full_output)
|
||||
else:
|
||||
execution.status = 'failed'
|
||||
execution.summary_message = f'Playbook failed (exit {process.returncode})'
|
||||
@@ -653,62 +690,62 @@ class AnsibleService:
|
||||
return {'success': False, 'error': str(e)}
|
||||
|
||||
def _parse_ansible_results_enhanced(self, execution: PlaybookExecution, output: str):
|
||||
"""Parse Ansible output for enhanced result statistics"""
|
||||
lines = output.split('\n')
|
||||
"""Parse Ansible PLAY RECAP output for result statistics."""
|
||||
import re
|
||||
successful_hosts = 0
|
||||
failed_hosts = 0
|
||||
unreachable_hosts = 0
|
||||
skipped_hosts = 0
|
||||
changed_hosts = 0
|
||||
|
||||
for line in lines:
|
||||
if 'ok=' in line and 'changed=' in line:
|
||||
# Parse line like: "host1: ok=4 changed=2 unreachable=0 failed=0"
|
||||
try:
|
||||
if 'failed=0' in line:
|
||||
successful_hosts += 1
|
||||
else:
|
||||
failed_count = int(line.split('failed=')[1].split()[0])
|
||||
if failed_count > 0:
|
||||
failed_hosts += 1
|
||||
else:
|
||||
successful_hosts += 1
|
||||
|
||||
if 'unreachable=' in line:
|
||||
unreachable = int(line.split('unreachable=')[1].split()[0])
|
||||
if unreachable > 0:
|
||||
unreachable_hosts += 1
|
||||
|
||||
if 'skipped=' in line:
|
||||
skipped = int(line.split('skipped=')[1].split()[0])
|
||||
if skipped > 0:
|
||||
skipped_hosts += 1
|
||||
|
||||
if 'changed=' in line:
|
||||
changed = int(line.split('changed=')[1].split()[0])
|
||||
if changed > 0:
|
||||
changed_hosts += 1
|
||||
|
||||
except (ValueError, IndexError):
|
||||
# Skip malformed lines
|
||||
continue
|
||||
|
||||
# Update execution record
|
||||
execution.successful_hosts = successful_hosts
|
||||
execution.failed_hosts = failed_hosts
|
||||
|
||||
# Match PLAY RECAP lines:
|
||||
# "RPI-FOO : ok=4 changed=1 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0"
|
||||
recap_re = re.compile(
|
||||
r'ok=(\d+)\s+changed=(\d+)\s+unreachable=(\d+)\s+failed=(\d+)'
|
||||
)
|
||||
|
||||
for line in output.split('\n'):
|
||||
m = recap_re.search(line)
|
||||
if not m:
|
||||
continue
|
||||
ok = int(m.group(1))
|
||||
changed = int(m.group(2))
|
||||
unreachable = int(m.group(3))
|
||||
failed = int(m.group(4))
|
||||
|
||||
if unreachable > 0:
|
||||
unreachable_hosts += 1
|
||||
elif failed > 0:
|
||||
failed_hosts += 1
|
||||
else:
|
||||
successful_hosts += 1
|
||||
|
||||
if changed > 0:
|
||||
changed_hosts += 1
|
||||
|
||||
execution.successful_hosts = successful_hosts
|
||||
execution.failed_hosts = failed_hosts
|
||||
execution.unreachable_hosts = unreachable_hosts
|
||||
execution.skipped_hosts = skipped_hosts
|
||||
execution.changed_hosts = changed_hosts
|
||||
execution.skipped_hosts = skipped_hosts
|
||||
execution.changed_hosts = changed_hosts
|
||||
|
||||
def _get_playbook_description(self, playbook_name: str) -> str:
|
||||
"""Get user-friendly description for playbook"""
|
||||
descriptions = {
|
||||
'update_devices': 'Update all packages and monitoring software on devices',
|
||||
'restart_service': 'Restart monitoring services on selected devices',
|
||||
'restart_service': 'Restart monitoring services on selected devices',
|
||||
'system_health': 'Check system health and monitoring status',
|
||||
'maintenance_mode': 'Put devices in maintenance mode'
|
||||
'maintenance_mode': 'Put devices in maintenance mode',
|
||||
'distribute_ssh_keys': 'Push server public key to all devices using password auth',
|
||||
}
|
||||
return descriptions.get(playbook_name, f'Execute {playbook_name} playbook')
|
||||
|
||||
def create_distribute_ssh_keys_playbook(self) -> str:
    """Return the path of the distribute_ssh_keys playbook as a string.

    Nothing is generated here. The method only checks whether
    ``distribute_ssh_keys.yml`` is present in ``self.playbook_dir``, logs a
    warning when it is absent, and returns the path either way.
    """
    target = self.playbook_dir / 'distribute_ssh_keys.yml'
    is_missing = not target.exists()
    if is_missing:
        logging.warning('distribute_ssh_keys.yml not found — playbook file is missing')
    return str(target)
|
||||
|
||||
def create_system_health_playbook(self) -> str:
|
||||
"""Create system health check playbook"""
|
||||
@@ -782,6 +819,53 @@ class AnsibleService:
|
||||
unreachable = int(line.split('unreachable=')[1].split()[0])
|
||||
execution.unreachable_hosts += unreachable
|
||||
|
||||
def test_password_auth(self, device_ip: str, password: str,
                       username: str = 'pi', port: int = 22) -> Dict:
    """Test SSH connectivity using password-only authentication (no key fallback).

    A TCP connection to ``device_ip:port`` is attempted first. If that
    succeeds, ``ssh`` is run through ``sshpass`` with public-key
    authentication disabled and the remote command ``echo OK``.

    Args:
        device_ip: Address of the device to test.
        password: Password to try for ``username``.
        username: Remote account name.
        port: SSH port on the device.

    Returns:
        A dict with ``success`` and ``reachable`` booleans, plus ``message``
        on success or ``error`` on failure. ``reachable`` is ``False`` only
        when the initial TCP connection fails.
    """
    import socket

    try:
        # Quick TCP reachability check first
        with socket.create_connection((device_ip, port), timeout=5):
            pass
    except OSError as e:  # ConnectionRefusedError is a subclass of OSError
        return {'success': False, 'reachable': False,
                'error': f'Host unreachable on port {port}: {e}'}

    # Hand the password to sshpass via the SSHPASS environment variable
    # (-e) rather than on the command line (-p), so it is not exposed in the
    # process argument list.
    env = os.environ.copy()
    env['SSHPASS'] = password

    try:
        result = subprocess.run(
            [
                'sshpass', '-e',
                'ssh',
                '-o', 'PubkeyAuthentication=no',
                '-o', 'PreferredAuthentications=password',
                '-o', 'StrictHostKeyChecking=no',
                '-o', 'UserKnownHostsFile=/dev/null',
                '-o', 'ConnectTimeout=8',
                '-p', str(port),
                f'{username}@{device_ip}',
                'echo OK',
            ],
            capture_output=True, text=True, timeout=15,
            env=env,
        )
        if result.returncode == 0 and 'OK' in result.stdout:
            return {'success': True, 'reachable': True,
                    'message': f'Password authentication succeeded for {username}@{device_ip}'}
        stderr = (result.stderr or '').strip()
        return {'success': False, 'reachable': True,
                'error': f'Authentication failed — {stderr or "wrong password"}'}
    except subprocess.TimeoutExpired:
        return {'success': False, 'reachable': True,
                'error': 'SSH command timed out'}
    except FileNotFoundError:
        # Raised by subprocess.run when the sshpass executable is missing.
        return {'success': False, 'reachable': True,
                'error': 'sshpass not installed — run: sudo apt-get install sshpass'}
    except Exception as e:
        # Best-effort probe: report any other failure instead of raising.
        return {'success': False, 'reachable': True, 'error': str(e)}
|
||||
|
||||
def test_ssh_connectivity(self, device_ip: str, username: str = 'pi') -> Dict:
|
||||
"""Test SSH connectivity to a device"""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user