From 201fc48e7fea4fe6658fdd02d12b7e9d2acb93d5 Mon Sep 17 00:00:00 2001 From: Daniel Sheppard Date: Sat, 28 Sep 2024 00:08:37 -0500 Subject: [PATCH 1/7] Update Python Setup Action --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 88c3f59..7c0ebd8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,7 @@ jobs: steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} From 744544457a1e79593048afa08b755fa8a5e422d6 Mon Sep 17 00:00:00 2001 From: Daniel Sheppard Date: Mon, 30 Sep 2024 20:09:28 -0500 Subject: [PATCH 2/7] Initial work on new backup system --- netbox_config_backup/__init__.py | 9 +++ netbox_config_backup/backup/__init__.py | 0 netbox_config_backup/backup/processing.py | 80 ++++++++++++++++++ netbox_config_backup/filtersets.py | 11 +++ netbox_config_backup/forms.py | 10 ++- netbox_config_backup/jobs/__init__.py | 0 netbox_config_backup/jobs/backup.py | 81 +++++++++++++++++++ .../management/commands/fork.py | 5 ++ .../migrations/0016_add_pid_to_backup_job.py | 18 +++++ netbox_config_backup/models/jobs.py | 11 +++ netbox_config_backup/navigation.py | 10 ++- netbox_config_backup/tables.py | 26 ++++++ netbox_config_backup/tasks.py | 77 +----------------- netbox_config_backup/urls.py | 1 + netbox_config_backup/utils/napalm.py | 80 ++++++++++++++++++ netbox_config_backup/views.py | 15 +++- pyproject.toml | 2 +- 17 files changed, 355 insertions(+), 81 deletions(-) create mode 100644 netbox_config_backup/backup/__init__.py create mode 100644 netbox_config_backup/backup/processing.py create mode 100644 netbox_config_backup/jobs/__init__.py create mode 100644 netbox_config_backup/jobs/backup.py create mode 100644 netbox_config_backup/migrations/0016_add_pid_to_backup_job.py create mode 100644 netbox_config_backup/utils/napalm.py diff --git a/netbox_config_backup/__init__.py b/netbox_config_backup/__init__.py index c03654b..dd28553 100644 --- a/netbox_config_backup/__init__.py +++ b/netbox_config_backup/__init__.py @@ -29,5 +29,14 @@ class NetboxConfigBackup(PluginConfig): ] graphql_schema = 'graphql.schema.schema' + def ready(self): + super().ready() + from netbox import settings + from netbox_config_backup.jobs.backup import BackupRunner + from netbox_config_backup.models import BackupJob, Backup + + frequency = settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency') / 60 + BackupRunner.enqueue_once(interval=frequency) + config = NetboxConfigBackup diff --git a/netbox_config_backup/backup/__init__.py b/netbox_config_backup/backup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netbox_config_backup/backup/processing.py b/netbox_config_backup/backup/processing.py new file mode 100644 index 0000000..65ea1f8 --- /dev/null +++ b/netbox_config_backup/backup/processing.py @@ -0,0 +1,80 @@ +import logging +import os + +from django.utils import timezone + +from core.choices import JobStatusChoices +from netbox_config_backup.utils.configs import check_config_save_status +from netbox_config_backup.utils.napalm import napalm_init +from netbox_config_backup.utils.rq import can_backup + +logger = logging.getLogger(f"netbox_config_backup") + + +def run_backup(backup, job): + pid = os.getpid() + + job.status = JobStatusChoices.STATUS_PENDING + job.pid = pid + job.save() + try: + if not can_backup(backup): + job.status = JobStatusChoices.STATUS_FAILED + if not job.data: + job.data = {} + job.data.update({'error': f'Cannot backup {backup}'}) + job.full_clean() + job.save() + logger.warning(f'Cannot backup {backup}') + return + + commit = None + ip = backup.ip if backup.ip is not None else backup.device.primary_ip + + if ip: + d = napalm_init(backup.device, ip) + job.status = JobStatusChoices.STATUS_RUNNING + job.started = timezone.now() + job.save() + try: + status = check_config_save_status(d) + if status is not None: + if status and not backup.config_status: + backup.config_status = status + backup.save() + elif not status and backup.config_status: + backup.config_status = status + backup.save() + elif not status and backup.config_status is None: + backup.config_status = status + backup.save() + elif status and backup.config_status is None: + backup.config_status = status + backup.save() + except Exception as e: + logger.error(f'{backup}: had error setting backup status: {e}') + + configs = d.get_config() + commit = backup.set_config(configs) + + d.close() + logger.info(f'{backup}: Backup complete') + job.status = JobStatusChoices.STATUS_COMPLETED + job.completed = timezone.now() + job.save() + else: + job.status = JobStatusChoices.STATUS_FAILED + if not job.data: + job.data = {} + job.data.update({'error': f'{backup}: No IP set'}) + job.full_clean() + job.save() + logger.info(f'{backup}: No IP set') + except Exception as e: + job.status = JobStatusChoices.STATUS_ERRORED + if not job.data: + job.data = {} + job.data.update({'error': f'{e}'}) + job.full_clean() + job.save() + logger.error(f'Exception in {backup}: {e}') diff --git a/netbox_config_backup/filtersets.py b/netbox_config_backup/filtersets.py index a3d273a..b494042 100644 --- a/netbox_config_backup/filtersets.py +++ b/netbox_config_backup/filtersets.py @@ -13,6 +13,17 @@ from utilities.filters import MultiValueCharFilter +class BackupJobFilterSet(BaseFilterSet): + q = django_filters.CharFilter( + method='search', + label=_('Search'), + ) + + class Meta: + model = models.BackupJob + fields = ['id', ] + + class BackupFilterSet(BaseFilterSet): q = django_filters.CharFilter( method='search', diff --git a/netbox_config_backup/forms.py b/netbox_config_backup/forms.py index e8d04cd..a01cfd3 100644 --- a/netbox_config_backup/forms.py +++ b/netbox_config_backup/forms.py @@ -7,11 +7,12 @@ from dcim.models import Device from ipam.models import IPAddress from netbox.forms import NetBoxModelForm, NetBoxModelBulkEditForm -from netbox_config_backup.models import Backup +from netbox_config_backup.models import Backup, BackupJob from utilities.forms.fields import DynamicModelChoiceField, DynamicModelMultipleChoiceField, CommentField __all__ = ( 'BackupForm', + 'BackupJobFilterSetForm', 'BackupFilterSetForm', 'BackupBulkEditForm', ) @@ -59,6 +60,13 @@ def clean(self): raise ValidationError({'device': f'{device}\'s platform ({device.platform}) has no napalm driver'}) +class BackupJobFilterSetForm(forms.Form): + model = BackupJob + field_order = [ + 'q', + ] + + class BackupFilterSetForm(forms.Form): model = Backup field_order = [ diff --git a/netbox_config_backup/jobs/__init__.py b/netbox_config_backup/jobs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netbox_config_backup/jobs/backup.py b/netbox_config_backup/jobs/backup.py new file mode 100644 index 0000000..788d456 --- /dev/null +++ b/netbox_config_backup/jobs/backup.py @@ -0,0 +1,81 @@ +import logging +import time +import uuid +from datetime import timedelta +from multiprocessing import Process + +from django.db.models import Q +from django.utils import timezone + +from core.choices import JobStatusChoices +from netbox.jobs import JobRunner +from netbox_config_backup.backup.processing import run_backup +from netbox_config_backup.choices import StatusChoices +from netbox_config_backup.models import Backup, BackupJob + +logger = logging.getLogger(f"netbox_config_backup") + + +class BackupRunner(JobRunner): + class Meta: + name = 'The Backup Job Runner' + + def run(self, *args, **kwargs): + processes = {} + for backup in Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False): + running = BackupJob.objects.filter( + ~Q( + status__in=[ + JobStatusChoices.STATUS_COMPLETED, + JobStatusChoices.STATUS_ERRORED, + JobStatusChoices.STATUS_FAILED + ] + ) + ) + while running.count() >= 5: + logger.debug(f'Number of running jobs >= 5, sleeping for 60 seconds') + old = running.filter(scheduled__lt=timezone.now() - timedelta(minutes=30)) + for job in old.all(): + if job.pid and processes.get(job.pid): + processes.get(job.pid).terminate() + job.status = JobStatusChoices.STATUS_ERRORED + if not job.data: + job.data = {} + job.data.update({'error': 'Job hung'}) + job.delete() + logger.error(f'Job {job.backup} appears stuck, deleting') + time.sleep(60) + + logger.info(f'Queuing device {backup.device} for backup') + job = BackupJob( + backup=backup, + status=JobStatusChoices.STATUS_SCHEDULED, + scheduled=timezone.now(), + job_id=uuid.uuid4(), + data={}, + ) + job.full_clean() + job.save() + if backup.device and (backup.ip or backup.device.primary_ip): + process = Process(target=run_backup, args=(backup, job), ) + logger.info(f'Forking process {process.pid} for {backup.device} backup') + processes.update({backup.pk: process}) + process.start() + time.sleep(10) + else: + job.status=JobStatusChoices.STATUS_FAILED + if not job.data: + job.data = {} + job.data.update({'error': f'Cannot backup {backup} due to no device or IPs'}) + job.save() + logger.warning(f'Cannot backup {backup} due to no device or IPs') + + while(True): + for pk in list(processes.keys()): + process = processes.get(pk) + if not process.is_alive(): + process.terminate() + del processes[pk] + if len(processes) == 0: + return + time.sleep(1) diff --git a/netbox_config_backup/management/commands/fork.py b/netbox_config_backup/management/commands/fork.py index cd3881e..3eb8998 100644 --- a/netbox_config_backup/management/commands/fork.py +++ b/netbox_config_backup/management/commands/fork.py @@ -22,6 +22,8 @@ def test(i): self.stdout.write(f"Child {i} is running") self.stdout.write(f"Child {i} sleeping 10 seconds") time.sleep(10) + if i == 1: + raise Exception(f"Child {i} exception") self.stdout.write(f"Child {i} sleep complete") processes = {} @@ -38,9 +40,12 @@ def test(i): for pid in list(processes.keys()): process = processes.get(pid, None) if not process.is_alive(): + print(f'{process} not alive') del processes[pid] time.sleep(1) + self.stdout.write('Finished') + diff --git a/netbox_config_backup/migrations/0016_add_pid_to_backup_job.py b/netbox_config_backup/migrations/0016_add_pid_to_backup_job.py new file mode 100644 index 0000000..5b5c9e9 --- /dev/null +++ b/netbox_config_backup/migrations/0016_add_pid_to_backup_job.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.8 on 2024-09-30 21:53 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('netbox_config_backup', '0015_backup_comments_backup_description'), + ] + + operations = [ + migrations.AddField( + model_name='backupjob', + name='pid', + field=models.BigIntegerField(blank=True, null=True), + ), + ] diff --git a/netbox_config_backup/models/jobs.py b/netbox_config_backup/models/jobs.py index b80ac00..8e2208d 100644 --- a/netbox_config_backup/models/jobs.py +++ b/netbox_config_backup/models/jobs.py @@ -3,8 +3,12 @@ from django.db import models from django.db.models import ForeignKey from django.utils import timezone +from django.utils.translation import gettext as _ + from django_rq import get_queue + from core.choices import JobStatusChoices +from utilities.querysets import RestrictedQuerySet from .abstract import BigIDModel logger = logging.getLogger(f"netbox_config_backup") @@ -18,6 +22,11 @@ class BackupJob(BigIDModel): null=False, related_name='jobs', ) + pid = models.BigIntegerField( + verbose_name=_('PID'), + null=True, + blank=True, + ) created = models.DateTimeField( auto_now_add=True ) @@ -46,6 +55,8 @@ class BackupJob(BigIDModel): unique=True ) + objects = RestrictedQuerySet.as_manager() + def __str__(self): return str(self.job_id) diff --git a/netbox_config_backup/navigation.py b/netbox_config_backup/navigation.py index 43ef357..bb3cf88 100644 --- a/netbox_config_backup/navigation.py +++ b/netbox_config_backup/navigation.py @@ -1,6 +1,13 @@ from netbox.choices import ButtonColorChoices from netbox.plugins import PluginMenuItem, PluginMenuButton, PluginMenu +jobs = PluginMenuItem( + link='plugins:netbox_config_backup:backupjob_list', + link_text='Jobs', + permissions=['netbox_config_backup.view_backups'], + buttons=[] +) + assigned = PluginMenuItem( link='plugins:netbox_config_backup:backup_list', link_text='Devices', @@ -25,6 +32,7 @@ menu = PluginMenu( label="Configuration Backup", groups=( - ('Backup Jobs', (assigned, unassigned)), + ('Backup Jobs', (jobs, )), + ('Backups', (assigned, unassigned)), ) ) diff --git a/netbox_config_backup/tables.py b/netbox_config_backup/tables.py index bc2fb8c..52eb3d6 100644 --- a/netbox_config_backup/tables.py +++ b/netbox_config_backup/tables.py @@ -27,6 +27,32 @@ def header(self): return '' +class BackupJobTable(BaseTable): + pk = columns.ToggleColumn( + + ) + backup = tables.Column( + linkify=True, + verbose_name='Backup' + ) + created = tables.DateTimeColumn() + scheduled = tables.DateTimeColumn() + started = tables.DateTimeColumn() + completed = tables.DateTimeColumn() + + class Meta(BaseTable.Meta): + model = Backup + fields = ( + 'pk', 'backup', 'pid', 'created', 'scheduled', 'started', 'completed', 'status' + ) + default_columns = ( + 'pk', 'backup', 'pid', 'created', 'scheduled', 'started', 'completed', 'status' + ) + + def render_backup_count(self, value): + return f'{value.count()}' + + class BackupTable(BaseTable): pk = columns.ToggleColumn( diff --git a/netbox_config_backup/tasks.py b/netbox_config_backup/tasks.py index 0643139..64b83bc 100644 --- a/netbox_config_backup/tasks.py +++ b/netbox_config_backup/tasks.py @@ -2,91 +2,18 @@ from datetime import timedelta from django.utils import timezone -from netmiko import NetmikoAuthenticationException, NetmikoTimeoutException from core.choices import JobStatusChoices from netbox import settings from netbox.api.exceptions import ServiceUnavailable +from netbox_config_backup.backup.processing import logger from netbox_config_backup.models import BackupJob from netbox_config_backup.utils.configs import check_config_save_status from netbox_config_backup.utils.logger import get_logger +from netbox_config_backup.utils.napalm import napalm_init from netbox_config_backup.utils.rq import can_backup -def napalm_init(device, ip=None, extra_args={}): - from netbox import settings - - username = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( - 'NAPALM_USERNAME', None - ) - password = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( - 'NAPALM_PASSWORD', None - ) - timeout = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( - 'NAPALM_TIMEOUT', None - ) - optional_args = ( - settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}) - .get('NAPALM_ARGS', []) - .copy() - ) - - if device and device.platform and device.platform.napalm.napalm_args is not None: - optional_args.update(device.platform.napalm.napalm_args) - if extra_args != {}: - optional_args.update(extra_args) - - # Check for primary IP address from NetBox object - if ip is not None: - host = str(ip.address.ip) - elif device.primary_ip and device.primary_ip is not None: - host = str(device.primary_ip.address.ip) - else: - raise ServiceUnavailable("This device does not have a primary IP address") - - # Check that NAPALM is installed - try: - import napalm - from napalm.base.exceptions import ModuleImportError - except ModuleNotFoundError as e: - if getattr(e, 'name') == 'napalm': - raise ServiceUnavailable( - "NAPALM is not installed. Please see the documentation for instructions." - ) - raise e - - # Validate the configured driver - try: - driver = napalm.get_network_driver(device.platform.napalm.napalm_driver) - except ModuleImportError: - raise ServiceUnavailable( - "NAPALM driver for platform {} not found: {}.".format( - device.platform, device.platform.napalm.napalm_driver - ) - ) - - # Connect to the device - d = driver( - hostname=host, - username=username, - password=password, - timeout=timeout, - optional_args=optional_args, - ) - try: - d.open() - except Exception as e: - if isinstance(e, NetmikoAuthenticationException): - logger.info('Authentication error') - elif isinstance(e, NetmikoTimeoutException): - logger.info('Connection error') - raise ServiceUnavailable( - "Error connecting to the device at {}: {}".format(host, e) - ) - - return d - - def backup_config(backup, pk=None): commit = None if backup.device: diff --git a/netbox_config_backup/urls.py b/netbox_config_backup/urls.py index 9394638..a1e69da 100644 --- a/netbox_config_backup/urls.py +++ b/netbox_config_backup/urls.py @@ -5,6 +5,7 @@ from .models import Backup urlpatterns = [ + path('jobs/', views.BackupJobListView.as_view(), name='backupjob_list'), path('unassigned/', views.UnassignedBackupListView.as_view(), name='unassignedbackup_list'), path('devices/', views.BackupListView.as_view(), name='backup_list'), path('devices/add/', views.BackupEditView.as_view(), name='backup_add'), diff --git a/netbox_config_backup/utils/napalm.py b/netbox_config_backup/utils/napalm.py new file mode 100644 index 0000000..bdca7b8 --- /dev/null +++ b/netbox_config_backup/utils/napalm.py @@ -0,0 +1,80 @@ +import logging +from netmiko import NetmikoAuthenticationException, NetmikoTimeoutException + +from netbox.api.exceptions import ServiceUnavailable + +logger = logging.getLogger(f"netbox_config_backup") + + +def napalm_init(device, ip=None, extra_args={}): + from netbox import settings + + username = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( + 'NAPALM_USERNAME', None + ) + password = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( + 'NAPALM_PASSWORD', None + ) + timeout = settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}).get( + 'NAPALM_TIMEOUT', None + ) + optional_args = ( + settings.PLUGINS_CONFIG.get('netbox_napalm_plugin', {}) + .get('NAPALM_ARGS', []) + .copy() + ) + + if device and device.platform and device.platform.napalm.napalm_args is not None: + optional_args.update(device.platform.napalm.napalm_args) + if extra_args != {}: + optional_args.update(extra_args) + + # Check for primary IP address from NetBox object + if ip is not None: + host = str(ip.address.ip) + elif device.primary_ip and device.primary_ip is not None: + host = str(device.primary_ip.address.ip) + else: + raise ServiceUnavailable("This device does not have a primary IP address") + + # Check that NAPALM is installed + try: + import napalm + from napalm.base.exceptions import ModuleImportError + except ModuleNotFoundError as e: + if getattr(e, 'name') == 'napalm': + raise ServiceUnavailable( + "NAPALM is not installed. Please see the documentation for instructions." + ) + raise e + + # Validate the configured driver + try: + driver = napalm.get_network_driver(device.platform.napalm.napalm_driver) + except ModuleImportError: + raise ServiceUnavailable( + "NAPALM driver for platform {} not found: {}.".format( + device.platform, device.platform.napalm.napalm_driver + ) + ) + + # Connect to the device + d = driver( + hostname=host, + username=username, + password=password, + timeout=timeout, + optional_args=optional_args, + ) + try: + d.open() + except Exception as e: + if isinstance(e, NetmikoAuthenticationException): + logger.info('Authentication error') + elif isinstance(e, NetmikoTimeoutException): + logger.info('Connection error') + raise ServiceUnavailable( + "Error connecting to the device at {}: {}".format(host, e) + ) + + return d diff --git a/netbox_config_backup/views.py b/netbox_config_backup/views.py index 6109d51..cfc6328 100644 --- a/netbox_config_backup/views.py +++ b/netbox_config_backup/views.py @@ -9,18 +9,27 @@ from core.choices import JobStatusChoices from netbox.views.generic import ObjectDeleteView, ObjectEditView, ObjectView, ObjectListView, ObjectChildrenView, \ BulkEditView, BulkDeleteView -from netbox_config_backup.filtersets import BackupFilterSet, BackupsFilterSet +from netbox_config_backup.filtersets import BackupFilterSet, BackupsFilterSet, BackupJobFilterSet -from netbox_config_backup.forms import BackupForm, BackupFilterSetForm, BackupBulkEditForm +from netbox_config_backup.forms import BackupForm, BackupFilterSetForm, BackupBulkEditForm, BackupJobFilterSetForm from netbox_config_backup.git import GitBackup from netbox_config_backup.models import Backup, BackupJob, BackupCommitTreeChange, BackupCommit, BackupObject -from netbox_config_backup.tables import BackupTable, BackupsTable +from netbox_config_backup.tables import BackupTable, BackupsTable, BackupJobTable from netbox_config_backup.utils import get_backup_tables, Differ from utilities.views import register_model_view, ViewTab logger = logging.getLogger(f"netbox_config_backup") +class BackupJobListView(ObjectListView): + queryset = BackupJob.objects.all() + + filterset = BackupJobFilterSet + filterset_form = BackupJobFilterSetForm + table = BackupJobTable + action_buttons = () + + class BackupListView(ObjectListView): queryset = Backup.objects.filter(device__isnull=False).default_annotate() diff --git a/pyproject.toml b/pyproject.toml index b0e759a..2055e1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ description = "A NetBox Switch Configuration Backup Plugin" readme = "README.md" requires-python = ">=3.10" keywords = ["netbox-plugin", ] -version = "2.1.1" +version = "2.1.2-alpha1" license = {file = "LICENSE"} classifiers = [ "Programming Language :: Python :: 3", From b8a73d506a3c83a6f5f692a872a70c1e35c4cae7 Mon Sep 17 00:00:00 2001 From: Daniel Sheppard Date: Mon, 30 Sep 2024 20:55:44 -0500 Subject: [PATCH 3/7] More tweaking of job handler --- netbox_config_backup/__init__.py | 15 +++--- netbox_config_backup/jobs/backup.py | 47 ++++++++++--------- .../migrations/0017_add_job_to_backupjob.py | 26 ++++++++++ netbox_config_backup/models/jobs.py | 8 ++++ 4 files changed, 67 insertions(+), 29 deletions(-) create mode 100644 netbox_config_backup/migrations/0017_add_job_to_backupjob.py diff --git a/netbox_config_backup/__init__.py b/netbox_config_backup/__init__.py index dd28553..872ce32 100644 --- a/netbox_config_backup/__init__.py +++ b/netbox_config_backup/__init__.py @@ -29,14 +29,15 @@ class NetboxConfigBackup(PluginConfig): ] graphql_schema = 'graphql.schema.schema' - def ready(self): + def ready(self, *args, **kwargs): super().ready() - from netbox import settings - from netbox_config_backup.jobs.backup import BackupRunner - from netbox_config_backup.models import BackupJob, Backup - - frequency = settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency') / 60 - BackupRunner.enqueue_once(interval=frequency) + import sys + if 'manage.py' not in sys.argv[0]: + from netbox import settings + from netbox_config_backup.jobs.backup import BackupRunner + from netbox_config_backup.models import BackupJob, Backup + frequency = settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency') / 60 + BackupRunner.enqueue_once(interval=frequency) config = NetboxConfigBackup diff --git a/netbox_config_backup/jobs/backup.py b/netbox_config_backup/jobs/backup.py index 788d456..219d90a 100644 --- a/netbox_config_backup/jobs/backup.py +++ b/netbox_config_backup/jobs/backup.py @@ -20,34 +20,37 @@ class BackupRunner(JobRunner): class Meta: name = 'The Backup Job Runner' + def clean_stale_jobs(self, processes, old): + for job in old.all(): + if job.pid: + pass + job.status = JobStatusChoices.STATUS_ERRORED + if not job.data: + job.data = {} + job.data.update({'error': 'Job hung'}) + job.delete() + logger.error(f'Job {job.backup} appears stuck, deleting') + def run(self, *args, **kwargs): + running = BackupJob.objects.filter( + ~Q( + status__in=[ + JobStatusChoices.STATUS_COMPLETED, + JobStatusChoices.STATUS_ERRORED, + JobStatusChoices.STATUS_FAILED + ] + ) + ) processes = {} + for backup in Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False): - running = BackupJob.objects.filter( - ~Q( - status__in=[ - JobStatusChoices.STATUS_COMPLETED, - JobStatusChoices.STATUS_ERRORED, - JobStatusChoices.STATUS_FAILED - ] - ) - ) - while running.count() >= 5: + while running.count() >= 20: logger.debug(f'Number of running jobs >= 5, sleeping for 60 seconds') old = running.filter(scheduled__lt=timezone.now() - timedelta(minutes=30)) - for job in old.all(): - if job.pid and processes.get(job.pid): - processes.get(job.pid).terminate() - job.status = JobStatusChoices.STATUS_ERRORED - if not job.data: - job.data = {} - job.data.update({'error': 'Job hung'}) - job.delete() - logger.error(f'Job {job.backup} appears stuck, deleting') - time.sleep(60) - + self.clean_stale_jobs(processes, old) logger.info(f'Queuing device {backup.device} for backup') job = BackupJob( + runner=self.job, backup=backup, status=JobStatusChoices.STATUS_SCHEDULED, scheduled=timezone.now(), @@ -61,7 +64,7 @@ def run(self, *args, **kwargs): logger.info(f'Forking process {process.pid} for {backup.device} backup') processes.update({backup.pk: process}) process.start() - time.sleep(10) + time.sleep(5) else: job.status=JobStatusChoices.STATUS_FAILED if not job.data: diff --git a/netbox_config_backup/migrations/0017_add_job_to_backupjob.py b/netbox_config_backup/migrations/0017_add_job_to_backupjob.py new file mode 100644 index 0000000..f32e81e --- /dev/null +++ b/netbox_config_backup/migrations/0017_add_job_to_backupjob.py @@ -0,0 +1,26 @@ +# Generated by Django 5.0.8 on 2024-10-01 01:52 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0012_job_object_type_optional'), + ('netbox_config_backup', '0016_add_pid_to_backup_job'), + ] + + operations = [ + migrations.AddField( + model_name='backupjob', + name='runner', + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name='backup_job', + to='core.job', + ), + ), + ] diff --git a/netbox_config_backup/models/jobs.py b/netbox_config_backup/models/jobs.py index 8e2208d..7a28ba9 100644 --- a/netbox_config_backup/models/jobs.py +++ b/netbox_config_backup/models/jobs.py @@ -15,6 +15,14 @@ class BackupJob(BigIDModel): + runner = models.ForeignKey( + verbose_name=_('Job Run'), + to='core.Job', + on_delete=models.SET_NULL, + related_name='backup_job', + null=True, + blank=True + ) backup = ForeignKey( to='Backup', on_delete=models.CASCADE, From a81e77c3626075e832b0150364fe915c8a60732f Mon Sep 17 00:00:00 2001 From: Daniel Sheppard Date: Mon, 30 Sep 2024 22:50:43 -0500 Subject: [PATCH 4/7] New Backup method - Alpha --- netbox_config_backup/backup/processing.py | 21 ++- netbox_config_backup/jobs/backup.py | 127 ++++++++++------- netbox_config_backup/models/backups.py | 23 ---- netbox_config_backup/models/jobs.py | 58 +------- netbox_config_backup/tasks.py | 122 ----------------- netbox_config_backup/template_content.py | 4 - netbox_config_backup/utils/__init__.py | 8 -- netbox_config_backup/utils/rq.py | 160 ---------------------- netbox_config_backup/views.py | 4 - 9 files changed, 97 insertions(+), 430 deletions(-) diff --git a/netbox_config_backup/backup/processing.py b/netbox_config_backup/backup/processing.py index 65ea1f8..03ed751 100644 --- a/netbox_config_backup/backup/processing.py +++ b/netbox_config_backup/backup/processing.py @@ -1,9 +1,13 @@ import logging import os +import traceback +from django.db.models import Q from django.utils import timezone from core.choices import JobStatusChoices +from netbox.api.exceptions import ServiceUnavailable +from netbox_config_backup.models import BackupJob from netbox_config_backup.utils.configs import check_config_save_status from netbox_config_backup.utils.napalm import napalm_init from netbox_config_backup.utils.rq import can_backup @@ -11,6 +15,10 @@ logger = logging.getLogger(f"netbox_config_backup") +def remove_stale_backupjobs(job: BackupJob): + BackupJob.objects.filter(backup=job.backup).exclude(status=JobStatusChoices.STATUS_COMPLETED).exclude( + pk=job.pk).delete() + def run_backup(backup, job): pid = os.getpid() @@ -32,7 +40,14 @@ def run_backup(backup, job): ip = backup.ip if backup.ip is not None else backup.device.primary_ip if ip: - d = napalm_init(backup.device, ip) + try: + d = napalm_init(backup.device, ip) + except (TimeoutError, ServiceUnavailable): + job.status = JobStatusChoices.STATUS_FAILED + job.data = {'error': f'Timeout Connecting to {backup.device} with ip {ip}'} + job.save() + return + job.status = JobStatusChoices.STATUS_RUNNING job.started = timezone.now() job.save() @@ -62,6 +77,7 @@ def run_backup(backup, job): job.status = JobStatusChoices.STATUS_COMPLETED job.completed = timezone.now() job.save() + remove_stale_backupjobs(job=job) else: job.status = JobStatusChoices.STATUS_FAILED if not job.data: @@ -69,7 +85,7 @@ def run_backup(backup, job): job.data.update({'error': f'{backup}: No IP set'}) job.full_clean() job.save() - logger.info(f'{backup}: No IP set') + logger.debug(f'{backup}: No IP set') except Exception as e: job.status = JobStatusChoices.STATUS_ERRORED if not job.data: @@ -78,3 +94,4 @@ def run_backup(backup, job): job.full_clean() job.save() logger.error(f'Exception in {backup}: {e}') + logger.info(f'{backup}: {traceback.format_exc()}') diff --git a/netbox_config_backup/jobs/backup.py b/netbox_config_backup/jobs/backup.py index 219d90a..a79178e 100644 --- a/netbox_config_backup/jobs/backup.py +++ b/netbox_config_backup/jobs/backup.py @@ -17,10 +17,12 @@ class BackupRunner(JobRunner): + processes = {} + class Meta: name = 'The Backup Job Runner' - def clean_stale_jobs(self, processes, old): + def clean_stale_jobs(self, old): for job in old.all(): if job.pid: pass @@ -28,57 +30,82 @@ def clean_stale_jobs(self, processes, old): if not job.data: job.data = {} job.data.update({'error': 'Job hung'}) - job.delete() - logger.error(f'Job {job.backup} appears stuck, deleting') + job.save() + logger.warning(f'Job {job.backup} appears stuck, deleting') - def run(self, *args, **kwargs): - running = BackupJob.objects.filter( - ~Q( - status__in=[ - JobStatusChoices.STATUS_COMPLETED, - JobStatusChoices.STATUS_ERRORED, - JobStatusChoices.STATUS_FAILED - ] - ) - ) - processes = {} + def handle_processes(self): + for pk in list(self.processes.keys()): + process = self.processes.get(pk, {}).get('process') + job_pk = self.processes.get(pk, {}).get('job') + backup = self.processes.get(pk, {}).get('backup') + if not process.is_alive(): + logger.debug(f'Terminating process {process.pid} with job pk of {pk} for {backup}') + process.terminate() + del self.processes[pk] + job = BackupJob.objects.filter(pk=job_pk).first() + if job and job.status != JobStatusChoices.STATUS_COMPLETED: + job.status = JobStatusChoices.STATUS_ERRORED + if not job.data: + job.data = {} + job.data.update({'error': 'Process terminated'}) + job.save() - for backup in Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False): - while running.count() >= 20: - logger.debug(f'Number of running jobs >= 5, sleeping for 60 seconds') - old = running.filter(scheduled__lt=timezone.now() - timedelta(minutes=30)) - self.clean_stale_jobs(processes, old) - logger.info(f'Queuing device {backup.device} for backup') - job = BackupJob( - runner=self.job, - backup=backup, - status=JobStatusChoices.STATUS_SCHEDULED, - scheduled=timezone.now(), - job_id=uuid.uuid4(), - data={}, + def fork_process(self, backup, job): + process = Process(target=run_backup, args=(backup, job), ) + data = { + backup.pk: { + 'process': process, + 'backup': backup.pk, + 'job': job.pk + } + } + self.processes.update(data) + process.start() + logger.debug(f'Forking process {process.pid} for {backup.device} backup') + return process + + def run(self, *args, **kwargs): + try: + running = BackupJob.objects.filter( + ~Q( + status__in=[ + JobStatusChoices.STATUS_COMPLETED, + JobStatusChoices.STATUS_ERRORED, + JobStatusChoices.STATUS_FAILED + ] + ) ) - job.full_clean() - job.save() - if backup.device and (backup.ip or backup.device.primary_ip): - process = Process(target=run_backup, args=(backup, job), ) - logger.info(f'Forking process {process.pid} for {backup.device} backup') - processes.update({backup.pk: process}) - process.start() - time.sleep(5) - else: - job.status=JobStatusChoices.STATUS_FAILED - if not job.data: - job.data = {} - job.data.update({'error': f'Cannot backup {backup} due to no device or IPs'}) + old = running.filter(scheduled__lt=timezone.now() - timedelta(minutes=30)) + self.clean_stale_jobs(old) + for backup in Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False): + logger.debug(f'Queuing device {backup.device} for backup') + job = BackupJob( + runner=self.job, + backup=backup, + status=JobStatusChoices.STATUS_SCHEDULED, + scheduled=timezone.now(), + job_id=uuid.uuid4(), + data={}, + ) + job.full_clean() job.save() - logger.warning(f'Cannot backup {backup} due to no device or IPs') + if backup.device and (backup.ip or backup.device.primary_ip): + process = self.fork_process(backup, job) + process.join(1) + else: + job.status = JobStatusChoices.STATUS_FAILED + if not job.data: + job.data = {} + job.data.update({'error': f'Cannot backup {backup} due to no device or IPs'}) + job.save() + logger.warning(f'Cannot backup {backup} due to no device or IPs') - while(True): - for pk in list(processes.keys()): - process = processes.get(pk) - if not process.is_alive(): - process.terminate() - del processes[pk] - if len(processes) == 0: - return - time.sleep(1) + while(True): + self.handle_processes() + if len(self.processes) == 0: + return + time.sleep(1) + except Exception as e: + import traceback + logger.error(traceback.format_exc()) + raise e diff --git a/netbox_config_backup/models/backups.py b/netbox_config_backup/models/backups.py index 313c760..d84662f 100644 --- a/netbox_config_backup/models/backups.py +++ b/netbox_config_backup/models/backups.py @@ -9,13 +9,11 @@ from django_rq import get_queue from dcim.models import Device -from core.choices import JobStatusChoices from netbox.models import PrimaryModel from netbox_config_backup.choices import StatusChoices from netbox_config_backup.helpers import get_repository_dir -from netbox_config_backup.utils.rq import remove_queued from ..querysets import BackupQuerySet from ..utils import Differ @@ -58,27 +56,6 @@ def get_absolute_url(self): def __str__(self): return self.name - def delete(self, *args, **kwargs): - queue = get_queue('netbox_config_backup.jobs') - remove_queued(self) - - super().delete(*args, **kwargs) - - def enqueue_if_needed(self): - from netbox_config_backup.utils.rq import enqueue_if_needed - return enqueue_if_needed(self) - - def requeue(self): - self.jobs.filter( - ~Q(status=JobStatusChoices.STATUS_COMPLETED) & - ~Q(status=JobStatusChoices.STATUS_FAILED) & - ~Q(status=JobStatusChoices.STATUS_ERRORED) - ).update( - status=JobStatusChoices.STATUS_FAILED - ) - remove_queued(self) - self.enqueue_if_needed() - def get_config(self, index='HEAD'): from netbox_config_backup.git import repository running = repository.read(f'{self.uuid}.running') diff --git a/netbox_config_backup/models/jobs.py b/netbox_config_backup/models/jobs.py index 7a28ba9..e8a9a29 100644 --- a/netbox_config_backup/models/jobs.py +++ b/netbox_config_backup/models/jobs.py @@ -68,16 +68,6 @@ class BackupJob(BigIDModel): def __str__(self): return str(self.job_id) - def delete(self, using=None, keep_parents=False): - queue = get_queue('netbox_config_backup.jobs') - - job = queue.fetch_job(f'{self.job_id}') - if job is not None: - job.cancel() - job.remove() - - super().delete(using=using, keep_parents=keep_parents) - @property def queue(self): return get_queue('netbox_config_backup.jobs') @@ -99,50 +89,4 @@ def set_status(self, status): """ self.status = status if status in JobStatusChoices.TERMINAL_STATE_CHOICES: - self.completed = timezone.now() - - def reschedule(self, time): - """ - Reschedule a job - """ - if self.status == JobStatusChoices.STATUS_PENDING: - self.scheduled = time - job = self.queue.fetch_job(f'{self.job_id}') - self.queue.schedule(job, time) - else: - raise Exception('Job is not in a state for rescheduling') - - @classmethod - def enqueue(cls, backup, delay=None): - from netbox_config_backup.utils import enqueue - return enqueue(backup, delay) - - @classmethod - def enqueue_if_needed(cls, backup, delay=None, job_id=None): - from netbox_config_backup.utils import enqueue_if_needed - return enqueue_if_needed(backup, delay, job_id) - - @classmethod - def needs_enqueue(cls, backup, job_id=None): - from netbox_config_backup.utils import needs_enqueue - return needs_enqueue(backup, job_id) - - @classmethod - def is_running(cls, backup, job_id=None): - from netbox_config_backup.utils import is_running - return is_running(backup, job_id) - - @classmethod - def is_queued(cls, backup, job_id=None): - from netbox_config_backup.utils import is_queued - return is_queued(backup, job_id) - - @classmethod - def remove_orphaned(cls): - from netbox_config_backup.utils import remove_orphaned - return remove_orphaned() - - @classmethod - def remove_queued(cls, backup): - from netbox_config_backup.utils import remove_queued - return remove_queued() \ No newline at end of file + self.completed = timezone.now() \ No newline at end of file diff --git a/netbox_config_backup/tasks.py b/netbox_config_backup/tasks.py index 64b83bc..e69de29 100644 --- a/netbox_config_backup/tasks.py +++ b/netbox_config_backup/tasks.py @@ -1,122 +0,0 @@ -import traceback -from datetime import timedelta - -from django.utils import timezone - -from core.choices import JobStatusChoices -from netbox import settings -from netbox.api.exceptions import ServiceUnavailable -from netbox_config_backup.backup.processing import logger -from netbox_config_backup.models import BackupJob -from netbox_config_backup.utils.configs import check_config_save_status -from netbox_config_backup.utils.logger import get_logger -from netbox_config_backup.utils.napalm import napalm_init -from netbox_config_backup.utils.rq import can_backup - - -def backup_config(backup, pk=None): - commit = None - if backup.device: - ip = backup.ip if backup.ip is not None else backup.device.primary_ip - else: - ip = None - if not can_backup(backup): - raise Exception(f'Cannot backup {backup}') - - if backup.device is not None and ip is not None: - logger.info(f'{backup}: Backup started') - # logger.debug(f'[{pk}] Connecting') - d = napalm_init(backup.device, ip) - # logger.debug(f'[{pk} - - try: - status = check_config_save_status(d) - if status is not None: - if status and not backup.config_status: - backup.config_status = status - backup.save() - elif not status and backup.config_status: - backup.config_status = status - backup.save() - elif not status and backup.config_status is None: - backup.config_status = status - backup.save() - elif status and backup.config_status is None: - backup.config_status = status - backup.save() - except Exception as e: - logger.error(f'{backup}: had error setting backup status: {e}') - - # logger.debug(f'[{pk}] Getting config') - configs = d.get_config() - # logger.debug(f'[{pk}] Finished config get') - - # logger.debug(f'[{pk}] Setting config') - commit = backup.set_config(configs, pk=pk) - # logger.debug(f'[{pk}] Finished config set') - - d.close() - logger.info(f'{backup}: Backup complete') - else: - logger.info(f'{backup}: No IP set') - - return commit - - -def backup_job(pk): - import netmiko - - try: - job_result = BackupJob.objects.get(pk=pk) - except BackupJob.DoesNotExist: - logger.error(f'Cannot locate job (Id: {pk}) in DB') - raise Exception(f'Cannot locate job (Id: {pk}) in DB') - backup = job_result.backup - - if not can_backup(backup): - logger.warning(f'Cannot backup due to additional factors') - return 1 - delay = timedelta( - seconds=settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency') - ) - - job_result.started = timezone.now() - job_result.status = JobStatusChoices.STATUS_RUNNING - job_result.save() - try: - # logger.debug(f'[{pk}] Starting backup') - commit = backup_config(backup, pk=pk) - # logger.debug(f'[{pk}] Finished backup') - - job_result.set_status(JobStatusChoices.STATUS_COMPLETED) - job_result.data = {'commit': f'{commit}' if commit is not None else ''} - job_result.set_status(JobStatusChoices.STATUS_COMPLETED) - # Enqueue next job if one doesn't exist - try: - # logger.debug(f'[{pk}] Starting Enqueue') - BackupJob.objects.filter(backup=backup).exclude( - status__in=JobStatusChoices.TERMINAL_STATE_CHOICES - ).update(status=JobStatusChoices.STATUS_FAILED) - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - # logger.debug(f'[{pk}] Finished Enqueue') - except Exception as e: - logger.error(f'Job Enqueue after completion failed for job: {backup}') - logger.error(f'\tException: {e}') - except netmiko.exceptions.ReadTimeout as e: - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - logger.warning(f'Netmiko read timeout on job: {backup}') - except ServiceUnavailable as e: - logger.info(f'Napalm service read failure on job: {backup} ({e})') - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - except Exception as e: - logger.error(f'Uncaught Exception on job: {backup}') - logger.error(e) - logger.warning(traceback.format_exc()) - job_result.set_status(JobStatusChoices.STATUS_ERRORED) - BackupJob.enqueue_if_needed(backup, delay=delay, job_id=job_result.job_id) - - # logger.debug(f'[{pk}] Saving result') - job_result.save() - - -logger = get_logger() diff --git a/netbox_config_backup/template_content.py b/netbox_config_backup/template_content.py index e504487..43107e3 100644 --- a/netbox_config_backup/template_content.py +++ b/netbox_config_backup/template_content.py @@ -31,10 +31,6 @@ def build_table(instance): instance = devices.first() table = build_table(instance) - if BackupJob.is_queued(instance) is False: - logger.debug(f'{instance}: Queuing Job') - BackupJob.enqueue(instance) - if htmx_partial(request): return self.render('htmx/table.html', extra_context={ 'object': instance, diff --git a/netbox_config_backup/utils/__init__.py b/netbox_config_backup/utils/__init__.py index a171750..a6de2f6 100644 --- a/netbox_config_backup/utils/__init__.py +++ b/netbox_config_backup/utils/__init__.py @@ -1,15 +1,7 @@ from .backups import get_backup_tables from .git import Differ -from .rq import enqueue_if_needed, enqueue, needs_enqueue, remove_queued, remove_orphaned, is_queued, is_running __all__ = ( 'get_backup_tables', 'Differ', - 'enqueue', - 'enqueue_if_needed', - 'needs_enqueue', - 'remove_queued', - 'remove_orphaned', - 'is_running', - 'is_queued', ) \ No newline at end of file diff --git a/netbox_config_backup/utils/rq.py b/netbox_config_backup/utils/rq.py index 80fc4a6..8a801e1 100644 --- a/netbox_config_backup/utils/rq.py +++ b/netbox_config_backup/utils/rq.py @@ -43,163 +43,3 @@ def can_backup(backup): return False return True - -def enqueue(backup, delay=None): - from netbox_config_backup.models import BackupJob - - scheduled = timezone.now() - if delay is not None: - logger.info(f'{backup}: Scheduling for: {scheduled + delay} at {scheduled} ') - scheduled = timezone.now() + delay - - result = BackupJob.objects.create( - backup=backup, - job_id=uuid.uuid4(), - scheduled=scheduled, - ) - queue = get_queue('netbox_config_backup.jobs') - if delay is None: - logger.debug(f'{backup}: Enqueued') - job = queue.enqueue( - 'netbox_config_backup.tasks.backup_job', - description=f'{backup.uuid}', - job_id=str(result.job_id), - pk=result.pk, - ) - logger.info(f'{backup}: {result.job_id}') - else: - logger.debug(f'{backup}: Enqueued') - job = queue.enqueue_in( - delay, - 'netbox_config_backup.tasks.backup_job', - description=f'{backup.uuid}', - job_id=str(result.job_id), - pk=result.pk - ) - logger.info(f'{backup}: {result.job_id}') - - return job - - -def enqueue_if_needed(backup, delay=None, job_id=None): - if needs_enqueue(backup, job_id=job_id): - return enqueue(backup, delay=delay) - return False - - -def needs_enqueue(backup, job_id=None): - queue = get_queue('netbox_config_backup.jobs') - scheduled = queue.scheduled_job_registry - started = queue.started_job_registry - - scheduled_jobs = scheduled.get_job_ids() - started_jobs = started.get_job_ids() - - if not can_backup(backup): - return False - elif is_queued(backup, job_id): - logger.info(f'Backup already queued for {backup}') - return False - - return True - - -def is_running(backup, job_id=None): - queue = get_queue('netbox_config_backup.jobs') - - jobs = backup.jobs.all() - queued = jobs.filter(status__in=[JobStatusChoices.STATUS_RUNNING]) - - if job_id is not None: - queued.exclude(job_id=job_id) - - for backupjob in queued.all(): - job = queue.fetch_job(f'{backupjob.job_id}') - if job and job.is_started and job.id in queue.started_job_registry.get_job_ids() + queue.get_job_ids(): - return True - elif job and job.is_started and job.id not in queue.started_job_registry.get_job_ids(): - status = { - 'is_canceled': job.is_canceled, - 'is_deferred': job.is_deferred, - 'is_failed': job.is_failed, - 'is_finished': job.is_finished, - 'is_queued': job.is_queued, - 'is_scheduled': job.is_scheduled, - 'is_started': job.is_started, - 'is_stopped': job.is_stopped, - } - job.cancel() - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - logger.warning(f'{backup}: Job in started queue but not in a registry, cancelling {status}') - elif job and job.is_canceled: - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - return False - - -def get_scheduled(backup, job_id=None): - queue = get_queue('netbox_config_backup.jobs') - - scheduled_jobs = queue.scheduled_job_registry.get_job_ids() - started_jobs = queue.started_job_registry.get_job_ids() - queued_jobs = queue.get_job_ids() - - jobs = backup.jobs.all() - queued = jobs.filter(status__in=[JobStatusChoices.STATUS_RUNNING, JobStatusChoices.STATUS_PENDING]) - - if job_id is not None: - queued.exclude(job_id=job_id) - - for backupjob in queued.all(): - job = queue.fetch_job(f'{backupjob.job_id}') - if job and (job.is_scheduled or job.is_queued) and job.id in scheduled_jobs + started_jobs + queued_jobs: - if job.enqueued_at is not None: - return job.enqueued_at - else: - return queue.scheduled_job_registry.get_scheduled_time(job) - elif job and (job.is_scheduled or job.is_queued) and job.id not in scheduled_jobs + started_jobs: - status = { - 'is_canceled': job.is_canceled, - 'is_deferred': job.is_deferred, - 'is_failed': job.is_failed, - 'is_finished': job.is_finished, - 'is_queued': job.is_queued, - 'is_scheduled': job.is_scheduled, - 'is_started': job.is_started, - 'is_stopped': job.is_stopped, - } - job.cancel() - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - logger.warning(f'{backup}: Job in scheduled or started queue but not in a registry, cancelling {status} {scheduled_jobs + started_jobs + queued_jobs}') - elif job and job.is_canceled: - backupjob.status = JobStatusChoices.STATUS_FAILED - backupjob.save() - return None - - -def is_queued(backup, job_id=None): - if get_scheduled(backup, job_id) is not None: - return True - return False - - -def remove_orphaned(): - queue = get_queue('netbox_config_backup.jobs') - registry = ScheduledJobRegistry(queue=queue) - - for job_id in registry.get_job_ids(): - try: - BackupJob.objects.get(job_id=job_id) - except BackupJob.DoesNotExist: - registry.remove(job_id) - - -def remove_queued(backup): - queue = get_queue('netbox_config_backup.jobs') - registry = ScheduledJobRegistry(queue=queue) - for job_id in registry.get_job_ids(): - job = queue.fetch_job(f'{job_id}') - if job.description == f'{backup.uuid}': - registry.remove(f'{job_id}') diff --git a/netbox_config_backup/views.py b/netbox_config_backup/views.py index cfc6328..fe30cf6 100644 --- a/netbox_config_backup/views.py +++ b/netbox_config_backup/views.py @@ -55,10 +55,6 @@ class BackupView(ObjectView): def get_extra_context(self, request, instance): - if BackupJob.is_queued(instance) is False: - logger.debug(f'{instance}: Queuing Job') - BackupJob.enqueue_if_needed(instance) - jobs = BackupJob.objects.filter(backup=instance).order_by() is_running = True if jobs.filter(status=JobStatusChoices.STATUS_RUNNING).count() > 0 else False is_pending = True if jobs.filter(status=JobStatusChoices.STATUS_PENDING).count() > 0 else False From bfede7902b73bc29b09c3daf857943a7cbbb0fbd Mon Sep 17 00:00:00 2001 From: Daniel Sheppard Date: Tue, 1 Oct 2024 07:59:59 -0500 Subject: [PATCH 5/7] Fix view --- netbox_config_backup/views.py | 1 - 1 file changed, 1 deletion(-) diff --git a/netbox_config_backup/views.py b/netbox_config_backup/views.py index fe30cf6..428a57b 100644 --- a/netbox_config_backup/views.py +++ b/netbox_config_backup/views.py @@ -67,7 +67,6 @@ def get_extra_context(self, request, instance): status = { 'status': job_status, - 'scheduled': BackupJob.is_queued(instance), 'next_attempt': instance.next_attempt, 'last_job': instance.jobs.filter(completed__isnull=False).last(), 'last_success': instance.last_backup, From a19f4b030d2ec87c3e9a1f988be6609cd35b18af Mon Sep 17 00:00:00 2001 From: Daniel Sheppard Date: Tue, 1 Oct 2024 18:10:18 -0500 Subject: [PATCH 6/7] Mostly working backup --- netbox_config_backup/__init__.py | 2 +- netbox_config_backup/backup/processing.py | 51 ++++-- netbox_config_backup/filtersets.py | 3 +- netbox_config_backup/forms.py | 9 +- netbox_config_backup/jobs/backup.py | 145 +++++++++++------- .../management/commands/enqueue_if_needed.py | 12 -- .../management/commands/fork.py | 51 ------ .../management/commands/listbackups.py | 9 -- .../management/commands/runbackup.py | 14 -- .../netbox_config_backup/backupjob.html | 34 ++++ netbox_config_backup/utils/configs.py | 2 +- 11 files changed, 172 insertions(+), 160 deletions(-) delete mode 100644 netbox_config_backup/management/commands/enqueue_if_needed.py delete mode 100644 netbox_config_backup/management/commands/fork.py create mode 100644 netbox_config_backup/templates/netbox_config_backup/backupjob.html diff --git a/netbox_config_backup/__init__.py b/netbox_config_backup/__init__.py index 872ce32..d094da3 100644 --- a/netbox_config_backup/__init__.py +++ b/netbox_config_backup/__init__.py @@ -32,7 +32,7 @@ class NetboxConfigBackup(PluginConfig): def ready(self, *args, **kwargs): super().ready() import sys - if 'manage.py' not in sys.argv[0]: + if 'rqworker' in sys.argv[1]: from netbox import settings from netbox_config_backup.jobs.backup import BackupRunner from netbox_config_backup.models import BackupJob, Backup diff --git a/netbox_config_backup/backup/processing.py b/netbox_config_backup/backup/processing.py index 03ed751..ce2469a 100644 --- a/netbox_config_backup/backup/processing.py +++ b/netbox_config_backup/backup/processing.py @@ -1,5 +1,6 @@ import logging import os +import time import traceback from django.db.models import Q @@ -7,7 +8,7 @@ from core.choices import JobStatusChoices from netbox.api.exceptions import ServiceUnavailable -from netbox_config_backup.models import BackupJob +from netbox_config_backup.models import BackupJob, Backup from netbox_config_backup.utils.configs import check_config_save_status from netbox_config_backup.utils.napalm import napalm_init from netbox_config_backup.utils.rq import can_backup @@ -16,16 +17,26 @@ def remove_stale_backupjobs(job: BackupJob): - BackupJob.objects.filter(backup=job.backup).exclude(status=JobStatusChoices.STATUS_COMPLETED).exclude( - pk=job.pk).delete() + pass -def run_backup(backup, job): - pid = os.getpid() +def run_backup(job_id, backup_id): + logger.info(f'Starting backup for job {job_id}') + try: + job = BackupJob.objects.get(pk=job_id) + except Exception as e: + logger.error(f'Unable to load job {job_id}: {e}') + logger.debug(f'\t{traceback.format_exc()}') + raise e - job.status = JobStatusChoices.STATUS_PENDING - job.pid = pid - job.save() try: + backup = Backup.objects.get(pk=backup_id) + backup.refresh_from_db() + pid = os.getpid() + + job.status = JobStatusChoices.STATUS_PENDING + job.pid = pid + job.save() + if not can_backup(backup): job.status = JobStatusChoices.STATUS_FAILED if not job.data: @@ -37,7 +48,11 @@ def run_backup(backup, job): return commit = None - ip = backup.ip if backup.ip is not None else backup.device.primary_ip + try: + ip = backup.ip if backup.ip is not None else backup.device.primary_ip + except Exception as e: + logger.debug(f'{e}: {backup}') + raise e if ip: try: @@ -45,6 +60,7 @@ def run_backup(backup, job): except (TimeoutError, ServiceUnavailable): job.status = JobStatusChoices.STATUS_FAILED job.data = {'error': f'Timeout Connecting to {backup.device} with ip {ip}'} + logger.debug = f'Timeout Connecting to {backup.device} with ip {ip}' job.save() return @@ -87,11 +103,12 @@ def run_backup(backup, job): job.save() logger.debug(f'{backup}: No IP set') except Exception as e: - job.status = JobStatusChoices.STATUS_ERRORED - if not job.data: - job.data = {} - job.data.update({'error': f'{e}'}) - job.full_clean() - job.save() - logger.error(f'Exception in {backup}: {e}') - logger.info(f'{backup}: {traceback.format_exc()}') + logger.error(f'Exception in {job_id}: {e}') + logger.info(f'\t{traceback.format_exc()}') + if job: + job.status = JobStatusChoices.STATUS_ERRORED + if not job.data: + job.data = {} + job.data.update({'error': f'{e}'}) + job.full_clean() + job.save() diff --git a/netbox_config_backup/filtersets.py b/netbox_config_backup/filtersets.py index b494042..7c14111 100644 --- a/netbox_config_backup/filtersets.py +++ b/netbox_config_backup/filtersets.py @@ -5,6 +5,7 @@ from django.utils.translation import gettext as _ from netaddr import AddrFormatError +from core.choices import JobStatusChoices from ipam.models import IPAddress from netbox.filtersets import NetBoxModelFilterSet, BaseFilterSet from dcim.models import Device @@ -21,7 +22,7 @@ class BackupJobFilterSet(BaseFilterSet): class Meta: model = models.BackupJob - fields = ['id', ] + fields = ['id', 'status'] class BackupFilterSet(BaseFilterSet): diff --git a/netbox_config_backup/forms.py b/netbox_config_backup/forms.py index a01cfd3..9b24fba 100644 --- a/netbox_config_backup/forms.py +++ b/netbox_config_backup/forms.py @@ -3,11 +3,13 @@ from django.forms import CharField from django.utils.translation import gettext as _ +from core.choices import JobStatusChoices from dcim.choices import DeviceStatusChoices from dcim.models import Device from ipam.models import IPAddress from netbox.forms import NetBoxModelForm, NetBoxModelBulkEditForm from netbox_config_backup.models import Backup, BackupJob +from utilities.forms import add_blank_choice from utilities.forms.fields import DynamicModelChoiceField, DynamicModelMultipleChoiceField, CommentField __all__ = ( @@ -63,8 +65,13 @@ def clean(self): class BackupJobFilterSetForm(forms.Form): model = BackupJob field_order = [ - 'q', + 'q', 'status', ] + status = forms.MultipleChoiceField( + required=False, + choices=add_blank_choice(JobStatusChoices), + label=_('Status') + ) class BackupFilterSetForm(forms.Form): diff --git a/netbox_config_backup/jobs/backup.py b/netbox_config_backup/jobs/backup.py index a79178e..aeaf7c5 100644 --- a/netbox_config_backup/jobs/backup.py +++ b/netbox_config_backup/jobs/backup.py @@ -1,29 +1,45 @@ import logging import time import uuid +import traceback from datetime import timedelta from multiprocessing import Process from django.db.models import Q from django.utils import timezone +from rq.job import JobStatus from core.choices import JobStatusChoices from netbox.jobs import JobRunner from netbox_config_backup.backup.processing import run_backup from netbox_config_backup.choices import StatusChoices from netbox_config_backup.models import Backup, BackupJob +from netbox_config_backup.utils.rq import can_backup logger = logging.getLogger(f"netbox_config_backup") +class SchedulerRunner(JobRunner): + class Meta: + name = "The scheduler" + + + + class BackupRunner(JobRunner): processes = {} class Meta: name = 'The Backup Job Runner' - def clean_stale_jobs(self, old): - for job in old.all(): + def clean_stale_jobs(self): + jobs = BackupJob.objects.order_by('created').filter( + status=JobStatusChoices.ENQUEUED_STATE_CHOICES, + ).prefetch_related('device') + scheduled = jobs.filter(status=JobStatusChoices.STATUS_SCHEDULED) + stale = jobs.filter(scheduled__lt=timezone.now() - timedelta(minutes=30)) + + for job in stale: if job.pid: pass job.status = JobStatusChoices.STATUS_ERRORED @@ -31,27 +47,63 @@ def clean_stale_jobs(self, old): job.data = {} job.data.update({'error': 'Job hung'}) job.save() + job.refresh_from_db() logger.warning(f'Job {job.backup} appears stuck, deleting') - def handle_processes(self): - for pk in list(self.processes.keys()): - process = self.processes.get(pk, {}).get('process') - job_pk = self.processes.get(pk, {}).get('job') - backup = self.processes.get(pk, {}).get('backup') - if not process.is_alive(): - logger.debug(f'Terminating process {process.pid} with job pk of {pk} for {backup}') - process.terminate() - del self.processes[pk] - job = BackupJob.objects.filter(pk=job_pk).first() - if job and job.status != JobStatusChoices.STATUS_COMPLETED: - job.status = JobStatusChoices.STATUS_ERRORED + for job in scheduled: + if job != scheduled.filter(backup=job.backup).last(): + job.status = JobStatusChoices.STATUS_FAILED + if not job.data: + job.data = {} + job.data.update({'error': 'Process terminated'}) + job.save() + + def schedule_jobs(self): + backups = Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False) + for backup in backups: + if can_backup(backup): + logger.debug(f'Queuing device {backup.device} for backup') + jobs = BackupJob.objects.filter(backup=backup, status__in=JobStatusChoices.ENQUEUED_STATE_CHOICES) + job = jobs.last() + if job is not None: + job.runner = self.job + job.status = JobStatusChoices.STATUS_SCHEDULED + job.scheduled = timezone.now() + job.save() + else: + job = BackupJob( + runner=self.job, + backup=backup, + status=JobStatusChoices.STATUS_SCHEDULED, + scheduled=timezone.now(), + job_id=uuid.uuid4(), + data={}, + ) + job.full_clean() + job.save() + else: + jobs = BackupJob.objects.filter(backup=backup, status__in=JobStatusChoices.ENQUEUED_STATE_CHOICES) + for job in jobs: + job.status = JobStatusChoices.STATUS_FAILED if not job.data: job.data = {} - job.data.update({'error': 'Process terminated'}) + job.data.update({'error': f'Cannot queue job'}) job.save() - def fork_process(self, backup, job): - process = Process(target=run_backup, args=(backup, job), ) + def run_processes(self): + for job in BackupJob.objects.filter(status=JobStatusChoices.STATUS_SCHEDULED): + job.refresh_from_db() + try: + process = self.fork_process(job) + process.join(1) + except Exception as e: + job.status = JobStatusChoices.STATUS_FAILED + job.data['error'] = str(e) + job.save() + + def fork_process(self, job): + backup = Backup.objects.get(pk=job.backup.pk) + process = Process(target=run_backup, args=(job.pk, backup.pk), ) data = { backup.pk: { 'process': process, @@ -61,51 +113,38 @@ def fork_process(self, backup, job): } self.processes.update(data) process.start() - logger.debug(f'Forking process {process.pid} for {backup.device} backup') + logger.debug(f'Forking process {process.pid} for {backup} backup') return process - def run(self, *args, **kwargs): - try: - running = BackupJob.objects.filter( - ~Q( - status__in=[ - JobStatusChoices.STATUS_COMPLETED, - JobStatusChoices.STATUS_ERRORED, - JobStatusChoices.STATUS_FAILED - ] - ) - ) - old = running.filter(scheduled__lt=timezone.now() - timedelta(minutes=30)) - self.clean_stale_jobs(old) - for backup in Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False): - logger.debug(f'Queuing device {backup.device} for backup') - job = BackupJob( - runner=self.job, - backup=backup, - status=JobStatusChoices.STATUS_SCHEDULED, - scheduled=timezone.now(), - job_id=uuid.uuid4(), - data={}, - ) - job.full_clean() - job.save() - if backup.device and (backup.ip or backup.device.primary_ip): - process = self.fork_process(backup, job) - process.join(1) - else: - job.status = JobStatusChoices.STATUS_FAILED + def handle_processes(self): + for pk in list(self.processes.keys()): + process = self.processes.get(pk, {}).get('process') + job_pk = self.processes.get(pk, {}).get('job') + backup = self.processes.get(pk, {}).get('backup') + if not process.is_alive(): + logger.debug(f'Terminating process {process.pid} with job pk of {pk} for {backup}') + process.terminate() + del self.processes[pk] + job = BackupJob.objects.filter(pk=job_pk).first() + if job and job.status != JobStatusChoices.STATUS_COMPLETED: + job.status = JobStatusChoices.STATUS_ERRORED if not job.data: job.data = {} - job.data.update({'error': f'Cannot backup {backup} due to no device or IPs'}) + job.data.update({'error': 'Process terminated'}) job.save() - logger.warning(f'Cannot backup {backup} due to no device or IPs') + def run(self, *args, **kwargs): + try: + self.clean_stale_jobs() + time.sleep(5) + self.schedule_jobs() + time.sleep(5) + self.run_processes() while(True): self.handle_processes() if len(self.processes) == 0: return time.sleep(1) except Exception as e: - import traceback - logger.error(traceback.format_exc()) - raise e + logger.warning(f'{traceback.format_exc()}') + logger.error(f'{e}') diff --git a/netbox_config_backup/management/commands/enqueue_if_needed.py b/netbox_config_backup/management/commands/enqueue_if_needed.py deleted file mode 100644 index 8810e08..0000000 --- a/netbox_config_backup/management/commands/enqueue_if_needed.py +++ /dev/null @@ -1,12 +0,0 @@ -from django.core.management.base import BaseCommand - - -class Command(BaseCommand): - def handle(self, *args, **options): - from netbox_config_backup.models import Backup, BackupJob - - for backup in Backup.objects.all(): - if BackupJob.needs_enqueue(backup=backup): - BackupJob.enqueue_if_needed(backup=backup) - print(f'Backup: {backup} has been queued') - diff --git a/netbox_config_backup/management/commands/fork.py b/netbox_config_backup/management/commands/fork.py deleted file mode 100644 index 3eb8998..0000000 --- a/netbox_config_backup/management/commands/fork.py +++ /dev/null @@ -1,51 +0,0 @@ -import uuid - -from django.core.management.base import BaseCommand -from django.db import transaction -from django.utils import timezone - -from netbox_config_backup.models import BackupJob -from netbox_config_backup.tasks import backup_job -from netbox_config_backup.utils import remove_queued -from netbox_config_backup.utils.rq import can_backup - - -class Command(BaseCommand): - def add_arguments(self, parser): - parser.add_argument('--time', dest='time', help="time") - parser.add_argument('--device', dest='device', help="Device Name") - - def handle(self, *args, **options): - from multiprocessing import Process - import time - def test(i): - self.stdout.write(f"Child {i} is running") - self.stdout.write(f"Child {i} sleeping 10 seconds") - time.sleep(10) - if i == 1: - raise Exception(f"Child {i} exception") - self.stdout.write(f"Child {i} sleep complete") - - processes = {} - for i in range(1, 3): - p = Process(target=test, args=(i,)) - p.start() - p.join(1) - self.stdout.write(f"Child {i} running") - processes.update({p.pid: p}) - - while True: - if len(processes) == 0: - break - for pid in list(processes.keys()): - process = processes.get(pid, None) - if not process.is_alive(): - print(f'{process} not alive') - del processes[pid] - time.sleep(1) - - self.stdout.write('Finished') - - - - diff --git a/netbox_config_backup/management/commands/listbackups.py b/netbox_config_backup/management/commands/listbackups.py index 1a954d2..d9f75b9 100644 --- a/netbox_config_backup/management/commands/listbackups.py +++ b/netbox_config_backup/management/commands/listbackups.py @@ -1,13 +1,4 @@ -import uuid - from django.core.management.base import BaseCommand -from django.db import transaction -from django.utils import timezone - -from netbox_config_backup.models import BackupJob -from netbox_config_backup.tasks import backup_job -from netbox_config_backup.utils import remove_queued -from netbox_config_backup.utils.rq import can_backup class Command(BaseCommand): diff --git a/netbox_config_backup/management/commands/runbackup.py b/netbox_config_backup/management/commands/runbackup.py index b3a478c..9b846d3 100644 --- a/netbox_config_backup/management/commands/runbackup.py +++ b/netbox_config_backup/management/commands/runbackup.py @@ -5,8 +5,6 @@ from django.utils import timezone from netbox_config_backup.models import BackupJob -from netbox_config_backup.tasks import backup_job -from netbox_config_backup.utils import remove_queued from netbox_config_backup.utils.rq import can_backup @@ -15,18 +13,6 @@ def add_arguments(self, parser): parser.add_argument('--time', dest='time', help="time") parser.add_argument('--device', dest='device', help="Device Name") - def run_backup(self, backup): - if can_backup(backup): - backupjob = backup.jobs.filter(backup__device=backup.device).last() - if backupjob is None: - backupjob = BackupJob.objects.create( - backup=backup, - scheduled=timezone.now(), - uuid=uuid.uuid4() - ) - backup_job(backupjob.pk) - remove_queued(backup) - def handle(self, *args, **options): from netbox_config_backup.models import Backup if options['device']: diff --git a/netbox_config_backup/templates/netbox_config_backup/backupjob.html b/netbox_config_backup/templates/netbox_config_backup/backupjob.html new file mode 100644 index 0000000..07505c0 --- /dev/null +++ b/netbox_config_backup/templates/netbox_config_backup/backupjob.html @@ -0,0 +1,34 @@ +{% extends 'generic/object.html' %} +{% load helpers %} + +{% block subtitle %} +
+{% endblock %} + +{% block content %} +
+
+
+
+ Backup Job +
+
+ + + + + + + + + +
Backup{{ object.backup|linkify }}
Status{{ object.status }}
+
+
+ {% include 'inc/panels/comments.html' %} +
+ +
+
+
+{% endblock %} \ No newline at end of file diff --git a/netbox_config_backup/utils/configs.py b/netbox_config_backup/utils/configs.py index 336a386..293279c 100644 --- a/netbox_config_backup/utils/configs.py +++ b/netbox_config_backup/utils/configs.py @@ -6,7 +6,7 @@ def check_config_save_status(d): - logger.info(f'Switch: {d.hostname}') + logger.debug(f'Switch: {d.hostname}') platform = { 'ios': { 'running': { From a89d802ed90f983557b3790c9ecdec461d5d65e6 Mon Sep 17 00:00:00 2001 From: Daniel Sheppard Date: Tue, 1 Oct 2024 19:41:12 -0500 Subject: [PATCH 7/7] Final working backup --- netbox_config_backup/backup/processing.py | 6 ++- netbox_config_backup/jobs/backup.py | 15 ++++--- .../migrations/0018_move_to_nbmodel.py | 42 +++++++++++++++++++ netbox_config_backup/models/backups.py | 2 +- netbox_config_backup/models/jobs.py | 3 +- netbox_config_backup/utils/db.py | 5 +++ 6 files changed, 61 insertions(+), 12 deletions(-) create mode 100644 netbox_config_backup/migrations/0018_move_to_nbmodel.py create mode 100644 netbox_config_backup/utils/db.py diff --git a/netbox_config_backup/backup/processing.py b/netbox_config_backup/backup/processing.py index ce2469a..8f12054 100644 --- a/netbox_config_backup/backup/processing.py +++ b/netbox_config_backup/backup/processing.py @@ -9,6 +9,7 @@ from core.choices import JobStatusChoices from netbox.api.exceptions import ServiceUnavailable from netbox_config_backup.models import BackupJob, Backup +from netbox_config_backup.utils.db import close_db from netbox_config_backup.utils.configs import check_config_save_status from netbox_config_backup.utils.napalm import napalm_init from netbox_config_backup.utils.rq import can_backup @@ -19,7 +20,8 @@ def remove_stale_backupjobs(job: BackupJob): pass -def run_backup(job_id, backup_id): +def run_backup(job_id): + close_db() logger.info(f'Starting backup for job {job_id}') try: job = BackupJob.objects.get(pk=job_id) @@ -29,7 +31,7 @@ def run_backup(job_id, backup_id): raise e try: - backup = Backup.objects.get(pk=backup_id) + backup = Backup.objects.get(pk=job.backup.pk) backup.refresh_from_db() pid = os.getpid() diff --git a/netbox_config_backup/jobs/backup.py b/netbox_config_backup/jobs/backup.py index aeaf7c5..142b3d1 100644 --- a/netbox_config_backup/jobs/backup.py +++ b/netbox_config_backup/jobs/backup.py @@ -14,6 +14,7 @@ from netbox_config_backup.backup.processing import run_backup from netbox_config_backup.choices import StatusChoices from netbox_config_backup.models import Backup, BackupJob +from netbox_config_backup.utils.db import close_db from netbox_config_backup.utils.rq import can_backup logger = logging.getLogger(f"netbox_config_backup") @@ -92,7 +93,6 @@ def schedule_jobs(self): def run_processes(self): for job in BackupJob.objects.filter(status=JobStatusChoices.STATUS_SCHEDULED): - job.refresh_from_db() try: process = self.fork_process(job) process.join(1) @@ -102,21 +102,22 @@ def run_processes(self): job.save() def fork_process(self, job): - backup = Backup.objects.get(pk=job.backup.pk) - process = Process(target=run_backup, args=(job.pk, backup.pk), ) + close_db() + process = Process(target=run_backup, args=(job.pk, ), ) data = { - backup.pk: { + job.backup.pk: { 'process': process, - 'backup': backup.pk, + 'backup': job.backup.pk, 'job': job.pk } } self.processes.update(data) process.start() - logger.debug(f'Forking process {process.pid} for {backup} backup') + logger.debug(f'Forking process {process.pid} for {job.backup} backup') return process def handle_processes(self): + close_db() for pk in list(self.processes.keys()): process = self.processes.get(pk, {}).get('process') job_pk = self.processes.get(pk, {}).get('job') @@ -136,9 +137,7 @@ def handle_processes(self): def run(self, *args, **kwargs): try: self.clean_stale_jobs() - time.sleep(5) self.schedule_jobs() - time.sleep(5) self.run_processes() while(True): self.handle_processes() diff --git a/netbox_config_backup/migrations/0018_move_to_nbmodel.py b/netbox_config_backup/migrations/0018_move_to_nbmodel.py new file mode 100644 index 0000000..29a597a --- /dev/null +++ b/netbox_config_backup/migrations/0018_move_to_nbmodel.py @@ -0,0 +1,42 @@ +# Generated by Django 5.0.8 on 2024-10-01 23:44 + +import taggit.managers +import utilities.json +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('extras', '0121_customfield_related_object_filter'), + ('netbox_config_backup', '0017_add_job_to_backupjob'), + ] + + operations = [ + migrations.AddField( + model_name='backupjob', + name='custom_field_data', + field=models.JSONField( + blank=True, default=dict, encoder=utilities.json.CustomFieldJSONEncoder + ), + ), + migrations.AddField( + model_name='backupjob', + name='last_updated', + field=models.DateTimeField(auto_now=True, null=True), + ), + migrations.AddField( + model_name='backupjob', + name='tags', + field=taggit.managers.TaggableManager( + through='extras.TaggedItem', to='extras.Tag' + ), + ), + migrations.AlterField( + model_name='backupjob', + name='id', + field=models.BigAutoField( + auto_created=True, primary_key=True, serialize=False + ), + ), + ] diff --git a/netbox_config_backup/models/backups.py b/netbox_config_backup/models/backups.py index d84662f..49ed9c0 100644 --- a/netbox_config_backup/models/backups.py +++ b/netbox_config_backup/models/backups.py @@ -100,7 +100,7 @@ def set_config(self, configs, files=('running', 'startup'), pk=None): else: #logger.debug(f'[{pk}] Saving commit') bc = BackupCommit(sha=commit, time=time) - logger.info(f'{self}: {commit}:{bc.time}') + logger.debug(f'{self}: {commit}:{bc.time}') bc.save() for change in log.get('changes', []): diff --git a/netbox_config_backup/models/jobs.py b/netbox_config_backup/models/jobs.py index e8a9a29..33418bf 100644 --- a/netbox_config_backup/models/jobs.py +++ b/netbox_config_backup/models/jobs.py @@ -8,13 +8,14 @@ from django_rq import get_queue from core.choices import JobStatusChoices +from netbox.models import NetBoxModel from utilities.querysets import RestrictedQuerySet from .abstract import BigIDModel logger = logging.getLogger(f"netbox_config_backup") -class BackupJob(BigIDModel): +class BackupJob(NetBoxModel): runner = models.ForeignKey( verbose_name=_('Job Run'), to='core.Job', diff --git a/netbox_config_backup/utils/db.py b/netbox_config_backup/utils/db.py new file mode 100644 index 0000000..e560b15 --- /dev/null +++ b/netbox_config_backup/utils/db.py @@ -0,0 +1,5 @@ +from django import db + + +def close_db(): + db.connections.close_all()