Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New backup method #105

Merged
merged 7 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:

steps:
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

Expand Down
10 changes: 10 additions & 0 deletions netbox_config_backup/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,15 @@ class NetboxConfigBackup(PluginConfig):
]
graphql_schema = 'graphql.schema.schema'

def ready(self, *args, **kwargs):
    """Run the parent ``ready()`` hook, then enqueue the backup runner.

    Nothing is enqueued when the first command-line argument
    (``sys.argv[0]``) contains ``manage.py``.
    """
    super().ready()
    import sys

    launcher = sys.argv[0]
    if 'manage.py' in launcher:
        return

    # Imported here, after the parent hook has run, rather than at module level.
    from netbox import settings
    from netbox_config_backup.jobs.backup import BackupRunner
    # Not referenced below; kept so import-time behaviour stays the same.
    from netbox_config_backup.models import BackupJob, Backup

    plugin_settings = settings.PLUGINS_CONFIG.get('netbox_config_backup', {})
    # NOTE(review): presumably 'frequency' is in seconds and the interval in
    # minutes, hence the division by 60 -- confirm against the plugin settings.
    frequency = plugin_settings.get('frequency') / 60
    BackupRunner.enqueue_once(interval=frequency)


config = NetboxConfigBackup
Empty file.
97 changes: 97 additions & 0 deletions netbox_config_backup/backup/processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import logging
import os
import traceback

from django.db.models import Q
from django.utils import timezone

from core.choices import JobStatusChoices
from netbox.api.exceptions import ServiceUnavailable
from netbox_config_backup.models import BackupJob
from netbox_config_backup.utils.configs import check_config_save_status
from netbox_config_backup.utils.napalm import napalm_init
from netbox_config_backup.utils.rq import can_backup

logger = logging.getLogger(f"netbox_config_backup")


def remove_stale_backupjobs(job: BackupJob):
    """Delete the other non-completed BackupJob rows of the same backup as *job*.

    Rows whose status is ``STATUS_COMPLETED`` and the row for *job* itself
    are left in place.
    """
    same_backup = BackupJob.objects.filter(backup=job.backup)
    not_completed = same_backup.exclude(status=JobStatusChoices.STATUS_COMPLETED)
    not_completed.exclude(pk=job.pk).delete()

def _record_job_error(job, status, message):
    """Set *status* on *job*, store *message* under ``data['error']``, validate and save."""
    job.status = status
    if not job.data:
        job.data = {}
    job.data.update({'error': message})
    job.full_clean()
    job.save()


def run_backup(backup, job):
    """Fetch the configuration for *backup* and record the outcome on *job*.

    The job moves through these states:

    * ``STATUS_PENDING`` immediately, with the current process id stored in
      ``job.pid``.
    * ``STATUS_FAILED`` when ``can_backup(backup)`` is false, when no IP is
      available, or when ``napalm_init`` raises ``TimeoutError`` or
      ``ServiceUnavailable``.
    * ``STATUS_RUNNING`` once the device handle has been obtained.
    * ``STATUS_COMPLETED`` after the configuration has been passed to
      ``backup.set_config``; other unfinished jobs of the same backup are
      then removed.
    * ``STATUS_ERRORED`` when any other exception escapes; the exception text
      is stored in ``job.data['error']`` and the traceback is logged.

    Args:
        backup: The backup record to process. Its ``ip``, ``device`` and
            ``config_status`` attributes are read, and ``set_config`` is called.
        job: The job record updated with status, timestamps and error data.

    Returns:
        None. All results are written to *job* and *backup*.
    """
    job.status = JobStatusChoices.STATUS_PENDING
    job.pid = os.getpid()
    job.save()
    try:
        if not can_backup(backup):
            _record_job_error(job, JobStatusChoices.STATUS_FAILED, f'Cannot backup {backup}')
            logger.warning(f'Cannot backup {backup}')
            return

        # An explicitly configured IP wins over the device's primary IP.
        ip = backup.ip if backup.ip is not None else backup.device.primary_ip
        if not ip:
            _record_job_error(job, JobStatusChoices.STATUS_FAILED, f'{backup}: No IP set')
            logger.debug(f'{backup}: No IP set')
            return

        try:
            d = napalm_init(backup.device, ip)
        except (TimeoutError, ServiceUnavailable):
            # Here the error replaces any existing job data and is saved
            # without validation.
            job.status = JobStatusChoices.STATUS_FAILED
            job.data = {'error': f'Timeout Connecting to {backup.device} with ip {ip}'}
            job.save()
            return

        try:
            job.status = JobStatusChoices.STATUS_RUNNING
            job.started = timezone.now()
            job.save()
            try:
                status = check_config_save_status(d)
                # Store the reported status when none is recorded yet or when
                # its truthiness differs from the recorded one.
                if status is not None and (
                    backup.config_status is None
                    or bool(status) != bool(backup.config_status)
                ):
                    backup.config_status = status
                    backup.save()
            except Exception as e:
                # Best effort: a failed status check must not stop the backup.
                logger.error(f'{backup}: had error setting backup status: {e}')

            configs = d.get_config()
            backup.set_config(configs)
        finally:
            # Close the device handle even when fetching or storing the
            # configuration raises, so the connection is not leaked.
            d.close()

        logger.info(f'{backup}: Backup complete')
        job.status = JobStatusChoices.STATUS_COMPLETED
        job.completed = timezone.now()
        job.save()
        remove_stale_backupjobs(job=job)
    except Exception as e:
        _record_job_error(job, JobStatusChoices.STATUS_ERRORED, f'{e}')
        logger.error(f'Exception in {backup}: {e}')
        logger.info(f'{backup}: {traceback.format_exc()}')
11 changes: 11 additions & 0 deletions netbox_config_backup/filtersets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
from utilities.filters import MultiValueCharFilter


class BackupJobFilterSet(BaseFilterSet):
    """Filter set for ``BackupJob`` with an ``id`` filter and a free-text ``q`` filter."""

    q = django_filters.CharFilter(
        method='search',
        label=_('Search'),
    )

    class Meta:
        model = models.BackupJob
        fields = ['id', ]

    def search(self, queryset, name, value):
        """Apply the free-text ``q`` filter.

        The ``q`` filter is declared with ``method='search'``, so this class
        provides the method it names.

        Args:
            queryset: The queryset to narrow.
            name: The filter field name passed to the method (unused).
            value: The search text.

        Returns:
            *queryset* unchanged when *value* is blank, otherwise the
            filtered queryset.
        """
        if not value.strip():
            return queryset
        # NOTE(review): assumes BackupJob.backup relates to a model with a
        # ``name`` field -- confirm against the model definitions.
        return queryset.filter(backup__name__icontains=value)


class BackupFilterSet(BaseFilterSet):
q = django_filters.CharFilter(
method='search',
Expand Down
10 changes: 9 additions & 1 deletion netbox_config_backup/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
from dcim.models import Device
from ipam.models import IPAddress
from netbox.forms import NetBoxModelForm, NetBoxModelBulkEditForm
from netbox_config_backup.models import Backup
from netbox_config_backup.models import Backup, BackupJob
from utilities.forms.fields import DynamicModelChoiceField, DynamicModelMultipleChoiceField, CommentField

__all__ = (
'BackupForm',
'BackupJobFilterSetForm',
'BackupFilterSetForm',
'BackupBulkEditForm',
)
Expand Down Expand Up @@ -59,6 +60,13 @@ def clean(self):
raise ValidationError({'device': f'{device}\'s platform ({device.platform}) has no napalm driver'})


class BackupJobFilterSetForm(forms.Form):
    """Filter form associated with the ``BackupJob`` model."""

    model = BackupJob
    # NOTE(review): only ``q`` is listed for ordering, and no ``q`` field is
    # declared on this class itself -- confirm where it is expected to come from.
    field_order = ['q']


class BackupFilterSetForm(forms.Form):
model = Backup
field_order = [
Expand Down
Empty file.
111 changes: 111 additions & 0 deletions netbox_config_backup/jobs/backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import logging
import time
import uuid
from datetime import timedelta
from multiprocessing import Process

from django.db.models import Q
from django.utils import timezone

from core.choices import JobStatusChoices
from netbox.jobs import JobRunner
from netbox_config_backup.backup.processing import run_backup
from netbox_config_backup.choices import StatusChoices
from netbox_config_backup.models import Backup, BackupJob

logger = logging.getLogger(f"netbox_config_backup")


class BackupRunner(JobRunner):
    """Job runner that starts one child process per active backup and waits for them."""

    # Child-process bookkeeping keyed by Backup pk. Declared on the class, so
    # the same dict is shared by every instance in this interpreter.
    processes = {}

    class Meta:
        name = 'The Backup Job Runner'

    def clean_stale_jobs(self, old):
        """Mark every job in *old* as errored with a 'Job hung' error message.

        Args:
            old: A queryset of BackupJob rows considered stale by the caller.
        """
        for job in old.all():
            job.status = JobStatusChoices.STATUS_ERRORED
            if not job.data:
                job.data = {}
            job.data.update({'error': 'Job hung'})
            job.save()
            logger.warning(f'Job {job.backup} appears stuck, marking as errored')

    def handle_processes(self):
        """Drop finished child processes from ``self.processes``.

        If the job behind a finished process is not ``STATUS_COMPLETED``, it is
        marked ``STATUS_ERRORED`` with a 'Process terminated' error message.
        """
        # Iterate over a snapshot because entries are deleted inside the loop.
        for pk, entry in list(self.processes.items()):
            process = entry.get('process')
            if process.is_alive():
                continue

            job_pk = entry.get('job')
            logger.debug(f'Process {process.pid} for backup pk {pk} (job pk {job_pk}) has exited')
            # The child has already exited, so this returns immediately;
            # explicitly joining finished children is the documented practice.
            process.join()
            del self.processes[pk]

            job = BackupJob.objects.filter(pk=job_pk).first()
            if job and job.status != JobStatusChoices.STATUS_COMPLETED:
                job.status = JobStatusChoices.STATUS_ERRORED
                if not job.data:
                    job.data = {}
                job.data.update({'error': 'Process terminated'})
                job.save()

    def fork_process(self, backup, job):
        """Start ``run_backup(backup, job)`` in a child process and track it.

        Args:
            backup: The backup to process; its pk is the tracking key.
            job: The job record handed to the child.

        Returns:
            The started ``multiprocessing.Process``.
        """
        # NOTE(review): the child may inherit this process's open database
        # connections -- confirm whether they need closing before start().
        process = Process(target=run_backup, args=(backup, job))
        self.processes[backup.pk] = {
            'process': process,
            'backup': backup.pk,
            'job': job.pk,
        }
        process.start()
        logger.debug(f'Forking process {process.pid} for {backup.device} backup')
        return process

    def run(self, *args, **kwargs):
        """Create a job for every active backup, fork the workers and wait for them.

        Steps:
            1. Jobs that are not completed, errored or failed and were scheduled
               more than 30 minutes ago are passed to ``clean_stale_jobs``.
            2. For every active Backup with a device, a BackupJob is created in
               ``STATUS_SCHEDULED``. With an IP (own or the device's primary)
               a child process is forked and joined for up to one second;
               otherwise the job is marked ``STATUS_FAILED``.
            3. ``handle_processes`` is polled once per second until no child
               process is tracked any more.

        Raises:
            Exception: Any exception is logged with its traceback and re-raised.
        """
        try:
            running = BackupJob.objects.filter(
                ~Q(
                    status__in=[
                        JobStatusChoices.STATUS_COMPLETED,
                        JobStatusChoices.STATUS_ERRORED,
                        JobStatusChoices.STATUS_FAILED,
                    ]
                )
            )
            old = running.filter(scheduled__lt=timezone.now() - timedelta(minutes=30))
            self.clean_stale_jobs(old)

            active = Backup.objects.filter(status=StatusChoices.STATUS_ACTIVE, device__isnull=False)
            for backup in active:
                logger.debug(f'Queuing device {backup.device} for backup')
                job = BackupJob(
                    runner=self.job,
                    backup=backup,
                    status=JobStatusChoices.STATUS_SCHEDULED,
                    scheduled=timezone.now(),
                    job_id=uuid.uuid4(),
                    data={},
                )
                job.full_clean()
                job.save()
                if backup.device and (backup.ip or backup.device.primary_ip):
                    process = self.fork_process(backup, job)
                    # Wait at most one second before moving on to the next backup.
                    process.join(1)
                else:
                    job.status = JobStatusChoices.STATUS_FAILED
                    if not job.data:
                        job.data = {}
                    job.data.update({'error': f'Cannot backup {backup} due to no device or IPs'})
                    job.save()
                    logger.warning(f'Cannot backup {backup} due to no device or IPs')

            while True:
                self.handle_processes()
                if not self.processes:
                    return
                time.sleep(1)
        except Exception:
            import traceback
            logger.error(traceback.format_exc())
            raise
5 changes: 5 additions & 0 deletions netbox_config_backup/management/commands/fork.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def test(i):
self.stdout.write(f"Child {i} is running")
self.stdout.write(f"Child {i} sleeping 10 seconds")
time.sleep(10)
if i == 1:
raise Exception(f"Child {i} exception")
self.stdout.write(f"Child {i} sleep complete")

processes = {}
Expand All @@ -38,9 +40,12 @@ def test(i):
for pid in list(processes.keys()):
process = processes.get(pid, None)
if not process.is_alive():
print(f'{process} not alive')
del processes[pid]
time.sleep(1)

self.stdout.write('Finished')




18 changes: 18 additions & 0 deletions netbox_config_backup/migrations/0016_add_pid_to_backup_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.0.8 on 2024-09-30 21:53

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable, optional ``pid`` big-integer field to ``backupjob``."""

    dependencies = [('netbox_config_backup', '0015_backup_comments_backup_description')]

    operations = [
        migrations.AddField(
            model_name='backupjob',
            name='pid',
            field=models.BigIntegerField(null=True, blank=True),
        ),
    ]
26 changes: 26 additions & 0 deletions netbox_config_backup/migrations/0017_add_job_to_backupjob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 5.0.8 on 2024-10-01 01:52

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional ``runner`` foreign key from ``backupjob`` to ``core.job``.

    The key is nullable and is set to NULL when the referenced row is deleted.
    """

    dependencies = [
        ('core', '0012_job_object_type_optional'),
        ('netbox_config_backup', '0016_add_pid_to_backup_job'),
    ]

    operations = [
        migrations.AddField(
            model_name='backupjob',
            name='runner',
            field=models.ForeignKey(
                to='core.job',
                on_delete=models.SET_NULL,
                related_name='backup_job',
                null=True,
                blank=True,
            ),
        ),
    ]
23 changes: 0 additions & 23 deletions netbox_config_backup/models/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@
from django_rq import get_queue

from dcim.models import Device
from core.choices import JobStatusChoices
from netbox.models import PrimaryModel

from netbox_config_backup.choices import StatusChoices
from netbox_config_backup.helpers import get_repository_dir

from netbox_config_backup.utils.rq import remove_queued
from ..querysets import BackupQuerySet
from ..utils import Differ

Expand Down Expand Up @@ -58,27 +56,6 @@ def get_absolute_url(self):
def __str__(self):
# The string form of the object is its ``name`` attribute.
return self.name

def delete(self, *args, **kwargs):
    """Call ``remove_queued`` for this object, then delegate to the parent ``delete``.

    Positional and keyword arguments are forwarded unchanged to the parent.
    """
    # The queue handle is looked up but not used further in this method.
    jobs_queue = get_queue('netbox_config_backup.jobs')
    remove_queued(self)
    super().delete(*args, **kwargs)

def enqueue_if_needed(self):
    """Return the result of the ``utils.rq.enqueue_if_needed`` helper for this object."""
    # Imported inside the method rather than at module level.
    from netbox_config_backup.utils.rq import enqueue_if_needed as _enqueue_if_needed
    return _enqueue_if_needed(self)

def requeue(self):
    """Fail this object's unfinished jobs, clear its queued entries and enqueue again.

    Jobs whose status is none of completed, failed or errored are updated to
    ``STATUS_FAILED`` before ``remove_queued`` and ``enqueue_if_needed`` run.
    """
    finished_states = (
        JobStatusChoices.STATUS_COMPLETED,
        JobStatusChoices.STATUS_FAILED,
        JobStatusChoices.STATUS_ERRORED,
    )
    unfinished = self.jobs.filter(~Q(status__in=finished_states))
    unfinished.update(status=JobStatusChoices.STATUS_FAILED)
    remove_queued(self)
    self.enqueue_if_needed()

def get_config(self, index='HEAD'):
from netbox_config_backup.git import repository
running = repository.read(f'{self.uuid}.running')
Expand Down
Loading
Loading