diff --git a/web/api/shared_api_tasks.py b/web/api/shared_api_tasks.py index d21ca23fa..472421616 100644 --- a/web/api/shared_api_tasks.py +++ b/web/api/shared_api_tasks.py @@ -1,7 +1,7 @@ # include all the celery tasks to be used in the API, do not put in tasks.py import requests -from reNgine.common_func import create_inappnotification, get_hackerone_key_username +from reNgine.common_func import create_inappnotification, get_hackerone_key_username, is_valid_asset_identifier from reNgine.definitions import PROJECT_LEVEL_NOTIFICATION, HACKERONE_ALLOWED_ASSET_TYPES from reNgine.celery import app from reNgine.database_utils import bulk_import_targets @@ -62,12 +62,14 @@ def fetch_program_details_from_hackerone(program_handle): # in future release we will add this in target out_of_scope # we need to filter the scope that are supported by reNgine now - if asset_type in HACKERONE_ALLOWED_ASSET_TYPES and eligible_for_submission: + if asset_type in HACKERONE_ALLOWED_ASSET_TYPES \ + and eligible_for_submission \ + and is_valid_asset_identifier(asset_identifier): assets.append(asset_identifier) # in some cases asset_type is OTHER and may contain the asset - elif asset_type == 'OTHER' and ('.' in asset_identifier or asset_identifier.startswith('http')): - assets.append(asset_identifier) + # elif asset_type == 'OTHER' and ('.' in asset_identifier or asset_identifier.startswith('http')): + # assets.append(asset_identifier) # cleanup assets assets = list(set(assets)) diff --git a/web/api/views.py b/web/api/views.py index fcea8abd9..0ee9a0fb3 100644 --- a/web/api/views.py +++ b/web/api/views.py @@ -25,7 +25,7 @@ from reNgine.celery import app from reNgine.common_func import * from reNgine.database_utils import * -from reNgine.definitions import ABORTED_TASK +from reNgine.definitions import ABORTED_TASK, HACKERONE_ALLOWED_ASSET_TYPES from reNgine.tasks import * from reNgine.llm import * from reNgine.utilities import is_safe_path @@ -64,8 +64,6 @@ class HackerOneProgramViewSet(viewsets.ViewSet): API_BASE = 'https://api.hackerone.com/v1/hackers' - ALLOWED_ASSET_TYPES = ["WILDCARD", "DOMAIN", "IP_ADDRESS", "CIDR", "URL"] - def list(self, request): try: sort_by = request.query_params.get('sort_by', 'age') @@ -179,9 +177,16 @@ def program_details(self, request, pk=None): if program_details: filtered_scopes = [ scope for scope in program_details.get('relationships', {}).get('structured_scopes', {}).get('data', []) - if scope.get('attributes', {}).get('asset_type') in self.ALLOWED_ASSET_TYPES + if scope.get('attributes', {}).get('asset_type') in HACKERONE_ALLOWED_ASSET_TYPES ] + # refine filtered_scopes with that are valid assets + filtered_scopes = [ + scope for scope in filtered_scopes + if is_valid_asset_identifier(scope['attributes']['asset_identifier']) + ] + + program_details['relationships']['structured_scopes']['data'] = filtered_scopes return Response(program_details) diff --git a/web/reNgine/common_func.py b/web/reNgine/common_func.py index ad58a94a8..632cc3e36 100644 --- a/web/reNgine/common_func.py +++ b/web/reNgine/common_func.py @@ -1646,3 +1646,43 @@ def get_ips_from_cidr_range(target): return [str(ip) for ip in ipaddress.IPv4Network(target, False)] except Exception as e: logger.error(f'{target} is not a valid CIDR range. Skipping.') + + +def is_valid_asset_identifier(identifier): + """ + This function will check if the asset is supported by reNgine + As of now supported assets are: + - Domain + - IP Address + - URL + - Wildcard Domain for example *.example.com, *.example.co.uk etc + Args: + identifier: str: Identifier to check + Returns: + bool: True if identifier is valid, False otherwise + """ + domain_regex = r'^(?!-)[A-Za-z0-9-]{1,63}(? {