From 6eed786cd88aed7c0286d716d1e27833136449d1 Mon Sep 17 00:00:00 2001
From: Chin Yeung Li <tli@nexb.com>
Date: Wed, 7 Aug 2024 12:45:28 +0800
Subject: [PATCH] #566 - Work in progress to support multiple about_resource

Signed-off-by: Chin Yeung Li <tli@nexb.com>
---
 src/attributecode/gen.py                      | 170 +++++++++++-------
 src/attributecode/model.py                    |   7 +-
 src/attributecode/util.py                     |  23 ++-
 tests/test_attrib.py                          |   6 +-
 tests/test_model.py                           |  12 ++
 tests/testdata/test_model/expected.json       |   6 +-
 .../test_model/multiple_about_resource.ABOUT  |   4 +
 .../multiple_about_resource_expected.csv      |   3 +
 8 files changed, 157 insertions(+), 74 deletions(-)
 create mode 100644 tests/testdata/test_model/multiple_about_resource.ABOUT
 create mode 100644 tests/testdata/test_model/multiple_about_resource_expected.csv

diff --git a/src/attributecode/gen.py b/src/attributecode/gen.py
index 3b824c20..b1a97c35 100644
--- a/src/attributecode/gen.py
+++ b/src/attributecode/gen.py
@@ -94,12 +94,12 @@ def check_newline_in_file_field(component):
         if k in file_fields:
             try:
                 if '\n' in component[k]:
-                    if k == u'about_resource':
-                        msg = (
-                            "Multiple lines detected in 'about_resource' for '%s' which is not supported.") % component['about_resource']
-                    else:
-                        msg = ("New line character detected in '%s' for '%s' which is not supported."
-                               "\nPlease use ',' to declare multiple files.") % (k, component['about_resource'])
+                    # if k == u'about_resource':
+                    #    msg = (
+                    #        "Multiple lines detected in 'about_resource' for '%s' which is not supported.") % component['about_resource']
+                    # else:
+                    msg = ("New line character detected in '%s' for '%s' which is not supported."
+                           "\nPlease use ',' to declare multiple files.") % (k, component['about_resource'])
                     errors.append(Error(CRITICAL, msg))
             except:
                 pass
@@ -123,9 +123,6 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
     Load the inventory file at `location` for ABOUT and LICENSE files stored in
     the `base_dir`. Return a list of errors and a list of About objects
     validated against the `base_dir`.
-
-    Optionally use `reference_dir` as the directory location of extra reference
-    license and notice files to reuse.
     """
     errors = []
     abouts = []
@@ -164,21 +161,37 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
     for component in stripped_inv:
         if not from_attrib:
             if 'about_resource' in component:
-                arp = component['about_resource']
-                dup_err = check_duplicated_about_resource(arp, arp_list)
-                if dup_err:
-                    if not dup_err in errors:
-                        errors.append(dup_err)
-                else:
-                    arp_list.append(arp)
-
-                invalid_about_filename = check_about_resource_filename(arp)
-                if invalid_about_filename and not invalid_about_filename in errors:
-                    errors.append(invalid_about_filename)
+                if isinstance(component['about_resource'], str):
+                    arp = component['about_resource']
+                    dup_err = check_duplicated_about_resource(arp, arp_list)
+                    if dup_err:
+                        if dup_err not in errors:
+                            errors.append(dup_err)
+                    else:
+                        arp_list.append(arp)
 
+                    invalid_about_filename = check_about_resource_filename(arp)
+                    if invalid_about_filename and invalid_about_filename not in errors:
+                        errors.append(invalid_about_filename)
+                else:
+                    for arp in component['about_resource']:
+                        dup_err = check_duplicated_about_resource(
+                            arp, arp_list)
+                        if dup_err:
+                            if dup_err not in errors:
+                                errors.append(dup_err)
+                        else:
+                            arp_list.append(arp)
+
+                        invalid_about_filename = check_about_resource_filename(
+                            arp)
+                        if invalid_about_filename and invalid_about_filename not in errors:
+                            errors.append(invalid_about_filename)
+        """
         newline_in_file_err = check_newline_in_file_field(component)
         if newline_in_file_err:
             errors.extend(newline_in_file_err)
+        """
 
     if errors:
         return errors, abouts
@@ -197,50 +210,27 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
                     )
                     errors.append(Error(CRITICAL, msg))
                     return errors, abouts
+
         # Set about file path to '' if no 'about_resource' is provided from
         # the input
         if 'about_resource' not in fields:
             afp = ''
+            about, custom_fields_list, process_errors = process_inventory(afp, fields,
+                                                                          from_attrib, base_dir, scancode, reference_dir)
+            abouts.append(about)
         else:
-            afp = fields.get(model.About.ABOUT_RESOURCE_ATTR)
-
-        afp = util.to_posix(afp)
-        if base_dir:
-            loc = join(base_dir, afp)
-        else:
-            loc = afp
-        about = model.About(about_file_path=afp)
-        about.location = loc
-
-        # Update value for 'about_resource'
-        # keep only the filename or '.' if it's a directory
-        if 'about_resource' in fields:
-            updated_resource_value = u''
-            resource_path = fields['about_resource']
-            if resource_path.endswith(u'/'):
-                updated_resource_value = u'.'
-            else:
-                updated_resource_value = basename(resource_path)
-            fields['about_resource'] = updated_resource_value
-
-        ld_errors = about.load_dict(
-            fields,
-            base_dir,
-            scancode=scancode,
-            from_attrib=from_attrib,
-            running_inventory=False,
-            reference_dir=reference_dir,
-        )
-
-        for severity, message in ld_errors:
-            if 'Custom Field' in message:
-                field_name = message.replace('Custom Field: ', '').strip()
-                if not field_name in custom_fields_list:
-                    custom_fields_list.append(field_name)
+            if scancode:
+                afp_list = [fields.get(model.About.ABOUT_RESOURCE_ATTR)]
             else:
-                errors.append(Error(severity, message))
+                afp_list = fields.get(model.About.ABOUT_RESOURCE_ATTR)
+            for afp in afp_list:
+                about, custom_fields_list, process_errors = process_inventory(afp, fields,
+                                                                              from_attrib, base_dir, scancode, reference_dir)
+                abouts.append(about)
+
+    for err in process_errors:
+        errors.append(err)
 
-        abouts.append(about)
     if custom_fields_list:
         custom_fields_err_msg = 'Field ' + \
             str(custom_fields_list) + ' is a custom field.'
@@ -249,6 +239,66 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
     return errors, abouts
 
 
+def process_inventory(about_file_path, fields, from_attrib, base_dir, scancode, reference_dir):
+    """
+    Return an About object, a list of custom fields and a list of errors for
+    the given `fields`, validated against the `base_dir`.
+
+    Optionally use `reference_dir` as the directory location of extra reference
+    license and notice files to reuse.
+    """
+    custom_fields_list = []
+    errors = []
+    afp = util.to_posix(about_file_path)
+    if base_dir:
+        loc = join(base_dir, afp)
+    else:
+        loc = afp
+    about = model.About(about_file_path=afp)
+    about.location = loc
+
+    """
+    # Update value for 'about_resource'
+    # keep only the filename or '.' if it's a directory
+    if 'about_resource' in fields:
+        updated_resource_list = []
+        resource_path_list = fields['about_resource']
+        for resource_path in resource_path_list:
+            if resource_path.endswith(u'/'):
+                updated_resource_list.append('.')
+            else:
+                updated_resource_list.append(basename(resource_path))
+            fields['about_resource'] = updated_resource_list
+    """
+    if 'about_resource' in fields:
+        updated_resource_value = u''
+        resource_path = about.about_file_path
+        if resource_path.endswith(u'/'):
+            updated_resource_value = u'.'
+        else:
+            updated_resource_value = basename(resource_path)
+        fields['about_resource'] = updated_resource_value
+
+    ld_errors = about.load_dict(
+        fields,
+        base_dir,
+        scancode=scancode,
+        from_attrib=from_attrib,
+        running_inventory=False,
+        reference_dir=reference_dir,
+    )
+
+    for severity, message in ld_errors:
+        if 'Custom Field' in message:
+            field_name = message.replace('Custom Field: ', '').strip()
+            if field_name not in custom_fields_list:
+                custom_fields_list.append(field_name)
+        else:
+            errors.append(Error(severity, message))
+
+    return about, custom_fields_list, errors
+
+
 def update_about_resource(self):
     pass
 
@@ -283,6 +333,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
         scancode=scancode,
         worksheet=worksheet
     )
+
     if gen_license:
         license_dict, err = model.pre_process_and_fetch_license_dict(
             abouts, api_url=api_url, api_key=api_key)
@@ -297,7 +348,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
         about.about_file_path = about.about_file_path.strip()
         if about.about_file_path.startswith('/'):
             about.about_file_path = about.about_file_path.lstrip('/')
-        # Use the name as the ABOUT file name if about_resource is empty
+        # Use the name as the ABOUT file name if about_file_path field is empty
         if not about.about_file_path:
             about.about_file_path = about.name.value
         dump_loc = join(bdir, about.about_file_path.lstrip('/'))
@@ -319,7 +370,6 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
             continue
 
         try:
-
             licenses_dict = {}
             if gen_license:
                 # Write generated LICENSE file
@@ -344,9 +394,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license
                             about.license_url.present = True
                         if about.spdx_license_key.value:
                             about.spdx_license_key.present = True
-
             about.dump(dump_loc, licenses_dict)
-
             if android:
                 """
                 Create MODULE_LICENSE_XXX and get context to create NOTICE file
diff --git a/src/attributecode/model.py b/src/attributecode/model.py
index 10d099d9..4c77ef69 100644
--- a/src/attributecode/model.py
+++ b/src/attributecode/model.py
@@ -1830,14 +1830,17 @@ def about_object_to_list_of_dictionary(abouts):
             # from the output location
             if 'about_resource' in ad.keys():
                 about_resource = ad['about_resource']
+                about_resource_dict = {}
                 for resource in about_resource:
                     updated_about_resource = posixpath.normpath(
                         posixpath.join(afp_parent, resource))
                     if resource == u'.':
                         if not updated_about_resource == '/':
                             updated_about_resource = updated_about_resource + '/'
-                ad['about_resource'] = dict(
-                    [(updated_about_resource, None)])
+                    about_resource_dict[updated_about_resource] = None
+                    # about_resource_list.append(updated_about_resource)
+                # ad['about_resource'] = dict([(updated_about_resource, None)])
+                ad['about_resource'] = about_resource_dict
             del ad['about_file_path']
         serialized.append(ad)
     return serialized
diff --git a/src/attributecode/util.py b/src/attributecode/util.py
index 229aac08..f86d7ca9 100644
--- a/src/attributecode/util.py
+++ b/src/attributecode/util.py
@@ -309,9 +309,13 @@ def load_csv(location):
     with open(location, mode='r', encoding='utf-8-sig',
               errors='replace') as csvfile:
         for row in csv.DictReader(csvfile):
-            # convert all the column keys to lower case
-            updated_row = {key.lower().strip(): value for key,
-                           value in row.items()}
+            updated_row = {}
+            for key, value in row.items():
+                formatted_key = key.lower().strip()
+                if formatted_key in file_fields:
+                    updated_row[formatted_key] = value.splitlines()
+                else:
+                    updated_row[formatted_key] = value
             results.append(updated_row)
     return results
 
@@ -545,8 +549,10 @@ def ungroup_licenses(licenses):
     return lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, lic_matched_text
 
 
-# FIXME: add docstring
 def format_about_dict_output(about_dictionary_list):
+    """
+    Format the dictionary list to be able to write to a CSV output
+    """
     formatted_list = []
     for element in about_dictionary_list:
         row_list = dict()
@@ -562,8 +568,10 @@ def format_about_dict_output(about_dictionary_list):
     return formatted_list
 
 
-# FIXME: add docstring
 def format_about_dict_for_json_output(about_dictionary_list):
+    """
+    Format the dictionary list to be able to write to a JSON output
+    """
     licenses = ['license_key', 'license_name', 'license_file', 'license_url']
     json_formatted_list = []
     for element in about_dictionary_list:
@@ -812,7 +820,10 @@ def strip_inventory_value(inventory):
     for component in inventory:
         comp_dict = {}
         for key in component:
-            comp_dict[key] = str(component[key]).strip()
+            if isinstance(component[key], str):
+                comp_dict[key] = component[key].strip()
+            else:
+                comp_dict[key] = component[key]
         stripped_inventory.append(comp_dict)
     return stripped_inventory
 
diff --git a/tests/test_attrib.py b/tests/test_attrib.py
index b50bcbeb..d38052b7 100644
--- a/tests/test_attrib.py
+++ b/tests/test_attrib.py
@@ -245,8 +245,6 @@ def test_scancode_input_dup_lic_match(self):
         test_file = get_test_loc(
             'test_attrib/scancode_input/sc-dup-lic-match.json')
         errors, abouts = gen.load_inventory(test_file, scancode=True)
-        print("############################")
-        print(errors)
         # Check if there is error's level > INFO
         result = [(level, e) for level, e in errors if level > INFO]
         assert result == []
@@ -272,6 +270,10 @@ def test_scancode_input_dup_lic_match(self):
         # expected doesn't work well, it works after removed all the newline and spaces
         # assert expected == result
         # assert expected.splitlines(False) == result.splitlines(False)
+        with open("C:\\Users\\thoma\\Desktop\\tmp\\AbcTK\\566\\about\\result.html", 'w') as result_file:
+            result_file.write(result)
+        with open("C:\\Users\\thoma\\Desktop\\tmp\\AbcTK\\566\\about\\expected.html", 'w') as expected_file:
+            expected_file.write(expected)
         assert expected.replace('\n', '').replace(' ', '').replace(
             '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '')
 
diff --git a/tests/test_model.py b/tests/test_model.py
index 48004dbc..f3f56dd7 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -1053,6 +1053,18 @@ def test_write_output_csv_with_multiple_files(self):
         expected = get_test_loc('test_model/multiple_files_expected.csv')
         check_csv(expected, result)
 
+    def test_write_output_csv_with_multiple_about_resource(self):
+        path = 'test_model/multiple_about_resource.ABOUT'
+        test_file = get_test_loc(path)
+        abouts = model.About(location=test_file, about_file_path=path)
+
+        result = get_temp_file()
+        model.write_output([abouts], result, format='csv')
+
+        expected = get_test_loc(
+            'test_model/multiple_about_resource_expected.csv')
+        check_csv(expected, result)
+
     def test_write_output_json(self):
         path = 'test_model/this.ABOUT'
         test_file = get_test_loc(path)
diff --git a/tests/testdata/test_model/expected.json b/tests/testdata/test_model/expected.json
index 516ca486..c8f5084a 100644
--- a/tests/testdata/test_model/expected.json
+++ b/tests/testdata/test_model/expected.json
@@ -1,7 +1,7 @@
 [
   {
-    "about_resource": "/test_model/", 
-    "name": "AboutCode", 
+    "about_resource": "/test_model/",
+    "name": "AboutCode",
     "version": "0.11.0"
   }
-]
\ No newline at end of file
+]
diff --git a/tests/testdata/test_model/multiple_about_resource.ABOUT b/tests/testdata/test_model/multiple_about_resource.ABOUT
new file mode 100644
index 00000000..1d19cb8c
--- /dev/null
+++ b/tests/testdata/test_model/multiple_about_resource.ABOUT
@@ -0,0 +1,4 @@
+about_resource:
+ - .
+ - multiple_files_expected.csv
+name: multiple_about_resource
diff --git a/tests/testdata/test_model/multiple_about_resource_expected.csv b/tests/testdata/test_model/multiple_about_resource_expected.csv
new file mode 100644
index 00000000..9ab07f35
--- /dev/null
+++ b/tests/testdata/test_model/multiple_about_resource_expected.csv
@@ -0,0 +1,3 @@
+about_resource,name
+"/test_model/
+/test_model/multiple_files_expected.csv",multiple_about_resource