From 6eed786cd88aed7c0286d716d1e27833136449d1 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Wed, 7 Aug 2024 12:45:28 +0800 Subject: [PATCH] #566 - Working in progress to support multiple about_resource Signed-off-by: Chin Yeung Li --- src/attributecode/gen.py | 170 +++++++++++------- src/attributecode/model.py | 7 +- src/attributecode/util.py | 23 ++- tests/test_attrib.py | 6 +- tests/test_model.py | 12 ++ tests/testdata/test_model/expected.json | 6 +- .../test_model/multiple_about_resource.ABOUT | 4 + .../multiple_about_resource_expected.csv | 3 + 8 files changed, 157 insertions(+), 74 deletions(-) create mode 100644 tests/testdata/test_model/multiple_about_resource.ABOUT create mode 100644 tests/testdata/test_model/multiple_about_resource_expected.csv diff --git a/src/attributecode/gen.py b/src/attributecode/gen.py index 3b824c20..b1a97c35 100644 --- a/src/attributecode/gen.py +++ b/src/attributecode/gen.py @@ -94,12 +94,12 @@ def check_newline_in_file_field(component): if k in file_fields: try: if '\n' in component[k]: - if k == u'about_resource': - msg = ( - "Multiple lines detected in 'about_resource' for '%s' which is not supported.") % component['about_resource'] - else: - msg = ("New line character detected in '%s' for '%s' which is not supported." - "\nPlease use ',' to declare multiple files.") % (k, component['about_resource']) + # if k == u'about_resource': + # msg = ( + # "Multiple lines detected in 'about_resource' for '%s' which is not supported.") % component['about_resource'] + # else: + msg = ("New line character detected in '%s' for '%s' which is not supported." + "\nPlease use ',' to declare multiple files.") % (k, component['about_resource']) errors.append(Error(CRITICAL, msg)) except: pass @@ -123,9 +123,6 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r Load the inventory file at `location` for ABOUT and LICENSE files stored in the `base_dir`. 
Return a list of errors and a list of About objects validated against the `base_dir`. - - Optionally use `reference_dir` as the directory location of extra reference - license and notice files to reuse. """ errors = [] abouts = [] @@ -164,21 +161,37 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r for component in stripped_inv: if not from_attrib: if 'about_resource' in component: - arp = component['about_resource'] - dup_err = check_duplicated_about_resource(arp, arp_list) - if dup_err: - if not dup_err in errors: - errors.append(dup_err) - else: - arp_list.append(arp) - - invalid_about_filename = check_about_resource_filename(arp) - if invalid_about_filename and not invalid_about_filename in errors: - errors.append(invalid_about_filename) + if isinstance(component['about_resource'], str): + arp = component['about_resource'] + dup_err = check_duplicated_about_resource(arp, arp_list) + if dup_err: + if dup_err not in errors: + errors.append(dup_err) + else: + arp_list.append(arp) + invalid_about_filename = check_about_resource_filename(arp) + if invalid_about_filename and invalid_about_filename not in errors: + errors.append(invalid_about_filename) + else: + for arp in component['about_resource']: + dup_err = check_duplicated_about_resource( + arp, arp_list) + if dup_err: + if dup_err not in errors: + errors.append(dup_err) + else: + arp_list.append(arp) + + invalid_about_filename = check_about_resource_filename( + arp) + if invalid_about_filename and invalid_about_filename not in errors: + errors.append(invalid_about_filename) + """ newline_in_file_err = check_newline_in_file_field(component) if newline_in_file_err: errors.extend(newline_in_file_err) + """ if errors: return errors, abouts @@ -197,50 +210,27 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r ) errors.append(Error(CRITICAL, msg)) return errors, abouts + # Set about file path to '' if no 'about_resource' is provided from # the input 
if 'about_resource' not in fields: afp = '' + about, custom_fields_list, process_errors = process_inventory(afp, fields, + from_attrib, base_dir, scancode, reference_dir) + abouts.append(about) else: - afp = fields.get(model.About.ABOUT_RESOURCE_ATTR) - - afp = util.to_posix(afp) - if base_dir: - loc = join(base_dir, afp) - else: - loc = afp - about = model.About(about_file_path=afp) - about.location = loc - - # Update value for 'about_resource' - # keep only the filename or '.' if it's a directory - if 'about_resource' in fields: - updated_resource_value = u'' - resource_path = fields['about_resource'] - if resource_path.endswith(u'/'): - updated_resource_value = u'.' - else: - updated_resource_value = basename(resource_path) - fields['about_resource'] = updated_resource_value - - ld_errors = about.load_dict( - fields, - base_dir, - scancode=scancode, - from_attrib=from_attrib, - running_inventory=False, - reference_dir=reference_dir, - ) - - for severity, message in ld_errors: - if 'Custom Field' in message: - field_name = message.replace('Custom Field: ', '').strip() - if not field_name in custom_fields_list: - custom_fields_list.append(field_name) + if scancode: + afp_list = [fields.get(model.About.ABOUT_RESOURCE_ATTR)] else: - errors.append(Error(severity, message)) + afp_list = fields.get(model.About.ABOUT_RESOURCE_ATTR) + for afp in afp_list: + about, custom_fields_list, process_errors = process_inventory(afp, fields, + from_attrib, base_dir, scancode, reference_dir) + abouts.append(about) + + for err in process_errors: + errors.append(err) - abouts.append(about) if custom_fields_list: custom_fields_err_msg = 'Field ' + \ str(custom_fields_list) + ' is a custom field.' 
@@ -249,6 +239,66 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r return errors, abouts +def process_inventory(about_file_path, fields, from_attrib, base_dir, scancode, reference_dir): + """ + Return About object, a list of custom fields and a list of errors and + validated against the `base_dir`. + + Optionally use `reference_dir` as the directory location of extra reference + license and notice files to reuse. + """ + custom_fields_list = [] + errors = [] + afp = util.to_posix(about_file_path) + if base_dir: + loc = join(base_dir, afp) + else: + loc = afp + about = model.About(about_file_path=afp) + about.location = loc + + """ + # Update value for 'about_resource' + # keep only the filename or '.' if it's a directory + if 'about_resource' in fields: + updated_resource_list = [] + resource_path_list = fields['about_resource'] + for resource_path in resource_path_list: + if resource_path.endswith(u'/'): + updated_resource_list.append('.') + else: + updated_resource_list.append(basename(resource_path)) + fields['about_resource'] = updated_resource_list + """ + if 'about_resource' in fields: + updated_resource_value = u'' + resource_path = about.about_file_path + if resource_path.endswith(u'/'): + updated_resource_value = u'.' 
+ else: + updated_resource_value = basename(resource_path) + fields['about_resource'] = updated_resource_value + + ld_errors = about.load_dict( + fields, + base_dir, + scancode=scancode, + from_attrib=from_attrib, + running_inventory=False, + reference_dir=reference_dir, + ) + + for severity, message in ld_errors: + if 'Custom Field' in message: + field_name = message.replace('Custom Field: ', '').strip() + if field_name not in custom_fields_list: + custom_fields_list.append(field_name) + else: + errors.append(Error(severity, message)) + + return about, custom_fields_list, errors + + def update_about_resource(self): pass @@ -283,6 +333,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license scancode=scancode, worksheet=worksheet ) + if gen_license: license_dict, err = model.pre_process_and_fetch_license_dict( abouts, api_url=api_url, api_key=api_key) @@ -297,7 +348,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license about.about_file_path = about.about_file_path.strip() if about.about_file_path.startswith('/'): about.about_file_path = about.about_file_path.lstrip('/') - # Use the name as the ABOUT file name if about_resource is empty + # Use the name as the ABOUT file name if about_file_path field is empty if not about.about_file_path: about.about_file_path = about.name.value dump_loc = join(bdir, about.about_file_path.lstrip('/')) @@ -319,7 +370,6 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license continue try: - licenses_dict = {} if gen_license: # Write generated LICENSE file @@ -344,9 +394,7 @@ def generate(location, base_dir, android=None, reference_dir=None, fetch_license about.license_url.present = True if about.spdx_license_key.value: about.spdx_license_key.present = True - about.dump(dump_loc, licenses_dict) - if android: """ Create MODULE_LICENSE_XXX and get context to create NOTICE file diff --git a/src/attributecode/model.py b/src/attributecode/model.py index 
10d099d9..4c77ef69 100644 --- a/src/attributecode/model.py +++ b/src/attributecode/model.py @@ -1830,14 +1830,17 @@ def about_object_to_list_of_dictionary(abouts): # from the output location if 'about_resource' in ad.keys(): about_resource = ad['about_resource'] + about_resource_dict = {} for resource in about_resource: updated_about_resource = posixpath.normpath( posixpath.join(afp_parent, resource)) if resource == u'.': if not updated_about_resource == '/': updated_about_resource = updated_about_resource + '/' - ad['about_resource'] = dict( - [(updated_about_resource, None)]) + about_resource_dict[updated_about_resource] = None + # about_resource_list.append(updated_about_resource) + # ad['about_resource'] = dict([(updated_about_resource, None)]) + ad['about_resource'] = about_resource_dict del ad['about_file_path'] serialized.append(ad) return serialized diff --git a/src/attributecode/util.py b/src/attributecode/util.py index 229aac08..f86d7ca9 100644 --- a/src/attributecode/util.py +++ b/src/attributecode/util.py @@ -309,9 +309,13 @@ def load_csv(location): with open(location, mode='r', encoding='utf-8-sig', errors='replace') as csvfile: for row in csv.DictReader(csvfile): - # convert all the column keys to lower case - updated_row = {key.lower().strip(): value for key, - value in row.items()} + updated_row = {} + for key, value in row.items(): + formatted_key = key.lower().strip() + if formatted_key in file_fields: + updated_row[formatted_key] = value.splitlines() + else: + updated_row[formatted_key] = value results.append(updated_row) return results @@ -545,8 +549,10 @@ def ungroup_licenses(licenses): return lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, lic_matched_text -# FIXME: add docstring def format_about_dict_output(about_dictionary_list): + """ + Format the dictionary list to be able to write to a CSV output + """ formatted_list = [] for element in about_dictionary_list: row_list = dict() @@ -562,8 +568,10 @@ def 
format_about_dict_output(about_dictionary_list): return formatted_list -# FIXME: add docstring def format_about_dict_for_json_output(about_dictionary_list): + """ + Format the dictionary list to be able to write to a JSON output + """ licenses = ['license_key', 'license_name', 'license_file', 'license_url'] json_formatted_list = [] for element in about_dictionary_list: @@ -812,7 +820,10 @@ def strip_inventory_value(inventory): for component in inventory: comp_dict = {} for key in component: - comp_dict[key] = str(component[key]).strip() + if isinstance(component[key], str): + comp_dict[key] = component[key].strip() + else: + comp_dict[key] = component[key] stripped_inventory.append(comp_dict) return stripped_inventory diff --git a/tests/test_attrib.py b/tests/test_attrib.py index b50bcbeb..d38052b7 100644 --- a/tests/test_attrib.py +++ b/tests/test_attrib.py @@ -245,8 +245,6 @@ def test_scancode_input_dup_lic_match(self): test_file = get_test_loc( 'test_attrib/scancode_input/sc-dup-lic-match.json') errors, abouts = gen.load_inventory(test_file, scancode=True) - print("############################") - print(errors) # Check if there is error's level > INFO result = [(level, e) for level, e in errors if level > INFO] assert result == [] @@ -272,6 +270,10 @@ def test_scancode_input_dup_lic_match(self): # expected doesn't work well, it works after removed all the newline and spaces # assert expected == result # assert expected.splitlines(False) == result.splitlines(False) + with open("C:\\Users\\thoma\\Desktop\\tmp\\AbcTK\\566\\about\\result.html", 'w') as result_file: + result_file.write(result) + with open("C:\\Users\\thoma\\Desktop\\tmp\\AbcTK\\566\\about\\expected.html", 'w') as expected_file: + expected_file.write(expected) assert expected.replace('\n', '').replace(' ', '').replace( '\t', '') == result.replace('\n', '').replace(' ', '').replace('\t', '') diff --git a/tests/test_model.py b/tests/test_model.py index 48004dbc..f3f56dd7 100644 --- a/tests/test_model.py 
+++ b/tests/test_model.py @@ -1053,6 +1053,18 @@ def test_write_output_csv_with_multiple_files(self): expected = get_test_loc('test_model/multiple_files_expected.csv') check_csv(expected, result) + def test_write_output_csv_with_multiple_about_resource(self): + path = 'test_model/multiple_about_resource.ABOUT' + test_file = get_test_loc(path) + abouts = model.About(location=test_file, about_file_path=path) + + result = get_temp_file() + model.write_output([abouts], result, format='csv') + + expected = get_test_loc( + 'test_model/multiple_about_resource_expected.csv') + check_csv(expected, result) + def test_write_output_json(self): path = 'test_model/this.ABOUT' test_file = get_test_loc(path) diff --git a/tests/testdata/test_model/expected.json b/tests/testdata/test_model/expected.json index 516ca486..c8f5084a 100644 --- a/tests/testdata/test_model/expected.json +++ b/tests/testdata/test_model/expected.json @@ -1,7 +1,7 @@ [ { - "about_resource": "/test_model/", - "name": "AboutCode", + "about_resource": "/test_model/", + "name": "AboutCode", "version": "0.11.0" } -] \ No newline at end of file +] diff --git a/tests/testdata/test_model/multiple_about_resource.ABOUT b/tests/testdata/test_model/multiple_about_resource.ABOUT new file mode 100644 index 00000000..1d19cb8c --- /dev/null +++ b/tests/testdata/test_model/multiple_about_resource.ABOUT @@ -0,0 +1,4 @@ +about_resource: + - . + - multiple_files_expected.csv +name: multiple_about_resource diff --git a/tests/testdata/test_model/multiple_about_resource_expected.csv b/tests/testdata/test_model/multiple_about_resource_expected.csv new file mode 100644 index 00000000..9ab07f35 --- /dev/null +++ b/tests/testdata/test_model/multiple_about_resource_expected.csv @@ -0,0 +1,3 @@ +about_resource,name +"/test_model/ +/test_model/multiple_files_expected.csv",multiple_about_resource