Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: move string utility methods from onedrive-support #1033

Merged
merged 2 commits into from
Dec 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions packages/helix-shared-string/src/string.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,101 @@ export function multiline(str) {
.map((l) => l.slice(prefixLen)) // discard prefixes
.join('\n');
}

/**
 * Splits a file name at its last '.' into a base name and a lower-cased extension.
 *
 * The name is considered to have no extension (and is returned unchanged as the
 * base name, paired with an empty string) when it contains no dot, when its only
 * candidate dot is the very first character (e.g. '.foo'), or when it ends with
 * a dot (e.g. 'foo.').
 *
 * @param {string} name Filename
 * @returns {string[]} A two element array: `[baseName, extension]`. The extension
 *   does not include the dot and is converted to lower case.
 */
export function splitByExtension(name) {
  const dot = name.lastIndexOf('.');
  // no dot, leading dot or trailing dot: nothing to split off
  if (dot <= 0 || dot >= name.length - 1) {
    return [name, ''];
  }
  const extension = name.substring(dot + 1).toLowerCase();
  return [name.substring(0, dot), extension];
}

/**
 * Sanitizes the given string so that it only consists of `a-z`, `0-9` and single dashes:
 * - converts it to lower case
 * - decomposes unicode characters (NFD) and drops the combining diacritical marks
 * - replaces every run of non-alphanumeric characters with one dash
 * - removes a leading and a trailing dash
 *
 * @param {string} name
 * @returns {string} sanitized name
 */
export function sanitizeName(name) {
  const lowered = name.toLowerCase();
  // NFD splits e.g. 'ö' into 'o' + U+0308, so removing U+0300..U+036F leaves the base letter
  const unaccented = lowered.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  // runs are collapsed by the '+' quantifier, so at most one dash remains at either end
  const dashed = unaccented.replace(/[^a-z0-9]+/g, '-');
  return dashed.replace(/^-|-$/g, '');
}

/**
 * Sanitizes the base name of a file path by:
 * - converting it to lower case
 * - normalizing all unicode characters
 * - replacing all non-alphanumeric characters with a dash
 * - collapsing consecutive dashes
 * - removing leading and trailing dashes
 *
 * Only the segment after the last '/' is touched, and of that segment only the part
 * before its last '.'. The ancestor path and the extension (including the dot) are
 * copied to the result unchanged. A segment that starts with its only dot is treated
 * as consisting of an extension only.
 *
 * @param {string} filepath the file path
 * @param {object} opts Options
 * @param {boolean} [opts.ignoreExtension] if `true`, no extension is split off and the
 *   entire last segment is sanitized
 * @returns {string} sanitized file path
 */
export function sanitizePath(filepath, opts = {}) {
  const nameStart = filepath.lastIndexOf('/') + 1;
  const dot = opts.ignoreExtension ? -1 : filepath.lastIndexOf('.');
  // a dot located in one of the ancestor segments does not start an extension
  const hasExtension = dot >= nameStart;

  const parent = filepath.substring(0, nameStart);
  const nameEnd = hasExtension ? dot : filepath.length;
  const extension = hasExtension ? filepath.substring(dot) : '';
  const sanitized = sanitizeName(filepath.substring(nameStart, nameEnd));
  return `${parent}${sanitized}${extension}`;
}

/**
 * Computes the edit (Levenshtein) distance between two strings, i.e. the minimum number
 * of single character insertions, removals and replacements needed to turn one string
 * into the other. Characters are compared as UTF-16 code units.
 *
 * The distance is computed iteratively, row by row. Only the previous and the current
 * row of the distance matrix are kept, so the memory needed grows with the length of
 * the shorter string while the run time grows with the product of both lengths.
 *
 * @param {string} s0 Input string
 * @param {string} s1 Input string
 * @returns {number} the edit distance; 0 if both strings are equal
 */
export function editDistance(s0, s1) {
  // the distance is symmetric: put the shorter string on the column axis to keep rows small
  const [rowStr, colStr] = s0.length < s1.length ? [s1, s0] : [s0, s1];
  const cols = colStr.length;

  // first row: distance from the empty prefix of rowStr to every prefix of colStr
  let prevRow = Array.from({ length: cols + 1 }, (_, c) => c);

  for (let i = 1; i <= rowStr.length; i += 1) {
    const rowChar = rowStr.charAt(i - 1);
    // first column: distance from a prefix of rowStr to the empty prefix of colStr
    const currRow = [i];
    for (let j = 1; j <= cols; j += 1) {
      const replaceCost = rowChar === colStr.charAt(j - 1) ? 0 : 1;
      currRow[j] = Math.min(
        prevRow[j] + 1, // insert
        currRow[j - 1] + 1, // remove
        prevRow[j - 1] + replaceCost,
      );
    }
    prevRow = currRow;
  }
  return prevRow[cols];
}
140 changes: 139 additions & 1 deletion packages/helix-shared-string/test/string.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
/* eslint-env mocha */

import assert from 'assert';
import { multiline } from '../src/string.js';
import {
multiline, editDistance, sanitizeName, sanitizePath, splitByExtension,
} from '../src/string.js';

describe('String tests', () => {
it('multiline()', () => {
Expand All @@ -39,3 +41,139 @@ describe('String tests', () => {
`);
});
});

// Verifies how splitByExtension() separates base name and extension.
// Assertions pass (actual, expected), matching the parameter order of node's assert
// module, so that a failure report labels both sides correctly.
describe('splitByExtension Tests', () => {
  it('extension split works for empty string', () => {
    assert.deepStrictEqual(splitByExtension(''), ['', '']);
  });

  it('extension split works for string w/o extension', () => {
    assert.deepStrictEqual(splitByExtension('foo'), ['foo', '']);
  });

  it('extension split works for string with extension', () => {
    assert.deepStrictEqual(splitByExtension('foo.txt'), ['foo', 'txt']);
  });

  it('extension split works for string with dots and extension', () => {
    assert.deepStrictEqual(splitByExtension('foo.bar.txt'), ['foo.bar', 'txt']);
  });

  it('extension split works for string ending with a dot', () => {
    assert.deepStrictEqual(splitByExtension('foo.'), ['foo.', '']);
  });

  it('extension split works for string starting with a dot', () => {
    assert.deepStrictEqual(splitByExtension('.foo'), ['.foo', '']);
  });
});

// Table driven checks for sanitizeName(): every entry registers one test that feeds
// `input` to the function and expects exactly `expected` back.
describe('sanitize Tests', () => {
  const cases = [
    { title: 'sanitize works for empty string', input: '', expected: '' },
    { title: 'sanitize transform string to lower case', input: 'MyDocument', expected: 'mydocument' },
    { title: 'sanitize transforms non-alpha to dashes', input: 'My 2. Document', expected: 'my-2-document' },
    { title: 'sanitize removes leading dashes', input: '.My 2. Document', expected: 'my-2-document' },
    { title: 'sanitize removes trailing dashes', input: '.My 2. Document-', expected: 'my-2-document' },
    { title: 'sanitize normalizes unicode', input: 'Föhren Smürd', expected: 'fohren-smurd' },
  ];

  cases.forEach(({ title, input, expected }) => {
    it(title, () => {
      assert.strictEqual(sanitizeName(input), expected);
    });
  });
});

// Verifies editDistance() against known distances.
// Assertions pass (actual, expected), matching the parameter order of node's assert
// module, so that a failure report labels both sides correctly.
describe('editDistance Tests', () => {
  it('editDistances works for empty strings', () => {
    assert.strictEqual(editDistance('', ''), 0);
  });

  it('editDistances works for equal strings', () => {
    assert.strictEqual(editDistance('foo', 'foo'), 0);
  });

  it('editDistances works for appended characters', () => {
    assert.strictEqual(editDistance('foo', 'foo123'), 3);
  });

  it('editDistances works for removed characters from the end', () => {
    assert.strictEqual(editDistance('foo123', 'foo'), 3);
  });

  it('editDistances works for replaced characters', () => {
    assert.strictEqual(editDistance('My Document', 'my-document'), 3);
  });

  it('editDistances works for more complicate replacements', () => {
    assert.strictEqual(editDistance('My 1. Document', 'my-1-document'), 5);
  });

  it('editDistances works for more complicate replacements (2)', () => {
    assert.strictEqual(editDistance('my-1-document', 'My 1. Document.docx'), 10);
  });

  it('editDistances is reasonably fast for long names)', () => {
    // coarse guard against an accidentally exponential implementation
    const t0 = Date.now();
    assert.strictEqual(editDistance(
      'my-1-document my-1-document my-1-document my-1-document my-1-document my-1-document my-1-document my-1-document my-1-document my-1-document my-1-document my-1-document ',
      'My 1. Document My 1. Document My 1. Document My 1. Document My 1. Document My 1. Document My 1. Document My 1. Document My 1. Document My 1. Document My 1. Document .docx',
    ), 66);
    const t1 = Date.now();
    assert.ok(t1 - t0 < 100);
  });
});

// Table driven checks for sanitizePath(): every entry registers one test that passes
// `input` (and `opts`, where given) to the function and expects exactly `expected` back.
describe('sanitizePath Tests', () => {
  const cases = [
    { title: 'sanitizePath works for empty string', input: '', expected: '' },
    { title: 'sanitizePath transform string to lower case', input: 'MyDocument', expected: 'mydocument' },
    {
      title: 'sanitizePath can ignore extension',
      input: '.MyDocument',
      opts: { ignoreExtension: true },
      expected: 'mydocument',
    },
    {
      title: 'sanitizePath works with dots in path and no extension',
      input: '/foo.bar/My Document',
      expected: '/foo.bar/my-document',
    },
    {
      title: 'sanitizePath only transforms last path segment',
      input: '/Untitled Folder/MyDocument',
      expected: '/Untitled Folder/mydocument',
    },
    { title: 'sanitizePath only transforms root segment', input: '/MyDocument', expected: '/mydocument' },
    {
      title: 'sanitizePath transforms non-alpha to dashes',
      input: 'My 2. Document.docx',
      expected: 'my-2-document.docx',
    },
    {
      title: 'sanitizePath removes leading dashes',
      input: '.My 2. Document.docx',
      expected: 'my-2-document.docx',
    },
    {
      title: 'sanitizePath removes trailing dashes',
      input: '.My 2. Document!.docx',
      expected: 'my-2-document.docx',
    },
    { title: 'sanitizePath normalizes unicode', input: 'Föhren Smürd', expected: 'fohren-smurd' },
  ];

  cases.forEach(({
    title, input, opts, expected,
  }) => {
    it(title, () => {
      // an undefined `opts` makes sanitizePath fall back to its default options
      assert.strictEqual(sanitizePath(input, opts), expected);
    });
  });
});
Loading