From 251cfcdde7ac54525eef49341a9ede52d368cf71 Mon Sep 17 00:00:00 2001
From: Alexandre Capt
Date: Mon, 13 Nov 2023 14:10:30 +0100
Subject: [PATCH] feat: improve default import (#261)
---
package-lock.json | 4 +-
src/importer/HTML2x.js | 23 +---
src/importer/defaults/generateDocumentPath.js | 24 +++++
.../defaults/rules/adjustImageUrls.js | 28 +++++
src/importer/defaults/rules/convertIcons.js | 25 +++++
src/importer/defaults/rules/createMetadata.js | 102 ++++++++++++++++++
.../rules/transformBackgroundImages.js | 21 ++++
src/importer/defaults/transformDOM.js | 42 ++++++++
src/index.js | 13 +++
src/utils/DOMUtils.js | 6 +-
test/TestUtils.js | 21 ++++
test/browser/DOMUtils.test.js | 67 ++++++++++++
test/importers/HTML2x.spec.js | 41 ++-----
test/importers/PageImporter.spec.js | 4 +
.../fixtures/adjust-image-urls.expected.html | 7 ++
.../fixtures/adjust-image-urls.input.html | 10 ++
.../fixtures/background-image.expected.html | 13 +++
.../fixtures/background-image.input.html | 10 ++
.../defaults/fixtures/cleanup.expected.html | 5 +
.../defaults/fixtures/cleanup.input.html | 11 ++
.../defaults/fixtures/default.expected.html | 4 +
.../defaults/fixtures/default.input.html | 6 ++
.../defaults/fixtures/icons.expected.html | 4 +
.../defaults/fixtures/icons.input.html | 6 ++
.../fixtures/metadata.all.diff.expected.html | 40 +++++++
.../fixtures/metadata.all.diff.input.html | 17 +++
.../fixtures/metadata.all.same.expected.html | 20 ++++
.../fixtures/metadata.all.same.input.html | 17 +++
.../fixtures/metadata.basic.expected.html | 16 +++
.../fixtures/metadata.basic.input.html | 9 ++
.../fixtures/metadata.image.expected.html | 12 +++
.../fixtures/metadata.image.input.html | 9 ++
.../fixtures/metadata.og.expected.html | 16 +++
.../defaults/fixtures/metadata.og.input.html | 9 ++
.../fixtures/metadata.twitter.expected.html | 16 +++
.../fixtures/metadata.twitter.input.html | 9 ++
.../defaults/generateDocumentPath.spec.js | 32 ++++++
test/importers/defaults/transformDOM.spec.js | 94 ++++++++++++++++
test/importers/fixtures/video.spec.html | 11 ++
test/importers/fixtures/video.spec.md | 7 ++
test/utils/DOMUtils.spec.js | 7 +-
41 files changed, 774 insertions(+), 64 deletions(-)
create mode 100644 src/importer/defaults/generateDocumentPath.js
create mode 100644 src/importer/defaults/rules/adjustImageUrls.js
create mode 100644 src/importer/defaults/rules/convertIcons.js
create mode 100644 src/importer/defaults/rules/createMetadata.js
create mode 100644 src/importer/defaults/rules/transformBackgroundImages.js
create mode 100644 src/importer/defaults/transformDOM.js
create mode 100644 test/TestUtils.js
create mode 100644 test/browser/DOMUtils.test.js
create mode 100644 test/importers/defaults/fixtures/adjust-image-urls.expected.html
create mode 100644 test/importers/defaults/fixtures/adjust-image-urls.input.html
create mode 100644 test/importers/defaults/fixtures/background-image.expected.html
create mode 100644 test/importers/defaults/fixtures/background-image.input.html
create mode 100644 test/importers/defaults/fixtures/cleanup.expected.html
create mode 100644 test/importers/defaults/fixtures/cleanup.input.html
create mode 100644 test/importers/defaults/fixtures/default.expected.html
create mode 100644 test/importers/defaults/fixtures/default.input.html
create mode 100644 test/importers/defaults/fixtures/icons.expected.html
create mode 100644 test/importers/defaults/fixtures/icons.input.html
create mode 100644 test/importers/defaults/fixtures/metadata.all.diff.expected.html
create mode 100644 test/importers/defaults/fixtures/metadata.all.diff.input.html
create mode 100644 test/importers/defaults/fixtures/metadata.all.same.expected.html
create mode 100644 test/importers/defaults/fixtures/metadata.all.same.input.html
create mode 100644 test/importers/defaults/fixtures/metadata.basic.expected.html
create mode 100644 test/importers/defaults/fixtures/metadata.basic.input.html
create mode 100644 test/importers/defaults/fixtures/metadata.image.expected.html
create mode 100644 test/importers/defaults/fixtures/metadata.image.input.html
create mode 100644 test/importers/defaults/fixtures/metadata.og.expected.html
create mode 100644 test/importers/defaults/fixtures/metadata.og.input.html
create mode 100644 test/importers/defaults/fixtures/metadata.twitter.expected.html
create mode 100644 test/importers/defaults/fixtures/metadata.twitter.input.html
create mode 100644 test/importers/defaults/generateDocumentPath.spec.js
create mode 100644 test/importers/defaults/transformDOM.spec.js
create mode 100644 test/importers/fixtures/video.spec.html
create mode 100644 test/importers/fixtures/video.spec.md
diff --git a/package-lock.json b/package-lock.json
index 53f0424..8738ff6 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "@adobe/helix-importer",
- "version": "2.9.41",
+ "version": "3.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@adobe/helix-importer",
- "version": "2.9.41",
+ "version": "3.0.0",
"license": "Apache-2.0",
"dependencies": {
"@adobe/helix-markdown-support": "7.1.0",
diff --git a/src/importer/HTML2x.js b/src/importer/HTML2x.js
index ad9fa41..2f6682c 100644
--- a/src/importer/HTML2x.js
+++ b/src/importer/HTML2x.js
@@ -17,6 +17,8 @@ import PageImporterResource from './PageImporterResource.js';
import MemoryHandler from '../storage/MemoryHandler.js';
import Utils from '../utils/Utils.js';
import BrowserUtils from '../utils/BrowserUtils.js';
+import defaultTransformDOM from './defaults/transformDOM.js';
+import defaultGenerateDocumentPath from './defaults/generateDocumentPath.js';
// import docxStylesXML from '../resources/styles.xml';
@@ -35,27 +37,6 @@ function setBackgroundImagesFromCSS(document) {
}
}
-async function defaultTransformDOM({
- // eslint-disable-next-line no-unused-vars
- url, document, html, params,
-}) {
- return document.body;
-}
-
-async function defaultGenerateDocumentPath({
- // eslint-disable-next-line no-unused-vars
- url, document, html, params,
-}) {
- let p = new URL(url).pathname;
- if (p.endsWith('/')) {
- p = `${p}index`;
- }
- return decodeURIComponent(p)
- .toLowerCase()
- .replace(/\.html$/, '')
- .replace(/[^a-z0-9/]/gm, '-');
-}
-
async function html2x(
url,
doc,
diff --git a/src/importer/defaults/generateDocumentPath.js b/src/importer/defaults/generateDocumentPath.js
new file mode 100644
index 0000000..9912ead
--- /dev/null
+++ b/src/importer/defaults/generateDocumentPath.js
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+/**
+ * Default implementation of the document path generation: derives the path of the
+ * imported document from the pathname of the page URL.
+ * A trailing `/` is mapped to `/index`, percent-escapes are decoded, the result is
+ * lowercased, a trailing `.html` is dropped and every character other than `a-z`,
+ * `0-9` and `/` is replaced by `-`.
+ * @param {object} param
+ * @param {string} param.url URL of the page being imported, must be parseable by `URL`
+ * @returns {Promise<string>} The sanitized document path
+ */
+export default async function generateDocumentPath({
+  // eslint-disable-next-line no-unused-vars
+  url, document, html, params,
+}) {
+  let p = new URL(url).pathname;
+  if (p.endsWith('/')) {
+    p = `${p}index`;
+  }
+  try {
+    p = decodeURIComponent(p);
+  } catch (e) {
+    // malformed percent-escape (e.g. a lone `%`): keep the raw pathname instead of
+    // failing the import - the sanitization below replaces the offending characters
+  }
+  return p
+    .toLowerCase()
+    .replace(/\.html$/, '')
+    .replace(/[^a-z0-9/]/gm, '-');
+}
diff --git a/src/importer/defaults/rules/adjustImageUrls.js b/src/importer/defaults/rules/adjustImageUrls.js
new file mode 100644
index 0000000..8a10c3f
--- /dev/null
+++ b/src/importer/defaults/rules/adjustImageUrls.js
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+/**
+ * Makes the relative image sources absolute.
+ * Every `img` below `main` whose `src` attribute starts with `./`, `/` or `../` gets
+ * its `src` replaced by that value resolved against `url`. If the resolution fails,
+ * a message is logged and the image is removed.
+ * @param {Element} main Root element whose images are processed
+ * @param {string} url Base URL the relative sources are resolved against
+ */
+export default function adjustImageUrls(main, url) {
+  const relativePrefixes = ['./', '/', '../'];
+  Array.from(main.querySelectorAll('img')).forEach((img) => {
+    const src = img.getAttribute('src');
+    if (!src || !relativePrefixes.some((prefix) => src.startsWith(prefix))) {
+      return;
+    }
+    try {
+      // eslint-disable-next-line no-param-reassign
+      img.src = new URL(src, url).toString();
+    } catch (e) {
+      // eslint-disable-next-line no-console
+      console.log(`Unable to adjust image URL ${img.src} - removing image`);
+      img.remove();
+    }
+  });
+}
diff --git a/src/importer/defaults/rules/convertIcons.js b/src/importer/defaults/rules/convertIcons.js
new file mode 100644
index 0000000..b2692cb
--- /dev/null
+++ b/src/importer/defaults/rules/convertIcons.js
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+/**
+ * Replaces the SVG images by icon placeholders.
+ * Every `img` below `main` whose `src` attribute ends with `.svg` is replaced by a
+ * `span` containing `:name:`, where `name` is the part of the file name before its
+ * first `.`, lowercased, trimmed, with every character other than `a-z` and `0-9`
+ * turned into `-`. Images for which this name is empty are left in place.
+ * @param {Element} main Root element whose images are processed
+ * @param {Document} document Document used to create the `span` elements
+ */
+export default function convertIcons(main, document) {
+  Array.from(main.querySelectorAll('img')).forEach((img) => {
+    const src = img.getAttribute('src');
+    if (!src || !src.endsWith('.svg')) {
+      return;
+    }
+    const span = document.createElement('span');
+    const fileName = src.split('/').pop();
+    const [baseName] = fileName.split('.');
+    const name = baseName.toLowerCase().trim().replace(/[^a-z0-9]/g, '-');
+    if (!name) {
+      return;
+    }
+    span.innerHTML = `:${name}:`;
+    img.replaceWith(span);
+  });
+}
diff --git a/src/importer/defaults/rules/createMetadata.js b/src/importer/defaults/rules/createMetadata.js
new file mode 100644
index 0000000..4dd5ddf
--- /dev/null
+++ b/src/importer/defaults/rules/createMetadata.js
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+import Blocks from '../../../utils/Blocks.js';
+
+/**
+ * Reads the content of the `meta` elements of the document head matching `name`.
+ * Names containing a `:` (e.g. `og:title`) are looked up through the `property`
+ * attribute first, all other names through the `name` attribute first. When nothing
+ * is found that way the other attribute is tried, because pages mix both forms
+ * (e.g. Twitter card tags are commonly declared as `<meta name="twitter:title">`).
+ * @param {string} name The metadata name
+ * @param {Document} document The document to read from
+ * @returns {string} The contents of all matching elements joined with `, `,
+ * or an empty string if there is none
+ */
+function getMetadata(name, document) {
+  const primary = name && name.includes(':') ? 'property' : 'name';
+  const secondary = primary === 'property' ? 'name' : 'property';
+  const read = (attr) => [...document.head.querySelectorAll(`meta[${attr}="${name}"]`)]
+    .map((m) => m.content)
+    .join(', ');
+  return read(primary) || read(secondary);
+}
+
+/**
+ * Creates an `img` element for a metadata image.
+ * @param {Document} document Document used to create the element
+ * @param {string} src Image URL
+ * @param {string} alt Alternative text, not set when empty
+ * @returns {HTMLImageElement} The new element
+ */
+function createImage(document, src, alt) {
+  const el = document.createElement('img');
+  el.src = src;
+  if (alt) {
+    el.alt = alt;
+  }
+  return el;
+}
+
+/**
+ * Collects the metadata of the page (title, description, image and their Open Graph /
+ * Twitter variants) and, if any was found, appends the block created by
+ * `Blocks.getMetadataBlock` to `main`.
+ * An Open Graph or Twitter value is only kept under its own name when it differs
+ * from the `Title` / `Description` / `Image` already collected; when that entry is
+ * still empty, the value fills it instead.
+ * @param {Element} main Element the metadata block is appended to
+ * @param {Document} document The document to read the metadata from
+ * @returns {object} The collected metadata, keyed by metadata name
+ */
+export default function createMetadata(main, document) {
+  const meta = {};
+
+  const title = document.querySelector('title');
+  if (title) {
+    meta.Title = title.textContent.replace(/[\n\t]/gm, '');
+  }
+
+  const desc = getMetadata('description', document);
+  if (desc) {
+    meta.Description = desc;
+  }
+
+  const img = getMetadata('og:image', document);
+  if (img) {
+    const alt = getMetadata('og:image:alt', document);
+    meta.Image = createImage(document, img, alt);
+  }
+
+  // [entry to fill, metadata name] - processed in this order, which defines which
+  // source wins and the insertion order of the keys in `meta`
+  const variants = [
+    ['Title', 'og:title'],
+    ['Description', 'og:description'],
+    ['Title', 'twitter:title'],
+    ['Description', 'twitter:description'],
+  ];
+  variants.forEach(([key, name]) => {
+    const value = getMetadata(name, document);
+    if (value && value !== meta[key]) {
+      // keep an already collected value and store the variant under its own name
+      meta[meta[key] ? name : key] = value;
+    }
+  });
+
+  const timg = getMetadata('twitter:image', document);
+  if (timg && timg !== img) {
+    const alt = getMetadata('twitter:image:alt', document);
+    meta['twitter:image'] = createImage(document, timg, alt);
+  }
+
+  if (Object.keys(meta).length > 0) {
+    const block = Blocks.getMetadataBlock(document, meta);
+    main.append(block);
+  }
+
+  return meta;
+}
diff --git a/src/importer/defaults/rules/transformBackgroundImages.js b/src/importer/defaults/rules/transformBackgroundImages.js
new file mode 100644
index 0000000..65041cb
--- /dev/null
+++ b/src/importer/defaults/rules/transformBackgroundImages.js
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+import DOMUtils from '../../../utils/DOMUtils.js';
+
+/**
+ * Converts inline CSS background images into `img` elements.
+ * For every element below `main` whose `style` attribute contains
+ * `background-image: url`, the image returned by `DOMUtils.getImgFromBackground` is
+ * inserted as first child of the element and the `background-image` style property
+ * is removed. Elements for which no image is returned are left untouched.
+ * @param {Element} main Root element to process
+ * @param {Document} document Document used to create the `img` elements
+ */
+export default function transformBackgroundImages(main, document) {
+  [...main.querySelectorAll('[style*="background-image: url"]')].forEach((element) => {
+    const img = DOMUtils.getImgFromBackground(element, document);
+    if (!img) {
+      // no image could be computed from the style: `prepend(null)` would insert
+      // the literal text "null" into the element
+      return;
+    }
+    element.prepend(img);
+    element.style.removeProperty('background-image');
+  });
+}
diff --git a/src/importer/defaults/transformDOM.js b/src/importer/defaults/transformDOM.js
new file mode 100644
index 0000000..4e14ea6
--- /dev/null
+++ b/src/importer/defaults/transformDOM.js
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+import DOMUtils from '../../utils/DOMUtils.js';
+import createMetadata from './rules/createMetadata.js';
+import adjustImageUrls from './rules/adjustImageUrls.js';
+import convertIcons from './rules/convertIcons.js';
+import transformBackgroundImages from './rules/transformBackgroundImages.js';
+
+/**
+ * Default DOM transformation (imported as `defaultTransformDOM` by HTML2x.js).
+ * Works on `document.body`: removes the usual non-content elements (header, nav,
+ * footer, iframe, noscript), then applies the default rules - metadata block
+ * creation, background image conversion, image URL adjustment and icon conversion.
+ * @param {object} param
+ * @param {string} param.url URL of the page, passed to the image URL adjustment
+ * @param {Document} param.document The document to transform
+ * @returns {Promise<Element>} The transformed `document.body`
+ */
+export default async function transformDOM({
+  // eslint-disable-next-line no-unused-vars
+  url, document, html, params,
+}) {
+  const { body } = document;
+
+  // best effort cleanup of the elements which usually are not page content
+  const nonContentSelectors = [
+    'header', '.header',
+    'nav', '.nav',
+    'footer', '.footer',
+    'iframe',
+    'noscript',
+  ];
+  DOMUtils.remove(body, nonContentSelectors);
+
+  // default rules, all of them modify the body in place
+  createMetadata(body, document);
+  transformBackgroundImages(body, document);
+  adjustImageUrls(body, url);
+  convertIcons(body, document);
+
+  return body;
+}
diff --git a/src/index.js b/src/index.js
index 5f01278..bce634f 100644
--- a/src/index.js
+++ b/src/index.js
@@ -27,6 +27,18 @@ import WPUtils from './wp/WPUtils.js';
import { html2md, html2docx } from './importer/HTML2x.js';
+import createMetadata from './importer/defaults/rules/createMetadata.js';
+import adjustImageUrls from './importer/defaults/rules/adjustImageUrls.js';
+import convertIcons from './importer/defaults/rules/convertIcons.js';
+import transformBackgroundImages from './importer/defaults/rules/transformBackgroundImages.js';
+
+const rules = {
+ createMetadata,
+ adjustImageUrls,
+ convertIcons,
+ transformBackgroundImages,
+};
+
export {
PageImporter,
PageImporterParams,
@@ -42,4 +54,5 @@ export {
WPUtils,
html2md,
html2docx,
+ rules,
};
diff --git a/src/utils/DOMUtils.js b/src/utils/DOMUtils.js
index 6db0d4f..934ebc5 100644
--- a/src/utils/DOMUtils.js
+++ b/src/utils/DOMUtils.js
@@ -258,7 +258,9 @@ export default class DOMUtils {
const styleAttr = element?.getAttribute('style')?.split(';');
if (styleAttr) {
styleAttr.forEach((style) => {
- const [prop, value] = style.split(':');
+ const split = style.split(':');
+ const prop = split.shift();
+ const value = split.join(':').trim();
if (prop === 'background-image') {
const trimmedValue = value.replace(/\s/g, '');
const elStyle = element.style;
@@ -267,7 +269,7 @@ export default class DOMUtils {
});
const url = element.style.backgroundImage;
if (url && url.toLowerCase() !== 'none') {
- const src = url.replace(/url\(/gm, '').replace(/'/gm, '').replace(/\)/gm, '');
+ const src = url.replace(/url\(/gm, '').replace(/'/gm, '').replace(/"/gm, '').replace(/\)/gm, '');
const img = document.createElement('img');
img.src = src;
return img;
diff --git a/test/TestUtils.js b/test/TestUtils.js
new file mode 100644
index 0000000..7bfe3a8
--- /dev/null
+++ b/test/TestUtils.js
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+
+import { JSDOM } from 'jsdom';
+
+export default class TestUtils {
+  // createDocumentFromString for the test environment: parses the html with JSDOM
+  static createDocumentFromString(html) {
+    const dom = new JSDOM(html, { runScripts: undefined });
+    return dom.window.document;
+  }
+}
diff --git a/test/browser/DOMUtils.test.js b/test/browser/DOMUtils.test.js
new file mode 100644
index 0000000..ec8f866
--- /dev/null
+++ b/test/browser/DOMUtils.test.js
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2023 Adobe. All rights reserved.
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. You may obtain a copy
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
+ * OF ANY KIND, either express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ */
+/* eslint-env mocha */
+/* eslint-disable no-unused-expressions */
+
+import { expect } from '@esm-bundle/chai';
+import BrowserUtils from '../../src/utils/BrowserUtils.js';
+import DOMUtils from '../../src/utils/DOMUtils.js';
+
+const createElement = (document, tag, attrs, styles, innerHTML) => { // test helper: builds one element
+ const element = document.createElement(tag);
+ // eslint-disable-next-line no-restricted-syntax, guard-for-in
+ for (const a in attrs) { // every entry of attrs becomes an attribute of the element
+ element.setAttribute(a, attrs[a]);
+ }
+ // eslint-disable-next-line no-restricted-syntax, guard-for-in
+ for (const p in styles) { // every entry of styles is set on the inline style of the element
+ element.style[p] = styles[p];
+ }
+ element.innerHTML = innerHTML;
+ return element;
+};
+
+describe('DOMUtils#element', () => {
+ const test = (tag, attrs, styles, innerHTML, expected) => { // expected: outerHTML of the img, or null
+ const document = BrowserUtils.createDocumentFromString('');
+ const element = createElement(document, tag, attrs, styles, innerHTML);
+ const ret = DOMUtils.getImgFromBackground(element, document);
+ if (expected) {
+ expect(ret).to.not.be.null;
+ expect(ret.outerHTML).to.equal(expected);
+ } else {
+ expect(ret).to.be.null;
+ }
+ };
+
+ it('no background-image style', () => {
+ test('p', {}, {}, 'Some content', null);
+ test('img', { src: 'https://www.server.com/image.jpg', title: 'Some title' }, {}, '', null);
+ test('p', {}, { 'background-image': 'none' }, 'Some content', null);
+ });
+
+ it('with background-image style', () => {
+ test('p', {}, { 'background-image': 'url(https://www.server.com/image.jpg)' }, 'Some content', ' ');
+ test('p', {}, { 'background-image': 'url("https://www.server.com/image.jpg")' }, 'Some content', ' ');
+ test('p', {}, { 'background-image': 'url(\'https://www.server.com/image.jpg\')' }, 'Some content', ' ');
+ test('p', {}, { 'background-image': 'url(http://localhost:3001/image.jpg)' }, 'Some content', ' ');
+ });
+
+ // browser specific version of the test: the test-DOM is created through
+ // BrowserUtils.createDocumentFromString and not through JSDOM.
+ // The workaround in DOMUtils#getImgFromBackground exists _precisely_
+ // because of a potential bug in JSDOM due to which it doesn't
+ // parse `url()` with whitespaces correctly
+ it('with background-image style containing whitespace in url()', () => {
+ test('p', {}, { 'background-image': 'url( /image.jpg )' }, 'Some content', ' ');
+ });
+});
diff --git a/test/importers/HTML2x.spec.js b/test/importers/HTML2x.spec.js
index cb09338..4f5b7d5 100644
--- a/test/importers/HTML2x.spec.js
+++ b/test/importers/HTML2x.spec.js
@@ -14,7 +14,6 @@ import {
deepStrictEqual, ok, strictEqual, fail,
} from 'assert';
import { describe, it } from 'mocha';
-import { JSDOM } from 'jsdom';
import { docx2md } from '@adobe/helix-docx2md';
import MockMediaHandler from '../mocks/MockMediaHandler.js';
@@ -22,39 +21,11 @@ import DOMUtils from '../../src/utils/DOMUtils.js';
import {
html2md,
html2docx,
- defaultGenerateDocumentPath,
- defaultTransformDOM,
} from '../../src/importer/HTML2x.js';
-// test environment createDocumentFromString version using JSDOM
-const createDocumentFromString = (html) => {
- const { document } = new JSDOM(html, { runScripts: undefined }).window;
- return document;
-};
-
-describe('defaultTransformDOM tests', () => {
- it('default transformation', async () => {
- const document = createDocumentFromString('Hello World ');
- const out = await defaultTransformDOM({ document });
- strictEqual(out.outerHTML, 'Hello World ');
- });
-});
+import TestUtils from '../TestUtils.js';
-describe('defaultGenerateDocumentPath tests', () => {
- it('default paths', async () => {
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com' }), '/index');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/' }), '/index');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/index.html' }), '/index');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/index' }), '/index');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/page' }), '/page');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/page.html' }), '/page');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page' }), '/folder/page');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page.html' }), '/folder/page');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page/' }), '/folder/page/index');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page with spaces.html' }), '/folder/page-with-spaces');
- strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/PagE_with_3xtr4_charactére.html' }), '/folder/page-with-3xtr4-charact-re');
- });
-});
+const { createDocumentFromString } = TestUtils;
describe('html2x parameters', () => {
const URL = 'https://www.sample.com/page.html';
@@ -408,8 +379,8 @@ describe('html2md tests', () => {
const out = await html2md('https://www.sample.com/page.html', ' ', null, {
createDocumentFromString,
});
- strictEqual(out.html.trim(), ' ');
- strictEqual(out.md.trim(), '![][image0]\n\n[image0]: ./image.jpg');
+ strictEqual(out.html.trim(), ' ');
+ strictEqual(out.md.trim(), '![][image0]\n\n[image0]: https://www.sample.com/image.jpg');
});
it('html2md allows to preprocess the document', async () => {
@@ -422,8 +393,8 @@ describe('html2md tests', () => {
}, {
createDocumentFromString,
});
- strictEqual(out.html.trim(), ' ');
- strictEqual(out.md.trim(), '![][image0]\n\n[image0]: ./image.jpg');
+ strictEqual(out.html.trim(), ' ');
+ strictEqual(out.md.trim(), '![][image0]\n\n[image0]: https://www.sample.com/image.jpg');
});
it('html2md removes original hrs but keeps md section breaks', async () => {
diff --git a/test/importers/PageImporter.spec.js b/test/importers/PageImporter.spec.js
index fd35140..a984373 100644
--- a/test/importers/PageImporter.spec.js
+++ b/test/importers/PageImporter.spec.js
@@ -237,4 +237,8 @@ describe('PageImporter tests - fixtures', () => {
it('import - sub and sup', async () => {
await featureTest('subsup');
});
+
+ it('import - video', async () => {
+ await featureTest('video');
+ });
});
diff --git a/test/importers/defaults/fixtures/adjust-image-urls.expected.html b/test/importers/defaults/fixtures/adjust-image-urls.expected.html
new file mode 100644
index 0000000..da597a7
--- /dev/null
+++ b/test/importers/defaults/fixtures/adjust-image-urls.expected.html
@@ -0,0 +1,7 @@
+
+ Hello World
+
+
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/adjust-image-urls.input.html b/test/importers/defaults/fixtures/adjust-image-urls.input.html
new file mode 100644
index 0000000..6133d9c
--- /dev/null
+++ b/test/importers/defaults/fixtures/adjust-image-urls.input.html
@@ -0,0 +1,10 @@
+
+
+ Hello World
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/background-image.expected.html b/test/importers/defaults/fixtures/background-image.expected.html
new file mode 100644
index 0000000..532a5ae
--- /dev/null
+++ b/test/importers/defaults/fixtures/background-image.expected.html
@@ -0,0 +1,13 @@
+
+ Hello World
+
+
+
some content here
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/background-image.input.html b/test/importers/defaults/fixtures/background-image.input.html
new file mode 100644
index 0000000..fbe28dd
--- /dev/null
+++ b/test/importers/defaults/fixtures/background-image.input.html
@@ -0,0 +1,10 @@
+
+
+ Hello World
+
+ some content here
+
+
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/cleanup.expected.html b/test/importers/defaults/fixtures/cleanup.expected.html
new file mode 100644
index 0000000..e87208f
--- /dev/null
+++ b/test/importers/defaults/fixtures/cleanup.expected.html
@@ -0,0 +1,5 @@
+
+
+ Hello World
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/cleanup.input.html b/test/importers/defaults/fixtures/cleanup.input.html
new file mode 100644
index 0000000..d9beebe
--- /dev/null
+++ b/test/importers/defaults/fixtures/cleanup.input.html
@@ -0,0 +1,11 @@
+
+
+
+ Nav might be here
+
+ Some no script here
+ Hello World
+
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/default.expected.html b/test/importers/defaults/fixtures/default.expected.html
new file mode 100644
index 0000000..51440d2
--- /dev/null
+++ b/test/importers/defaults/fixtures/default.expected.html
@@ -0,0 +1,4 @@
+
+ Hello World
+ Some text with a span , a link anotherpage and a sub .
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/default.input.html b/test/importers/defaults/fixtures/default.input.html
new file mode 100644
index 0000000..36b7056
--- /dev/null
+++ b/test/importers/defaults/fixtures/default.input.html
@@ -0,0 +1,6 @@
+
+
+ Hello World
+ Some text with a span , a link anotherpage and a sub .
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/icons.expected.html b/test/importers/defaults/fixtures/icons.expected.html
new file mode 100644
index 0000000..90984b6
--- /dev/null
+++ b/test/importers/defaults/fixtures/icons.expected.html
@@ -0,0 +1,4 @@
+
+ Hello World :icon1:
+ This is text with an icon :icon2:
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/icons.input.html b/test/importers/defaults/fixtures/icons.input.html
new file mode 100644
index 0000000..f911f9c
--- /dev/null
+++ b/test/importers/defaults/fixtures/icons.input.html
@@ -0,0 +1,6 @@
+
+
+ Hello World
+ This is text with an icon
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/metadata.all.diff.expected.html b/test/importers/defaults/fixtures/metadata.all.diff.expected.html
new file mode 100644
index 0000000..2e25af5
--- /dev/null
+++ b/test/importers/defaults/fixtures/metadata.all.diff.expected.html
@@ -0,0 +1,40 @@
+
+ Hello World
+
+
+ Metadata
+
+
+ Title
+ Page title - tite element
+
+
+ Description
+ Page description - description meta
+
+
+ Image
+
+
+
+ og:title
+ Page title - og:title meta
+
+
+ og:description
+ Page description - og:description meta
+
+
+ twitter:title
+ Page title - twitter:title meta
+
+
+ twitter:description
+ Page description - twitter:description meta
+
+
+ twitter:image
+
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/metadata.all.diff.input.html b/test/importers/defaults/fixtures/metadata.all.diff.input.html
new file mode 100644
index 0000000..4fb8f88
--- /dev/null
+++ b/test/importers/defaults/fixtures/metadata.all.diff.input.html
@@ -0,0 +1,17 @@
+
+
+ Page title - tite element
+
+
+
+
+
+
+
+
+
+
+
+ Hello World
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/metadata.all.same.expected.html b/test/importers/defaults/fixtures/metadata.all.same.expected.html
new file mode 100644
index 0000000..e3451a8
--- /dev/null
+++ b/test/importers/defaults/fixtures/metadata.all.same.expected.html
@@ -0,0 +1,20 @@
+
+ Hello World
+
+
+ Metadata
+
+
+ Title
+ Page title
+
+
+ Description
+ Page description
+
+
+ Image
+
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/metadata.all.same.input.html b/test/importers/defaults/fixtures/metadata.all.same.input.html
new file mode 100644
index 0000000..8821abc
--- /dev/null
+++ b/test/importers/defaults/fixtures/metadata.all.same.input.html
@@ -0,0 +1,17 @@
+
+
+ Page title
+
+
+
+
+
+
+
+
+
+
+
+ Hello World
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/metadata.basic.expected.html b/test/importers/defaults/fixtures/metadata.basic.expected.html
new file mode 100644
index 0000000..2ef0c76
--- /dev/null
+++ b/test/importers/defaults/fixtures/metadata.basic.expected.html
@@ -0,0 +1,16 @@
+
+ Hello World
+
+
+ Metadata
+
+
+ Title
+ Page title
+
+
+ Description
+ Page description
+
+
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/metadata.basic.input.html b/test/importers/defaults/fixtures/metadata.basic.input.html
new file mode 100644
index 0000000..dd0179a
--- /dev/null
+++ b/test/importers/defaults/fixtures/metadata.basic.input.html
@@ -0,0 +1,9 @@
+
+
+ Page title
+
+
+
+ Hello World
+
+
+ Hello World
+
+
+ Metadata
+
+
+ Image
+
+
+
+
+ Hello World
+
\ No newline at end of file
diff --git a/test/importers/defaults/fixtures/metadata.image.input.html b/test/importers/defaults/fixtures/metadata.image.input.html
new file mode 100644
index 0000000..f618bed
--- /dev/null
+++ b/test/importers/defaults/fixtures/metadata.image.input.html
@@ -0,0 +1,9 @@
+
+