Skip to content

Commit

Permalink
feat: add limits
Browse files Browse the repository at this point in the history
fixes #420
  • Loading branch information
tripodsan authored Feb 28, 2024
1 parent 3d8e546 commit 5ff8cec
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 55 deletions.
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"@adobe/fetch": "4.1.1",
"@adobe/helix-admin-support": "2.3.22",
"@adobe/helix-markdown-support": "7.1.0",
"@adobe/helix-mediahandler": "2.4.11",
"@adobe/helix-mediahandler": "2.4.12",
"@adobe/helix-shared-body-data": "2.0.2",
"@adobe/helix-shared-process-queue": "3.0.1",
"@adobe/helix-shared-utils": "3.0.1",
Expand Down
109 changes: 72 additions & 37 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,18 @@
import wrap from '@adobe/helix-shared-wrap';
import { helixStatus } from '@adobe/helix-status';
import bodyData from '@adobe/helix-shared-body-data';
import { Response, h1NoCache } from '@adobe/fetch';
import {
Response,
h1NoCache,
timeoutSignal,
AbortError,
} from '@adobe/fetch';
import { cleanupHeaderValue } from '@adobe/helix-shared-utils';
import { MediaHandler } from '@adobe/helix-mediahandler';
import { fetchFstab, getContentBusId } from '@adobe/helix-admin-support';
import pkgJson from './package.cjs';
import { html2md } from './html2md.js';
import { TooManyImagesError } from './mdast-process-images.js';

/* c8 ignore next 7 */
export const { fetch } = h1NoCache();
Expand Down Expand Up @@ -122,26 +128,44 @@ async function run(request, ctx) {
reqHeaders['x-content-source-location'] = sourceLocation;
}

const res = await fetch(url, {
headers: reqHeaders,
});
if (!res.ok) {
const { status } = res;
if (status >= 400 && status < 500) {
switch (status) {
case 401:
case 403:
case 404:
return error(`resource not found: ${url}`, status);
default:
return error(`error fetching resource at ${url}`, status);
let html;
let res;
// limit response time of content provider to 10s
const signal = timeoutSignal(ctx.env?.HTML_FETCH_TIMEOUT || 10_000);
try {
res = await fetch(url, {
headers: reqHeaders,
signal,
});
html = await res.text();
if (!res.ok) {
const { status } = res;
if (status >= 400 && status < 500) {
switch (status) {
case 401:
case 403:
case 404:
return error(`resource not found: ${url}`, status);
default:
return error(`error fetching resource at ${url}`, status);
}
} else {
// propagate other errors as 502
return error(`error fetching resource at ${url}: ${status}`, 502);
}
} else {
// propagate other errors as 502
return error(`error fetching resource at ${url}: ${status}`, 502);
}
// limit response size of content provider to 1mb
if (html.length > 1024 * 1024) {
return error(`error fetching resource at ${url}: html source larger than 1mb`, 409);
}
} catch (e) {
if (e instanceof AbortError) {
return error(`error fetching resource at ${url}: timeout after 10s`, 504);
}
return error(`error fetching resource at ${url}: ${e.message}`, 502);
} finally {
signal.clear();
}
const html = await res.text();

// only use media handler when loaded via fstab. otherwise images are not processed.
let mediaHandler;
Expand Down Expand Up @@ -170,32 +194,43 @@ async function run(request, ctx) {
filter: /* c8 ignore next */ (blob) => ((blob.contentType || '').startsWith('image/')),
blobAgent: `html2md-${pkgJson.version}`,
noCache,
fetchTimeout: 5000, // limit image fetches to 5s
forceHttp1: true,
});
}

const md = await html2md(html, {
mediaHandler,
log,
url,
});
try {
const md = await html2md(html, {
mediaHandler,
log,
url,
});

const headers = {
'content-type': 'text/markdown; charset=utf-8',
'content-length': md.length,
'cache-control': 'no-store, private, must-revalidate',
'x-source-location': cleanupHeaderValue(url),
};
const headers = {
'content-type': 'text/markdown; charset=utf-8',
'content-length': md.length,
'cache-control': 'no-store, private, must-revalidate',
'x-source-location': cleanupHeaderValue(url),
};

const lastMod = res.headers.get('last-modified');
if (lastMod) {
headers['last-modified'] = lastMod;
}
const lastMod = res.headers.get('last-modified');
if (lastMod) {
headers['last-modified'] = lastMod;
}

return new Response(md, {
status: 200,
headers,
});
return new Response(md, {
status: 200,
headers,
});
} catch (e) {
if (e instanceof TooManyImagesError) {
return error(`error fetching resource at ${url}: ${e.message}`, 409);
}
/* c8 ignore next 2 */
return error(`error fetching resource at ${url}: ${e.message}`, 500);
} finally {
await mediaHandler?.fetchContext.reset();
}
}

export const main = wrap(run)
Expand Down
38 changes: 28 additions & 10 deletions src/mdast-process-images.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
import { visit } from 'unist-util-visit';
import { visit, CONTINUE } from 'unist-util-visit';
import processQueue from '@adobe/helix-shared-process-queue';

/**
 * Error signalling that a document references more distinct images than
 * the processing limit allows. Callers can detect it via `instanceof`.
 */
export class TooManyImagesError extends Error {
  /**
   * @param {string} [message] human readable description of the violation
   * @param {object} [options] standard Error options (e.g. `cause`)
   */
  constructor(message, options) {
    super(message, options);
    // give the error a distinguishable name in logs and stack traces
    // instead of the generic 'Error' inherited from the base class.
    this.name = 'TooManyImagesError';
  }
}

/**
* Process images
* @param {Console} log
Expand All @@ -24,33 +27,48 @@ export async function processImages(log, tree, mediaHandler, baseUrl) {
return;
}
// gather all image nodes
const images = [];
const images = new Map();
// Files an image node under its url, so that each distinct url maps to the
// list of all nodes that reference it.
const register = (node) => {
  const { url } = node;
  const known = images.get(url);
  if (known === undefined) {
    images.set(url, [node]);
    return;
  }
  known.push(node);
};

visit(tree, (node) => {
if (node.type === 'image') {
const { url = '' } = node;
if (url.indexOf(':') < 0) {
// eslint-disable-next-line no-param-reassign
node.url = new URL(url, baseUrl).href;
images.push(node);
register(node);
} else if (url.startsWith('https://')) {
images.push(node);
register(node);
}
}
return visit.CONTINUE;
return CONTINUE;
});

if (images.size > 100) {
throw new TooManyImagesError(`maximum number of images reached: ${images.size} of 100 max.`);
}

// upload images
await processQueue(images, async (node) => {
const { url } = node;
await processQueue(images.entries(), async ([url, nodes]) => {
try {
const blob = await mediaHandler.getBlob(node.url, baseUrl);
const blob = await mediaHandler.getBlob(url, baseUrl);
// eslint-disable-next-line no-param-reassign
node.url = blob?.uri || 'about:error';
url = blob?.uri || 'about:error';
/* c8 ignore next 6 */
} catch (e) {
// in case of invalid urls, or other errors
log.warn(`Failed to fetch image for url '${url}': ${e.message}`);
// eslint-disable-next-line no-param-reassign
node.url = 'about:error';
url = 'about:error';
}
for (const node of nodes) {
node.url = url;
}
}, 8);
}
3 changes: 3 additions & 0 deletions test/fixtures/images.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ <h1>Hello, World.</h1>
<picture>
<img src="/absolute.png">
</picture>
<picture>
<img src="/absolute.png">
</picture>
<picture>
<img src="relative.png">
</picture>
Expand Down
2 changes: 2 additions & 0 deletions test/fixtures/images.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

![][image0]

![][image0]

![][image1]

![][image2]
Expand Down
77 changes: 74 additions & 3 deletions test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ describe('Index Tests', () => {
assert.strictEqual((await result.text()).trim(), expected.trim());
assert.deepStrictEqual(result.headers.plain(), {
'cache-control': 'no-store, private, must-revalidate',
'content-length': '811',
'content-length': '824',
'content-type': 'text/markdown; charset=utf-8',
'last-modified': 'Sat, 22 Feb 2031 15:28:00 GMT',
'x-source-location': 'https://www.example.com/blog/article',
Expand Down Expand Up @@ -188,7 +188,7 @@ describe('Index Tests', () => {
assert.strictEqual((await result.text()).trim(), expected.trim());
assert.deepStrictEqual(result.headers.plain(), {
'cache-control': 'no-store, private, must-revalidate',
'content-length': '811',
'content-length': '824',
'content-type': 'text/markdown; charset=utf-8',
'last-modified': 'Sat, 22 Feb 2031 15:28:00 GMT',
'x-source-location': 'https://www.example.com/blog/article',
Expand Down Expand Up @@ -409,7 +409,45 @@ mountpoints:
});
});

it('returns 502 for am error response', async () => {
// Verifies that a page referencing more distinct images than the limit is
// rejected with a 409 and an explanatory `x-error` header.
it('returns 409 for too many different images', async () => {
// build a page with 101 distinct image urls, i.e. one more than the
// maximum of 100 that the expected error message reports.
let html = '<html><body><main><div>';
for (let i = 0; i < 101; i += 1) {
html += `<img src="/image-${i}.png">`;
}
html += '</div></main></body>';

nock.fstab();
nock('https://www.example.com')
.get('/')
.reply(200, html);

const result = await main(reqUrl('/'), { log: console, env: {} });
assert.strictEqual(result.status, 409);
// error responses carry no body; the reason is reported via `x-error`
assert.strictEqual(await result.text(), '');
assert.deepStrictEqual(result.headers.plain(), {
'cache-control': 'no-store, private, must-revalidate',
'content-type': 'text/plain; charset=utf-8',
'x-error': 'error fetching resource at https://www.example.com/: maximum number of images reached: 101 of 100 max.',
});
});

// Verifies that an html source exceeding the size limit is rejected with
// a 409 and an explanatory `x-error` header.
it('returns 409 for a large html', async () => {
nock.fstab();
nock('https://www.example.com')
.get('/')
// respond with 1024 * 1024 + 1 characters, one more than the handler's
// `html.length > 1024 * 1024` check allows
.reply(200, 'x'.repeat(1024 ** 2 + 1));

const result = await main(reqUrl('/'), { log: console });
assert.strictEqual(result.status, 409);
// error responses carry no body; the reason is reported via `x-error`
assert.strictEqual(await result.text(), '');
assert.deepStrictEqual(result.headers.plain(), {
'cache-control': 'no-store, private, must-revalidate',
'content-type': 'text/plain; charset=utf-8',
'x-error': 'error fetching resource at https://www.example.com/: html source larger than 1mb',
});
});

it('returns 502 for a error response', async () => {
nock.fstab();
nock('https://www.example.com')
.get('/')
Expand All @@ -424,4 +462,37 @@ mountpoints:
'x-error': 'error fetching resource at https://www.example.com/: 500',
});
});

// Verifies that a failure of the fetch itself (as opposed to an error
// status from the content provider) is reported as a 502 and that the
// underlying error message is included in the `x-error` header.
it('returns 502 for a fetch error', async () => {
nock.fstab();
nock('https://www.example.com')
.get('/')
// make the mocked request fail with an error instead of a response
.replyWithError(new Error('boom!'));

const result = await main(reqUrl('/'), { log: console });
assert.strictEqual(result.status, 502);
assert.strictEqual(await result.text(), '');
assert.deepStrictEqual(result.headers.plain(), {
'cache-control': 'no-store, private, must-revalidate',
'content-type': 'text/plain; charset=utf-8',
'x-error': 'error fetching resource at https://www.example.com/: boom!',
});
});

// Verifies that a content provider responding slower than the configured
// fetch timeout results in a 504.
it('returns 504 when html fetch times out', async () => {
nock.fstab();
nock('https://www.example.com')
.get('/')
// delay the reply beyond the timeout configured below, so that the 404
// is never seen by the handler (presumably milliseconds - TODO confirm)
.delay(100)
.reply(404);

// HTML_FETCH_TIMEOUT overrides the handler's default of 10_000 that is
// passed to `timeoutSignal`
const result = await main(reqUrl('/'), { log: console, env: { HTML_FETCH_TIMEOUT: 10 } });
assert.strictEqual(result.status, 504);
assert.strictEqual(await result.text(), '');
// NOTE(review): the expected message says '10s' even though the timeout is
// overridden to 10 here; the handler's message text is fixed and does not
// reflect the configured value.
assert.deepStrictEqual(result.headers.plain(), {
'cache-control': 'no-store, private, must-revalidate',
'content-type': 'text/plain; charset=utf-8',
'x-error': 'error fetching resource at https://www.example.com/: timeout after 10s',
});
});
});

0 comments on commit 5ff8cec

Please sign in to comment.