Skip to content

Commit

Permalink
Merge branch 'main' into console-logger
Browse files Browse the repository at this point in the history
- regenerated package-lock
  • Loading branch information
trieloff committed Nov 29, 2023
2 parents 62bdf8b + e057540 commit f7e211a
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 2 deletions.
4 changes: 3 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,10 @@ jobs:
workflows:
default:
jobs:
- build
- build:
context: Project Helix
- branch-deploy:
context: Project Helix
requires:
- build
filters:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ pkg
*.map
vendor/package
junit
src/spiders.mjs

3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
"scripts": {
"fastly-build": "hedy --build --verbose --directory $INIT_CWD/bin --entryFile $INIT_CWD/src/index.mjs --target c@e --arch node --arch edge --externals fastly:logger --package.name helix3",
"deploy": "hedy --build --verbose --directory $INIT_CWD/bin --entryFile $INIT_CWD/src/index.mjs --deploy --fastly-auth $HLX_FASTLY_AUTH --compute-service-id ${HLX_FASTLY_SVC:-5Qir8H8bLeaRuldIp9TWq4} --target c@e --arch node --arch edge --fastly-gateway deploy-test.anywhere.run --externals fastly:logger --package.name helix3",
"predeploy": "node tools/spider-list.js",
"test": "c8 mocha -i -g 'Post-Deploy'",
"pretest": "node tools/spider-list.js",
"test-postdeploy": "mocha -g 'Post-Deploy'",
"prelint": "node tools/spider-list.js",
"lint": "./node_modules/.bin/eslint .",
"semantic-release": "semantic-release"
},
Expand Down
5 changes: 4 additions & 1 deletion src/utils.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
* governing permissions and limitations under the License.
*/
// Pass the current time to facilitate unit testing
import { isSpider } from './spiders.mjs';

export function maskTime(time, timePadding) {
const msPerHour = 3600000;

Expand Down Expand Up @@ -62,7 +64,8 @@ export function getMaskedUserAgent(userAgent) {
if (lcUA.includes('bot')
|| lcUA.includes('spider')
|| lcUA.includes('crawler')
|| lcUA.includes('ahc/')) {
|| lcUA.includes('ahc/')
|| isSpider(lcUA)) {
return 'bot';
}

Expand Down
28 changes: 28 additions & 0 deletions test/spider.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright 2023 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
/* eslint-env mocha */
import assert from 'assert';
import { isSpider } from '../src/spiders.mjs';

// These tests exercise the isSpider function imported from ../src/spiders.mjs.
// NOTE(review): the Googlebot expectation presumably only holds when that
// module was built from a spider list containing a matching pattern - confirm
// the list is available wherever these tests run.
describe('Test IAB Spider List', () => {
  it('isSpider is a function', () => {
    assert.strictEqual(typeof isSpider, 'function');
  });

  it('Firefox is not a spider', () => {
    // strictEqual: a falsy non-boolean result (0, '', undefined) must not pass
    assert.strictEqual(
      isSpider('Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0'),
      false,
    );
  });

  it('Googlebot is a spider', () => {
    // strictEqual: a truthy non-boolean result (1, 'yes') must not pass
    assert.strictEqual(
      isSpider('Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'),
      true,
    );
  });
});
96 changes: 96 additions & 0 deletions tools/spider-list.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env node
/*
* Copyright 2023 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
/*
# Fields - delimited by a pipe symbol [|]:
# 1) pattern - case insensitive string to match anywhere in the string
# 2) active flag
# 1=pattern is active and should be matched
# 0=pattern is inactive, and should be ignored
# 3) comma and space separated list of exception patterns - case insensitive string to
# match anywhere in the string. Use ", " as the separator
# 4) an additional flag was added to this list in November 2005 to identify
# those user-agent strings on this list that would not pass the valid user-
# agent test and therefore, are redundant if both lists are used.
# 1=this entry is not needed for those who use a two-pass approach
# 0=this entry is always needed for both one-pass and two-pass
# approaches
# 5) Another flag was added to this list when the IAB and ABC merged their two
# lists (01/06) to identify those strings that primarily impact page
# impression measurement vs. those strings that primarily impact ad
# impression measurement (or both). The flags are as follows:
# 0=this entry primarily impacts page impression measurement
# 1=this entry primarily impacts ad impression measurement
# 2=this entry impacts both
# 6) start-of-string flag
# 1=pattern must occur at the start of the UA string
# 0=pattern may appear anywhere within the UA string
# 7) **Inactive Date. Inactive Robot List Only.
# mm/dd/yyyy format
*/
const { writeFileSync } = require('fs');

// Raw spider list, one pipe-delimited entry per line, taken from the
// environment. When the variable is unset the generated isSpider() matches
// nothing.
const spiders = process.env.IAB_SPIDER_LIST || '';

/**
 * Renders a string as a single-quoted JavaScript string literal.
 *
 * Backslashes, single quotes and line breaks are escaped so that list entries
 * containing them cannot break, or inject code into, the generated module.
 *
 * @param {string} text raw text to embed in the generated source
 * @returns {string} source code of a string literal that evaluates to `text`
 */
function toStringLiteral(text) {
  const escaped = text
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/\r/g, '\\r')
    .replace(/\n/g, '\\n');
  return `'${escaped}'`;
}

/**
 * Parses one non-comment line of the spider list.
 *
 * @param {string} line pipe-delimited entry
 * @returns {{pattern: string, active: boolean, exceptions: string[], start: boolean}}
 *   the lowercased pattern, its active flag, the lowercased exception patterns
 *   and whether the pattern must occur at the start of the user agent
 */
function parseEntry(line) {
  // Fields 4 and 5 (the two-pass and impression flags) are not used here.
  const [pattern, active, exceptions, , , startFlag] = line.split('|');
  return {
    pattern: pattern.toLowerCase(),
    active: active === '1',
    // An empty exception would be contained in every user agent and thereby
    // disable its pattern entirely, so empty items are dropped.
    exceptions: (exceptions || '')
      .split(', ')
      .filter((exception) => exception !== '')
      .map((exception) => exception.toLowerCase()),
    start: startFlag === '1',
  };
}

/**
 * Renders the source of one `if` statement of the generated function: it
 * returns true when the lowercased user agent matches the pattern and contains
 * none of the exception patterns.
 *
 * @param {{pattern: string, exceptions: string[], start: boolean}} entry parsed entry
 * @returns {string} JavaScript source, starting with a line break
 */
function renderEntry(entry) {
  const matcher = entry.start ? 'startsWith' : 'includes';
  // Pattern and exceptions are both compared against the lowercased user
  // agent, which makes the whole check case insensitive.
  const conditions = [
    `lcUA.${matcher}(${toStringLiteral(entry.pattern)})`,
    ...entry.exceptions.map((exception) => `!lcUA.includes(${toStringLiteral(exception)})`),
  ];
  return `
  if (${conditions.join('\n    && ')}) {
    return true;
  }`;
}

const spiderList = spiders
  .split(/\r?\n/)
  .filter((line) => line && !line.startsWith('#'))
  .map(parseEntry)
  // An empty pattern would classify every user agent as a spider.
  .filter((entry) => entry.active && entry.pattern !== '')
  .map(renderEntry);

const header = `/*
 * Copyright 2023 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
export function isSpider(ua) {`;

// Lowercase the user agent once instead of once per pattern. The line is only
// emitted when at least one check uses it, so an empty list still yields a
// function that simply returns false.
const prelude = spiderList.length > 0
  ? `
  const lcUA = ua.toLowerCase();`
  : '';

const footer = `
  return false;
}
`;

writeFileSync('./src/spiders.mjs', header + prelude + spiderList.join('') + footer);

0 comments on commit f7e211a

Please sign in to comment.