/**
 * Get the URIs from a string and return them as an array
 * @param {string} text - The text that may contain URIs
 * @returns {Array<string>} The URIs found in `text`, in order of appearance,
 *   with any trailing `?`, `!` or `.` removed; an empty array when nothing
 *   matches or when `text` is not a string
 */
const getUriFromString = (text) => {
  // Anything that is not a string cannot contain URIs; return early instead
  // of throwing on null/undefined
  if (typeof text !== 'string') {
    return []
  }

  // Candidates are either `scheme:` (optionally followed by `//` and
  // `userinfo@`) plus a host, or a `www.`/`something@` prefix plus a host,
  // optionally followed by path, query and fragment characters
  const re = /((([A-Za-z0-9]+:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%/.\w\-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[.!/\\\w]*))?)/gi
  const matches = text.match(re)

  if (!matches) {
    return []
  }

  // The pattern also accepts `?`, `!` and `.` at the end of a match, so
  // sentence punctuation following a URI ends up inside it; strip the whole
  // trailing run of those characters
  return matches.map(uri => uri.replace(/[?!.]+$/, ''))
}
// Fixture: prose in which URIs are followed by or wrapped in punctuation
// (trailing dot, parentheses, brackets, question and exclamation marks)
const textWithUrls = `This is text with URLs like https://domain.tld. Ow, a trailing dot.
What about (https://between.parentheses)? What about [https://between.brackets]?
What about https://in.question? What about https://in.exclamation! 
And openpgp4fpr:123123, nonsense:123123`
// The URIs expected from textWithUrls, in order of appearance
const urlsFromText = [
  'https://domain.tld',
  'https://between.parentheses',
  'https://between.brackets',
  'https://in.question',
  'https://in.exclamation',
  'openpgp4fpr:123123',
  'nonsense:123123'
]

describe('utils.getUriFromString', () => {
  it('should be a function (1 arguments)', () => {
    expect(doipjs.utils.getUriFromString).to.be.a('function')
    expect(doipjs.utils.getUriFromString).to.have.length(1)
  })
  it('should extract URLs from text', () => {
    // Compare every extracted URI and their order, not only the count, so a
    // wrong match with the right number of results fails the test
    expect(
      doipjs.utils.getUriFromString(textWithUrls)
    ).to.deep.equal(urlsFromText)
  })
})