Add util to get URLs from string

This commit is contained in:
Yarmo Mackenbach 2022-09-10 12:14:05 +02:00
parent a389a148e8
commit 9f0a61a3f0
No known key found for this signature in database
GPG key ID: 37367F4AF4087AD1
2 changed files with 56 additions and 0 deletions

View file

@ -63,5 +63,40 @@ const generateClaim = (fingerprint, format) => {
}
}
/**
 * Get the URIs from a string and return them as an array.
 *
 * A candidate URI is, roughly, either `scheme:` (optionally followed by `//`
 * and a `userinfo@` part) plus a run of letters, digits, dots and hyphens, or
 * a `www.` / `userinfo@` prefix plus such a run. An optional path, query and
 * fragment directly following it are included in the match.
 *
 * Trailing `?`, `!` and `.` characters are removed from each match, since the
 * pattern may capture sentence punctuation that follows a URI.
 * @param {string} text - The text that may contain URIs
 * @returns {Array<string>} The URIs in order of appearance; an empty array if
 * none are found or if `text` is not a string
 */
const getUriFromString = (text) => {
  // Anything but a string has no `match` method: report "no URIs" instead of throwing
  if (typeof text !== 'string') {
    return []
  }

  const re = /((([A-Za-z0-9]+:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%/.\w\-_]*)?\??(?:[-+=&;%@.\w_]*)#?(?:[.!/\\\w]*))?)/gi
  const badTrailingChars = '?!.'

  // With the global flag, `match` returns every match, or null when there is none
  const matches = text.match(re)
  if (!matches) {
    return []
  }

  return matches.map((match) => {
    // Walk back from the end past unwanted trailing characters. The `end > 0`
    // bound guarantees termination even if nothing but punctuation is left.
    let end = match.length
    while (end > 0 && badTrailingChars.includes(match.charAt(end - 1))) {
      end -= 1
    }
    return match.substring(0, end)
  })
}
// Expose the utility functions to consumers of this module via the CommonJS `exports` object
exports.generateProxyURL = generateProxyURL
exports.generateClaim = generateClaim
exports.getUriFromString = getUriFromString

View file

@ -18,6 +18,14 @@ const expect = chai.expect
const doipjs = require('../src')
// Multi-line sample text embedding URIs next to sentence punctuation,
// parentheses and brackets, plus two scheme-only identifiers
const textWithUrls = [
  'This is text with URLs like https://domain.tld. Ow, a trailing dot.',
  'What about (https://between.parentheses)? What about [https://between.brackets]?',
  'What about https://in.question? What about https://in.exclamation!',
  'And openpgp4fpr:123123, nonsense:123123'
].join('\n')

// The URIs contained in textWithUrls, in order of appearance and without
// the surrounding punctuation
const urlsFromText = [
  'https://domain.tld',
  'https://between.parentheses',
  'https://between.brackets',
  'https://in.question',
  'https://in.exclamation',
  'openpgp4fpr:123123',
  'nonsense:123123'
]
describe('utils.generateClaim', () => {
it('should be a function (2 arguments)', () => {
expect(doipjs.utils.generateClaim).to.be.a('function')
@ -57,3 +65,16 @@ describe('utils.generateProxyURL', () => {
).to.equal('https://localhost/api/2/get/dns?domain=domain.org')
})
})
// Tests for utils.getUriFromString: checks the function's arity and that the
// URIs extracted from the sample text match the expected list exactly
describe('utils.getUriFromString', () => {
  it('should be a function (1 arguments)', () => {
    expect(doipjs.utils.getUriFromString).to.be.a('function')
    expect(doipjs.utils.getUriFromString).to.have.length(1)
  })
  it('should extract URLs from text', () => {
    // Compare values and order, not only the count, so that a wrong or
    // badly trimmed URI fails the test
    expect(
      doipjs.utils.getUriFromString(textWithUrls)
    ).to.deep.equal(urlsFromText)
  })
  it('should return an empty array for text without URLs', () => {
    expect(
      doipjs.utils.getUriFromString('')
    ).to.deep.equal([])
    expect(
      doipjs.utils.getUriFromString('text without any link')
    ).to.deep.equal([])
  })
})