1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | |
29 | |
30 | |
31 | import HTMLSanitizerData from './HTMLSanitizerData.js'; |
32 | import HtmlStringValidationResult from './HtmlStringValidationResult.js'; |
33 | import UrlValidationResult from './UrlValidationResult.js'; |
34 | import { SVG_NS, ZERO, NOT_FOUND } from '../../main/Constants.js'; |
35 | |
36 | |
37 | |
38 | |
39 | |
40 | |
41 | |
42 | |
43 | |
44 | |
/**
Sanitizer for strings coming from untrusted sources (html fragments, urls, strings to use in JS code, colors).
Html is parsed with DOMParser and only the node names and attribute names accepted by HTMLSanitizerData
are kept; everything else is dropped.
*/

class HTMLSanitizer {

	/**
	The html text built by the sanitizeToHtmlString ( ) call in progress
	@type {String}
	*/

	#stringifiedHTML = '';

	/**
	The error messages collected by the sanitizeToHtmlString ( ) call in progress
	@type {String}
	*/

	#stringifyErrors = '';

	/**
	The object giving the valid node names and, for a node name, the valid attribute names
	@type {HTMLSanitizerData}
	*/

	static #htmlSanitizerData = new HTMLSanitizerData ( );

	/**
	The character that replaces the nbsp entities of the input while the html is parsed
	(see sanitizeToHtmlString ( ) and #addHtmlEntities ( ) ). Both methods must use the same value.
	@type {String}
	*/

	static #NBSP_PLACEHOLDER = '\u0a00';

	/**
	The node names that are processed with the namespace aware attribute methods
	@type {Array.<String>}
	*/

	static #SVG_NODE_NAMES = Object.freeze ( [ 'svg', 'text', 'polyline' ] );

	/**
	Test if a valid node name is one of the svg node names
	@param {String} nodeName The node name, as returned by HTMLSanitizerData.getValidNodeName ( )
	@return {Boolean} True when the node must be processed as a svg node
	*/

	static #isSvgNodeName ( nodeName ) {
		return NOT_FOUND !== HTMLSanitizer.#SVG_NODE_NAMES.indexOf ( nodeName );
	}

	/**
	Replace the characters &lt; &gt; &quot; &apos; and the nbsp placeholder with the corresponding html entity.
	The ampersand itself is deliberately left untouched, so the output of previous versions is unchanged
	for strings containing a lone ampersand.
	@param {String} htmlString The text or attribute value to escape
	@return {String} The escaped string
	*/

	#addHtmlEntities ( htmlString ) {

		// The ampersand of each entity is written \u0026 so the literal survives tools that decode
		// html entities found in source listings
		return htmlString
			.replaceAll ( '\u003c', '\u0026lt;' )
			.replaceAll ( '\u003e', '\u0026gt;' )
			.replaceAll ( '\u0022', '\u0026quot;' )
			.replaceAll ( '\u0027', '\u0026apos;' )
			.replaceAll ( HTMLSanitizer.#NBSP_PLACEHOLDER, '\u0026nbsp;' );
	}

	/**
	Add an attribute, with its value escaped, to the html text in progress
	@param {String} attributeName The name of the attribute
	@param {String} attributeValue The raw value of the attribute
	*/

	#stringifyAttribute ( attributeName, attributeValue ) {
		this.#stringifiedHTML += ` ${attributeName}="${this.#addHtmlEntities ( attributeValue )}"`;
	}

	/**
	Validate an url found in a href or src attribute and add the attribute to the html text in progress.
	A non empty url that does not pass sanitizeToUrl ( ) is reported in the errors and the attribute is omitted.
	@param {String} url The url to validate
	@param {String} attributeName The name of the attribute in which the url was found
	*/

	#stringifyUrl ( url, attributeName ) {
		const validUrl = this.sanitizeToUrl ( url, attributeName ).url;
		if ( '' === validUrl && '' !== url ) {
			this.#stringifyErrors +=
				`\nAn invalid url (${url}) was removed from a ${attributeName} attribute`;
		}
		else {

			// no escaping needed: sanitizeToUrl ( ) rejects urls containing quotes or angle brackets
			this.#stringifiedHTML += ` ${attributeName}="${validUrl}"`;
		}
	}

	/**
	Add the valid attributes of a svg node to the html text in progress.
	Each processed attribute is removed from the node, so the attributes still present afterwards
	are the invalid ones (see #addStringifyErrors ( ) ).
	@param {Element} currentNode The svg node
	@param {String} nodeName The valid name of the node
	*/

	#stringifySvgAttributes ( currentNode, nodeName ) {
		HTMLSanitizer.#htmlSanitizerData.getValidAttributesNames ( nodeName ).forEach (
			validAttributeName => {
				if ( currentNode.hasAttributeNS ( null, validAttributeName ) ) {
					this.#stringifyAttribute (
						validAttributeName,
						currentNode.getAttributeNS ( null, validAttributeName )
					);
					currentNode.removeAttributeNS ( null, validAttributeName );
				}
			}
		);
	}

	/**
	Add the valid attributes of a html node to the html text in progress.
	A rel="noopener noreferrer" attribute is added first when the node has a target attribute.
	href and src values are validated as urls. Each processed attribute is removed from the node,
	so the attributes still present afterwards are the invalid ones (see #addStringifyErrors ( ) ).
	@param {Element} currentNode The html node
	@param {String} nodeName The valid name of the node
	*/

	#stringifyHTMLAttributes ( currentNode, nodeName ) {
		if ( currentNode.hasAttribute ( 'target' ) ) {
			this.#stringifiedHTML += ' rel="noopener noreferrer"';
		}
		HTMLSanitizer.#htmlSanitizerData.getValidAttributesNames ( nodeName ).forEach (
			validAttributeName => {
				if ( ! currentNode.hasAttribute ( validAttributeName ) ) {
					return;
				}
				const attributeValue = currentNode.getAttribute ( validAttributeName );
				if ( 'href' === validAttributeName || 'src' === validAttributeName ) {
					this.#stringifyUrl ( attributeValue, validAttributeName );
				}
				else {
					this.#stringifyAttribute ( validAttributeName, attributeValue );
				}
				currentNode.removeAttribute ( validAttributeName );
			}
		);
	}

	/**
	Report in the errors every attribute still present on a node, the rel attribute excepted.
	Must be called after the valid attributes have been removed from the node.
	@param {Element} currentNode The node to inspect
	*/

	#addStringifyErrors ( currentNode ) {
		for ( const attribute of currentNode.attributes ) {
			if ( 'rel' !== attribute.name ) {
				this.#stringifyErrors +=
					`\nAn unsecure attribute ${attribute.name}="${attribute.value}" was removed.`;
			}
		}
	}

	/**
	Recursively add the child nodes of a node to the html text in progress.
	Nodes with an invalid name are reported in the errors and skipped together with their descendants.
	@param {Node} sourceNode The node whose children are stringified
	*/

	#stringify ( sourceNode ) {
		for ( const currentNode of sourceNode.childNodes ) {
			const nodeName = HTMLSanitizer.#htmlSanitizerData.getValidNodeName ( currentNode.nodeName );
			if ( '' === nodeName ) {
				this.#stringifyErrors += `\nAn invalid tag ${currentNode.nodeName} was removed`;
			}
			else if ( '\u0023text' === nodeName ) {
				this.#stringifiedHTML += this.#addHtmlEntities ( currentNode.nodeValue );
			}
			else {
				this.#stringifiedHTML += '<' + nodeName;
				if ( HTMLSanitizer.#isSvgNodeName ( nodeName ) ) {
					this.#stringifySvgAttributes ( currentNode, nodeName );
				}
				else {
					this.#stringifyHTMLAttributes ( currentNode, nodeName );
				}
				this.#stringifiedHTML += '>';
				this.#stringify ( currentNode );
				this.#stringifiedHTML += '</' + nodeName + '>';

				// the valid attributes were removed above; what is left is invalid
				if ( currentNode.attributes ) {
					this.#addStringifyErrors ( currentNode );
				}
			}
		}
	}

	/**
	Create a svg element with the given name and copy to it the valid attributes of a node.
	The source node is left untouched, so cloning a live element does not alter it.
	@param {Element} currentNode The node to copy
	@param {String} nodeName The valid name of the node
	@return {Element} The new svg element, without children
	*/

	#cloneSvg ( currentNode, nodeName ) {
		const newChildNode = document.createElementNS ( SVG_NS, nodeName );
		HTMLSanitizer.#htmlSanitizerData.getValidAttributesNames ( nodeName ).forEach (
			validAttributeName => {
				if ( currentNode.hasAttributeNS ( null, validAttributeName ) ) {
					newChildNode.setAttributeNS (
						null,
						validAttributeName,
						currentNode.getAttributeNS ( null, validAttributeName )
					);
				}
			}
		);
		return newChildNode;
	}

	/**
	Create a html element with the given name and copy to it the valid attributes of a node.
	href and src values are copied only when they pass sanitizeToUrl ( ). A rel="noopener noreferrer"
	attribute is set when the source node has a target attribute.
	@param {Element} currentNode The node to copy
	@param {String} nodeName The valid name of the node
	@return {Element} The new html element, without children
	*/

	#cloneHTML ( currentNode, nodeName ) {
		const newChildNode = document.createElement ( nodeName );
		HTMLSanitizer.#htmlSanitizerData.getValidAttributesNames ( nodeName ).forEach (
			validAttributeName => {
				if ( ! currentNode.hasAttribute ( validAttributeName ) ) {
					return;
				}
				let attributeValue = currentNode.getAttribute ( validAttributeName );
				if ( 'href' === validAttributeName || 'src' === validAttributeName ) {
					attributeValue = this.sanitizeToUrl ( attributeValue, validAttributeName ).url;
					if ( '' === attributeValue ) {

						// invalid or empty url: the attribute is not copied
						return;
					}
				}
				newChildNode.setAttribute ( validAttributeName, attributeValue );
			}
		);
		if ( currentNode.hasAttribute ( 'target' ) ) {
			newChildNode.setAttribute ( 'rel', 'noopener noreferrer' );
		}
		return newChildNode;
	}

	/**
	Recursively copy the valid child nodes of a node into another node.
	Text nodes are copied as text; nodes with an invalid name are skipped together with their descendants.
	@param {Node} clonedNode The node whose children are copied
	@param {Node} newNode The node receiving the copies
	*/

	#cloneNode ( clonedNode, newNode ) {
		for ( const currentNode of clonedNode.childNodes ) {
			const nodeName = HTMLSanitizer.#htmlSanitizerData.getValidNodeName ( currentNode.nodeName );
			if ( '\u0023text' === nodeName ) {
				newNode.appendChild ( document.createTextNode ( currentNode.nodeValue ) );
			}
			else if ( '' !== nodeName ) {
				const newChildNode = HTMLSanitizer.#isSvgNodeName ( nodeName )
					?
					this.#cloneSvg ( currentNode, nodeName )
					:
					this.#cloneHTML ( currentNode, nodeName );

				newNode.appendChild ( newChildNode );
				this.#cloneNode ( currentNode, newChildNode );
			}
		}
	}

	/**
	The constructor. The instance is frozen, so no public property can be added to it.
	*/

	constructor ( ) {
		Object.freeze ( this );
	}

	/**
	Parse a html string and append the valid nodes and attributes to a target node.
	When the string cannot be parsed to a document, the content of the target node is cleared instead.
	@param {String} htmlString The string to sanitize
	@param {Node} targetNode The node to which the sanitized nodes are appended
	*/

	sanitizeToHtmlElement ( htmlString, targetNode ) {

		// the string is wrapped in a div so the parsed content has a single root
		const parseResult = new DOMParser ( ).parseFromString ( '<div>' + htmlString + '</div>', 'text/html' );

		const docFragment = new DocumentFragment ( );
		if ( parseResult && '\u0023document' === parseResult.nodeName ) {
			this.#cloneNode ( parseResult.body.firstChild, docFragment );
			targetNode.appendChild ( docFragment );
		}
		else {
			targetNode.textContent = '';
		}
	}

	/**
	Build a sanitized copy of a html element. The copy has the same tag name as the element and
	contains only the valid descendants and attributes; the attributes of the element itself are not copied.
	@param {HTMLElement} htmlElement The element to copy
	@return {HTMLElement} The sanitized copy
	*/

	clone ( htmlElement ) {
		const clone = document.createElement ( htmlElement.tagName );
		this.#cloneNode ( htmlElement, clone );

		return clone;
	}

	/**
	Parse a html string and rebuild a html string containing only the valid nodes and attributes.
	@param {String} htmlString The string to sanitize
	@return {HtmlStringValidationResult} An object built with the sanitized string and the error messages.
	When the string cannot be parsed to a document, it is built with an empty string and 'Parsing error'.
	*/

	sanitizeToHtmlString ( htmlString ) {

		this.#stringifiedHTML = '';
		this.#stringifyErrors = '';

		// Every nbsp entity is swapped for a placeholder character before parsing and restored as an
		// entity by #addHtmlEntities ( ), so the entities written by the user are still entities in
		// the result. replaceAll is needed: replace ( ) with a string only changes the first occurrence.
		const protectedHtmlString = htmlString.replaceAll ( '\u0026nbsp;', HTMLSanitizer.#NBSP_PLACEHOLDER );
		const parseResult =
			new DOMParser ( ).parseFromString ( '<div>' + protectedHtmlString + '</div>', 'text/html' );
		if ( parseResult && '\u0023document' === parseResult.nodeName ) {
			this.#stringify ( parseResult.body.firstChild );
			return new HtmlStringValidationResult ( this.#stringifiedHTML, this.#stringifyErrors );
		}
		return new HtmlStringValidationResult ( '', 'Parsing error' );
	}

	/**
	Validate an url. The url is accepted only when:
	- parsing it as html gives nothing but text;
	- it contains none of the characters &lt; &gt; &quot; &apos;, neither as html entity nor percent-encoded,
	and is left unchanged by the html parsing;
	- its protocol is https:, or http: (for a href attribute or when the page itself is loaded with http:),
	or mailto:, sms:, tel: (for a href attribute), or data: (for a src attribute);
	- for sms: and tel:, the path is a + followed only by digits, * or #.
	For a href attribute, a string made of # followed only by word characters is also accepted.
	@param {String} urlString The url to validate
	@param {?String} attributeName The attribute in which the url is used ('href' or 'src'). Default 'href'
	@return {UrlValidationResult} An object built with the url and an empty string when the url is valid,
	or with an empty string and an error message otherwise
	*/

	sanitizeToUrl ( urlString, attributeName ) {

		const tmpAttributeName = attributeName || 'href';

		// the string is parsed as html: anything that creates a node other than text is rejected
		const parseResult = new DOMParser ( ).parseFromString ( '<div>' + urlString + '</div>', 'text/html' );
		if ( ! parseResult || '\u0023document' !== parseResult.nodeName ) {
			return new UrlValidationResult ( '', 'Parsing error' );
		}

		let newUrlString = '';
		for ( const childNode of parseResult.body.firstChild.childNodes ) {
			if ( '\u0023text' !== childNode.nodeName ) {
				return new UrlValidationResult ( '', 'Invalid characters found in the url' );
			}
			newUrlString += childNode.nodeValue;
		}

		// < > " ' are removed, as characters, as entity text left after the parsing and as
		// percent-encoded values...
		newUrlString = newUrlString.replaceAll (
			/[<>"\u0027]|\u0026(?:lt|gt|quot|apos);|%(?:3[CcEe]|2[27])/g,
			''
		);

		// ... so any difference with the input means that the input contained something unsafe
		if ( newUrlString !== urlString ) {
			return new UrlValidationResult ( '', 'Invalid characters found in the url' );
		}

		const validProtocols = [ 'https:' ];
		if ( 'http:' === window.location.protocol || 'href' === tmpAttributeName ) {
			validProtocols.push ( 'http:' );
		}
		if ( 'href' === tmpAttributeName ) {
			validProtocols.push ( 'mailto:', 'sms:', 'tel:' );

			// a link to an anchor of the current page is not an absolute url and is accepted as is
			if ( /^\u0023\w*$/.test ( newUrlString ) ) {
				return new UrlValidationResult ( newUrlString, '' );
			}
		}
		if ( 'src' === tmpAttributeName ) {
			validProtocols.push ( 'data:' );
		}

		let url = null;
		try {
			url = new URL ( newUrlString );
		}
		catch {

			// not an absolute url
			return new UrlValidationResult ( '', 'Invalid url string' );
		}
		if ( NOT_FOUND === validProtocols.indexOf ( url.protocol ) ) {
			return new UrlValidationResult ( '', 'Invalid protocol ' + url.protocol );
		}
		if ( 'sms:' === url.protocol || 'tel:' === url.protocol ) {

			// no comma in the character class: a comma is not a separator inside [ ]
			// and would be accepted as part of the number
			if ( /^\+[0-9*\u0023]*$/.test ( url.pathname ) ) {
				return new UrlValidationResult ( newUrlString, '' );
			}

			return new UrlValidationResult ( '', 'Invalid sms: or tel: url' );
		}

		// encodeURIComponent throws an URIError on a lone surrogate
		try {
			encodeURIComponent ( url.href );
		}
		catch {
			return new UrlValidationResult ( '', 'Invalid character in url' );
		}
		return new UrlValidationResult ( newUrlString, '' );
	}

	/**
	Sanitize a string to be used in JS code. The characters &lt; &gt; &quot; &apos; are replaced with
	the look-alike characters \u227a \u227b \u2033 \u2032.
	@param {String} stringToSanitize The string to sanitize
	@return {String} The sanitized string, or an empty string when the input cannot be parsed to a document
	or when parsing it as html gives anything else than text
	*/

	sanitizeToJsString ( stringToSanitize ) {

		const parseResult = new DOMParser ( ).parseFromString ( '<div>' + stringToSanitize + '</div>', 'text/html' );
		if ( ! parseResult || '\u0023document' !== parseResult.nodeName ) {
			return '';
		}

		let sanitizedString = '';
		for ( const childNode of parseResult.body.firstChild.childNodes ) {
			if ( '\u0023text' !== childNode.nodeName ) {
				return '';
			}
			sanitizedString += childNode.nodeValue;
		}

		return sanitizedString
			.replaceAll ( '\u003c', '\u227a' )
			.replaceAll ( '\u003e', '\u227b' )
			.replaceAll ( '\u0022', '\u2033' )
			.replaceAll ( '\u0027', '\u2032' );
	}

	/**
	Validate a color. Only the form # followed by exactly six hexadecimal digits is accepted.
	@param {String} colorString The color to validate
	@return {?String} The color when valid, null otherwise (also for a value that is not a string)
	*/

	sanitizeToColor ( colorString ) {
		if ( 'string' !== typeof colorString ) {
			return null;
		}

		// no comma in the character class: it would make strings like #,,,,,, valid colors
		const newColor = colorString.match ( /^\u0023[0-9A-Fa-f]{6}$/ );
		return newColor ? newColor [ ZERO ] : null;
	}
}
532 | |
533 | |
534 | |
535 | |
536 | |
537 | |
538 | |
539 | |
/**
The HTMLSanitizer instance created when this module is loaded; it is the default export of the module.
@type {HTMLSanitizer}
*/

const theHTMLSanitizer = new HTMLSanitizer ( );

export { theHTMLSanitizer as default };
543 | |
544 | |
545 | |