/**
 * Removes every HTML tag from a string except a very small white list of
 * formatting tags, http/https links and font tags.
 *
 * NOTE(security): this is a best-effort, regex-based filter. It is
 * deliberately strict (it prefers dropping a harmless tag over letting a
 * dangerous one through), but a regex cannot fully model an HTML parser.
 * Prefer a vetted sanitizer or text-only insertion where one is available.
 *
 * @param {*} aString Value to sanitize; converted to a string first.
 *     null and undefined yield an empty string.
 * @return {string} The input with all non-white-listed tags removed.
 */
su.sanitizeHTML = function(aString) {
  if (aString === null || aString === undefined) {
    return '';
  }
  var str = String(aString);

  // Strip out the textual null marker.
  str = str.replace(/\[0x00\]/gi, '');
  // Strip out raw control characters (NUL, tab, LF, CR, DEL, ...). These
  // can be used to split keywords so that the checks below miss them.
  str = str.replace(/[\x00-\x1F\x7F]/g, '');
  // Strip out tab / line feed / carriage return written as numeric
  // character references (decimal or hex, with optional leading zeros and
  // optional semicolon). The lookaheads keep longer references such as
  // "&#100;" intact.
  str = str.replace(
      /&#(?:0*(?:9|10|13)(?!\d)|x0*(?:9|a|d)(?![0-9a-f]));?/gi, '');

  // Strip out a trailing tag that never closes.
  str = str.replace(/<[^>]*$/, '');
  // Strip out a leading tag that never opens.
  str = str.replace(/^[^<]*>/, '');

  // The patterns are built once per call rather than once per tag, and
  // carry no "g" flag so that test() keeps no state between tags.

  // Anything that smells like styling or script: the word "style", any
  // parenthesis, curly brace or square bracket (raw, as a numeric entity
  // or percent-encoded), a nested "<", or a script:/file:/ftp: scheme.
  // Simple link URLs do not need any of these.
  var forbiddenRegex = new RegExp(
      'style|[(){}\\[\\]]|<[\\s\\S]*<|script:|file:|ftp:' +
      '|&#0*4[01];?|&#x0*2[89];?|%2[89]' +
      '|%5B|%5D|%3C|%3E', 'i');
  // An anchor whose href starts with http: or https:, or a font tag.
  var linkOrFontRegex = /^<\/*(?:a href="\s*https?:|font)/i;
  // Any "on...=" sequence could be a JS event binding. This is kept
  // intentionally broad; it may reject URLs such as "?action=1".
  var jsBindingRegex = /on\S*\s*=/i;
  // Attribute-free opening/closing tags that are always acceptable.
  var whiteListRegex = /^<\/*(?:b|i|u|strong|em|p|br|ol|ul|li|a)\/*>$/i;

  // Check all instances of HTML tags. Only if they match our very limited
  // white list will they be allowed through.
  return str.replace(/<[^>]*>/g, function(tag) {
    if (forbiddenRegex.test(tag)) {
      return '';
    }
    if (linkOrFontRegex.test(tag)) {
      return jsBindingRegex.test(tag) ? '' : tag;
    }
    return whiteListRegex.test(tag) ? tag : '';
  });
};