Merge pull request #1780 from crazyserver/MOBILE-2822

MOBILE-2822 utils: Make word count aware of block tags
main
Juan Leyva 2019-02-25 18:26:58 +01:00 committed by GitHub
commit 25916fdea3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 11 additions and 1 deletion

View File

@ -214,10 +214,20 @@ export class CoreTextUtilsProvider {
if (!text || typeof text != 'string') { if (!text || typeof text != 'string') {
return 0; return 0;
} }
const blockTags = ['address', 'article', 'aside', 'blockquote', 'br', ' details', 'dialog', 'dd', 'div', 'dl', 'dt',
'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul'];
// Clean HTML scripts and tags. // Clean HTML scripts and tags.
text = text.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, ''); text = text.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '');
text = text.replace(/<\/?(?!\!)[^>]*>/gi, ''); // Replace block tags by space to get word count aware of line break and remove inline tags.
text = text.replace(/<(\/[ ]*)?([a-zA-Z0-9]+)[^>]*>/gi, (str, p1, match) => {
if (blockTags.indexOf(match) >= 0) {
return ' ';
}
return '';
});
// Decode HTML entities. // Decode HTML entities.
text = this.decodeHTMLEntities(text); text = this.decodeHTMLEntities(text);
// Replace underscores (which are classed as word characters) with spaces. // Replace underscores (which are classed as word characters) with spaces.