MOBILE-4276 utils: Handle span correctly when counting words

main
Dani Palou 2023-03-31 15:15:24 +02:00
parent 61809c06c8
commit 6603f7d3db
2 changed files with 38 additions and 1 deletion

View File

@ -116,4 +116,41 @@ describe('CoreTextUtilsProvider', () => {
expect(replaced).toEqual('http://campus.edu?device=iPhone%20or%20iPad&version=1.2.3');
});
// Checks textUtils.countWords against plain text, punctuation, HTML markup and whitespace inputs.
it('counts words', () => {
    // Empty input and plain space-separated text.
    expect(textUtils.countWords('')).toEqual(0);
    expect(textUtils.countWords('one two three four')).toEqual(4);

    // Punctuation and symbols inside a token are expected not to split it.
    expect(textUtils.countWords('a\'b')).toEqual(1);
    expect(textUtils.countWords('1+1=2')).toEqual(1);
    expect(textUtils.countWords(' one-sided ')).toEqual(1);
    expect(textUtils.countWords('one two')).toEqual(2);
    expect(textUtils.countWords('email@example.com')).toEqual(1);
    expect(textUtils.countWords('first\\part second/part')).toEqual(2);

    // <br> in any of its spellings is expected to delimit words.
    expect(textUtils.countWords('<p>one two<br></br>three four</p>')).toEqual(4);
    expect(textUtils.countWords('<p>one two<br>three four</p>')).toEqual(4);
    expect(textUtils.countWords('<p>one two<br />three four</p>')).toEqual(4);

    // Free-standing punctuation counts as its own token; attached punctuation does not split.
    expect(textUtils.countWords(' one ... three ')).toEqual(3);
    expect(textUtils.countWords('just...one')).toEqual(1);
    expect(textUtils.countWords(' one & three ')).toEqual(3);
    expect(textUtils.countWords('just&one')).toEqual(1);

    // Em and en dashes are expected to separate words. The en dash is written as an escape so the
    // character cannot be silently dropped: a bare 'endash' has no separator at all.
    expect(textUtils.countWords('em—dash')).toEqual(2);
    expect(textUtils.countWords('en\u2013dash')).toEqual(2);

    // Non-ASCII digits, currency symbols and non-Latin scripts.
    expect(textUtils.countWords('1³ £2 €3.45 $6,789')).toEqual(4);
    expect(textUtils.countWords('ブルース カンベッル')).toEqual(2);

    // Closing tags of block-level elements (p, li, ...) are expected to delimit words.
    expect(textUtils.countWords('<p>one two</p><p>three four</p>')).toEqual(4);
    expect(textUtils.countWords('<p>one two</p><p><br/></p><p>three four</p>')).toEqual(4);
    expect(textUtils.countWords('<p>one</p><ul><li>two</li><li>three</li></ul><p>four.</p>')).toEqual(4);

    // Inline formatting tags in the middle of a word are expected not to split it.
    expect(textUtils.countWords('<p>em<b>phas</b>is.</p>')).toEqual(1);
    expect(textUtils.countWords('<p>em<i>phas</i>is.</p>')).toEqual(1);
    expect(textUtils.countWords('<p>em<strong>phas</strong>is.</p>')).toEqual(1);
    expect(textUtils.countWords('<p>em<em>phas</em>is.</p>')).toEqual(1);

    // Every whitespace control character is expected to act as a separator.
    expect(textUtils.countWords('one\ntwo')).toEqual(2);
    expect(textUtils.countWords('one\rtwo')).toEqual(2);
    expect(textUtils.countWords('one\ttwo')).toEqual(2);
    expect(textUtils.countWords('one\vtwo')).toEqual(2);
    expect(textUtils.countWords('one\ftwo')).toEqual(2);

    // sub/sup and span are inline too, so adjacent runs are expected to stay a single word.
    expect(textUtils.countWords('SO<sub>4</sub><sup>2-</sup>')).toEqual(1);
    expect(textUtils.countWords('4+4=8 i.e. O(1) a,b,c,d Im black&blue_really')).toEqual(6);
    expect(textUtils.countWords('<span>a</span><span>b</span>')).toEqual(1);
});
});

View File

@ -339,7 +339,7 @@ export class CoreTextUtilsProvider {
// Before stripping tags, add a space after the close tag of anything that is not obviously inline.
// Also, br is a special case because it definitely delimits a word, but has no close tag.
text = text.replace(/(<\/(?!a>|b>|del>|em>|i>|ins>|s>|small>|strong>|sub>|sup>|u>)\w+>|<br>|<br\s*\/>)/ig, '$1 ');
text = text.replace(/(<\/(?!a>|b>|del>|em>|i>|ins>|s>|small>|span>|strong>|sub>|sup>|u>)\w+>|<br>|<br\s*\/>)/ig, '$1 ');
// Now remove HTML tags.
text = text.replace(/(<([^>]+)>)/ig, '');