From 6603f7d3db4937860fe739910f1ae907753ae361 Mon Sep 17 00:00:00 2001 From: Dani Palou Date: Fri, 31 Mar 2023 15:15:24 +0200 Subject: [PATCH] MOBILE-4276 utils: Handle span correctly when counting words --- src/core/services/tests/utils/text.test.ts | 37 ++++++++++++++++++++++ src/core/services/utils/text.ts | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/core/services/tests/utils/text.test.ts b/src/core/services/tests/utils/text.test.ts index 67db67650..793546903 100644 --- a/src/core/services/tests/utils/text.test.ts +++ b/src/core/services/tests/utils/text.test.ts @@ -116,4 +116,41 @@ describe('CoreTextUtilsProvider', () => { expect(replaced).toEqual('http://campus.edu?device=iPhone%20or%20iPad&version=1.2.3'); }); + it('counts words', () => { + expect(textUtils.countWords('')).toEqual(0); + expect(textUtils.countWords('one two three four')).toEqual(4); + expect(textUtils.countWords('a\'b')).toEqual(1); + expect(textUtils.countWords('1+1=2')).toEqual(1); + expect(textUtils.countWords(' one-sided ')).toEqual(1); + expect(textUtils.countWords('one two')).toEqual(2); + expect(textUtils.countWords('email@example.com')).toEqual(1); + expect(textUtils.countWords('first\\part second/part')).toEqual(2); + expect(textUtils.countWords('

one two

three four

')).toEqual(4); + expect(textUtils.countWords('

one two
three four

')).toEqual(4); + expect(textUtils.countWords('

one two
three four

')).toEqual(4); + expect(textUtils.countWords(' one ... three ')).toEqual(3); + expect(textUtils.countWords('just...one')).toEqual(1); + expect(textUtils.countWords(' one & three ')).toEqual(3); + expect(textUtils.countWords('just&one')).toEqual(1); + expect(textUtils.countWords('em—dash')).toEqual(2); + expect(textUtils.countWords('en–dash')).toEqual(2); + expect(textUtils.countWords('1³ £2 €3.45 $6,789')).toEqual(4); + expect(textUtils.countWords('ブルース カンベッル')).toEqual(2); + expect(textUtils.countWords('

one two

three four

')).toEqual(4); + expect(textUtils.countWords('

one two


three four

')).toEqual(4); + expect(textUtils.countWords('

one

four.

')).toEqual(4); + expect(textUtils.countWords('

emphasis.

')).toEqual(1); + expect(textUtils.countWords('

emphasis.

')).toEqual(1); + expect(textUtils.countWords('

emphasis.

')).toEqual(1); + expect(textUtils.countWords('

emphasis.

')).toEqual(1); + expect(textUtils.countWords('one\ntwo')).toEqual(2); + expect(textUtils.countWords('one\rtwo')).toEqual(2); + expect(textUtils.countWords('one\ttwo')).toEqual(2); + expect(textUtils.countWords('one\vtwo')).toEqual(2); + expect(textUtils.countWords('one\ftwo')).toEqual(2); + expect(textUtils.countWords('SO42-')).toEqual(1); + expect(textUtils.countWords('4+4=8 i.e. O(1) a,b,c,d I’m black&blue_really')).toEqual(6); + expect(textUtils.countWords('ab')).toEqual(1); + }); + }); diff --git a/src/core/services/utils/text.ts b/src/core/services/utils/text.ts index 80b4cef8c..d5e478100 100644 --- a/src/core/services/utils/text.ts +++ b/src/core/services/utils/text.ts @@ -339,7 +339,7 @@ export class CoreTextUtilsProvider { // Before stripping tags, add a space after the close tag of anything that is not obviously inline. // Also, br is a special case because it definitely delimits a word, but has no close tag. - text = text.replace(/(<\/(?!a>|b>|del>|em>|i>|ins>|s>|small>|strong>|sub>|sup>|u>)\w+>|<br>|<br\s*\/>)/ig, '$1 '); + text = text.replace(/(<\/(?!a>|b>|del>|em>|i>|ins>|s>|small>|span>|strong>|sub>|sup>|u>)\w+>|<br>|<br\s*\/>)/ig, '$1 '); // Now remove HTML tags. text = text.replace(/(<([^>]+)>)/ig, '');