Skip to content

Commit 0586c25

Browse files
authored
feat(text selector): normalize whitespace for quoted match (#5049)
This changes quoted text selectors like `text="Foo Bar"` to perform a whitespace-normalized match. Most of the time users want to match some string visible on the page, and that always means normalized whitespace. We keep the case-sensitivity and full-string vs. substring differences between quoted and unquoted matches.
1 parent 9e3bd78 commit 0586c25

File tree

3 files changed

+25
-20
lines changed

3 files changed

+25
-20
lines changed

docs/src/selectors.md

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,10 @@ await page.click("text=Log in")
2525
page.click("text=Log in")
2626
```
2727

28-
By default, the match is case-insensitive, it ignores leading/trailing whitespace and searches for
29-
a substring. This means `text= Login` matches `<button>Button loGIN (click me)</button>`.
28+
Matching is case-insensitive and searches for a substring. This means `text=Login` matches `<button>Button loGIN (click me)</button>`. Matching also normalizes whitespace, for example it turns multiple spaces into one, turns line breaks into spaces and ignores leading and trailing whitespace.
3029

31-
Text body can be escaped with single or double quotes for precise matching, insisting on exact match,
32-
including specified whitespace and case. This means `text="Login "` will only match
33-
`<button>Login </button>` with exactly one space after "Login". Quoted text follows the usual escaping
34-
rules, e.g. use `\"` to escape double quote in a double-quoted string: `text="foo\"bar"`.
30+
Text body can be escaped with single or double quotes for a full-string, case-sensitive match instead. This means `text="Login"` will match `<button>Login</button>`, but not `<button>Login (click me)</button>` or `<button>login</button>`. Quoted text follows the usual escaping
31+
rules, e.g. use `\"` to escape double quote in a double-quoted string: `text="foo\"bar"`. Note that quoted match still normalizes whitespace.
3532

3633
Text body can also be a JavaScript-like regex wrapped in `/` symbols. This means `text=/^\\s*Login$/i`
3734
will match `<button> loGIN</button>` with any number of spaces before "Login" and no spaces after.
@@ -156,7 +153,7 @@ The `:text` pseudo-class matches elements that have a text node child with speci
156153
It is similar to the [text] engine, but can be used in combination with other `css` selector extensions.
157154
There are a few variations that support different arguments:
158155

159-
* `:text("substring")` - Matches when element's text contains "substring" somewhere. Matching is case-insensitive. Matching also normalizes whitespace, for example it turns multiple spaces into one, trusn line breaks into spaces and ignores leading and trailing whitespace.
156+
* `:text("substring")` - Matches when element's text contains "substring" somewhere. Matching is case-insensitive. Matching also normalizes whitespace, for example it turns multiple spaces into one, turns line breaks into spaces and ignores leading and trailing whitespace.
160157
* `:text-is("string")` - Matches when element's text equals the "string". Matching is case-insensitive and normalizes whitespace.
161158
* `button:text("Sign in")` - Text selector may be combined with regular CSS.
162159
* `:text-matches("[+-]?\\d+")` - Matches text against a regular expression. Note that special characters like back-slash `\`, quotes `"`, square brackets `[]` and more should be escaped. Learn more about [regular expressions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp).

src/server/injected/textSelectorEngine.ts

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,29 @@ function unescape(s: string): string {
4646

4747
type Matcher = (text: string) => boolean;
4848
function createMatcher(selector: string): Matcher {
49-
if (selector.length > 1 && selector[0] === '"' && selector[selector.length - 1] === '"') {
50-
const parsed = unescape(selector.substring(1, selector.length - 1));
51-
return text => text === parsed;
52-
}
53-
if (selector.length > 1 && selector[0] === "'" && selector[selector.length - 1] === "'") {
54-
const parsed = unescape(selector.substring(1, selector.length - 1));
55-
return text => text === parsed;
56-
}
5749
if (selector[0] === '/' && selector.lastIndexOf('/') > 0) {
5850
const lastSlash = selector.lastIndexOf('/');
5951
const re = new RegExp(selector.substring(1, lastSlash), selector.substring(lastSlash + 1));
6052
return text => re.test(text);
6153
}
62-
selector = selector.trim().toLowerCase().replace(/\s+/g, ' ');
63-
return text => text.toLowerCase().replace(/\s+/g, ' ').includes(selector);
54+
let strict = false;
55+
if (selector.length > 1 && selector[0] === '"' && selector[selector.length - 1] === '"') {
56+
selector = unescape(selector.substring(1, selector.length - 1));
57+
strict = true;
58+
}
59+
if (selector.length > 1 && selector[0] === "'" && selector[selector.length - 1] === "'") {
60+
selector = unescape(selector.substring(1, selector.length - 1));
61+
strict = true;
62+
}
63+
selector = selector.trim().replace(/\s+/g, ' ');
64+
if (!strict)
65+
selector = selector.toLowerCase();
66+
return text => {
67+
text = text.trim().replace(/\s+/g, ' ');
68+
if (!strict)
69+
return text.toLowerCase().includes(selector);
70+
return text === selector;
71+
};
6472
}
6573

6674
// Skips <head>, <script> and <style> elements and all their children.

test/selectors-text.spec.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ it('should work', async ({page}) => {
2626
expect(await page.$eval(`text=ye`, e => e.outerHTML)).toBe('<div>\nye </div>');
2727

2828
await page.setContent(`<div> ye </div><div>ye</div>`);
29-
expect(await page.$eval(`text="ye"`, e => e.outerHTML)).toBe('<div>ye</div>');
29+
expect(await page.$eval(`text="ye"`, e => e.outerHTML)).toBe('<div> ye </div>');
3030

3131
await page.setContent(`<div>yo</div><div>"ya</div><div> hello world! </div>`);
3232
expect(await page.$eval(`text="\\"ya"`, e => e.outerHTML)).toBe('<div>"ya</div>');
@@ -98,9 +98,9 @@ it('should work', async ({page}) => {
9898
await page.setContent(`<span>Sign&nbsp;in</span><span>Hello\n \nworld</span>`);
9999
expect(await page.$eval(`text=Sign in`, e => e.outerHTML)).toBe('<span>Sign&nbsp;in</span>');
100100
expect((await page.$$(`text=Sign \tin`)).length).toBe(1);
101-
expect(await page.$(`text="Sign in"`)).toBe(null);
102-
expect((await page.$$(`text="Sign in"`)).length).toBe(0);
101+
expect((await page.$$(`text="Sign in"`)).length).toBe(1);
103102
expect(await page.$eval(`text=lo wo`, e => e.outerHTML)).toBe('<span>Hello\n \nworld</span>');
103+
expect(await page.$eval(`text="Hello world"`, e => e.outerHTML)).toBe('<span>Hello\n \nworld</span>');
104104
expect(await page.$(`text="lo wo"`)).toBe(null);
105105
expect((await page.$$(`text=lo \nwo`)).length).toBe(1);
106106
expect((await page.$$(`text="lo wo"`)).length).toBe(0);

0 commit comments

Comments
 (0)