Skip to content

Commit 61c7bc9

Browse files
Compile Url match regex once and not on every use
Needed for termux#2146.
1 parent 883d6dc commit 61c7bc9

File tree

4 files changed

+107
-91
lines changed

4 files changed

+107
-91
lines changed

app/src/main/java/com/termux/app/terminal/TermuxTerminalViewClient.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import com.termux.R;
2323
import com.termux.app.TermuxActivity;
24+
import com.termux.shared.data.UrlUtils;
2425
import com.termux.shared.shell.ShellUtils;
2526
import com.termux.shared.terminal.TermuxTerminalViewClientBase;
2627
import com.termux.shared.termux.TermuxConstants;
@@ -603,7 +604,7 @@ public void showUrlSelection() {
603604

604605
String text = ShellUtils.getTerminalSessionTranscriptText(session, true, true);
605606

606-
LinkedHashSet<CharSequence> urlSet = DataUtils.extractUrls(text);
607+
LinkedHashSet<CharSequence> urlSet = UrlUtils.extractUrls(text);
607608
if (urlSet.isEmpty()) {
608609
new AlertDialog.Builder(mActivity).setMessage(R.string.title_select_url_none_found).show();
609610
return;

app/src/test/java/com/termux/app/TermuxActivityTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package com.termux.app;
22

3-
import com.termux.shared.data.DataUtils;
3+
import com.termux.shared.data.UrlUtils;
44

55
import org.junit.Assert;
66
import org.junit.Test;
@@ -13,7 +13,7 @@ public class TermuxActivityTest {
1313
private void assertUrlsAre(String text, String... urls) {
1414
LinkedHashSet<String> expected = new LinkedHashSet<>();
1515
Collections.addAll(expected, urls);
16-
Assert.assertEquals(expected, DataUtils.extractUrls(text));
16+
Assert.assertEquals(expected, UrlUtils.extractUrls(text));
1717
}
1818

1919
@Test

termux-shared/src/main/java/com/termux/shared/data/DataUtils.java

Lines changed: 0 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -165,92 +165,4 @@ public static boolean isNullOrEmpty(String string) {
165165
return string == null || string.isEmpty();
166166
}
167167

168-
169-
public static LinkedHashSet<CharSequence> extractUrls(String text) {
170-
171-
StringBuilder regex_sb = new StringBuilder();
172-
173-
regex_sb.append("("); // Begin first matching group.
174-
regex_sb.append("(?:"); // Begin scheme group.
175-
regex_sb.append("dav|"); // The DAV proto.
176-
regex_sb.append("dict|"); // The DICT proto.
177-
regex_sb.append("dns|"); // The DNS proto.
178-
regex_sb.append("file|"); // File path.
179-
regex_sb.append("finger|"); // The Finger proto.
180-
regex_sb.append("ftp(?:s?)|"); // The FTP proto.
181-
regex_sb.append("git|"); // The Git proto.
182-
regex_sb.append("gopher|"); // The Gopher proto.
183-
regex_sb.append("http(?:s?)|"); // The HTTP proto.
184-
regex_sb.append("imap(?:s?)|"); // The IMAP proto.
185-
regex_sb.append("irc(?:[6s]?)|"); // The IRC proto.
186-
regex_sb.append("ip[fn]s|"); // The IPFS proto.
187-
regex_sb.append("ldap(?:s?)|"); // The LDAP proto.
188-
regex_sb.append("pop3(?:s?)|"); // The POP3 proto.
189-
regex_sb.append("redis(?:s?)|"); // The Redis proto.
190-
regex_sb.append("rsync|"); // The Rsync proto.
191-
regex_sb.append("rtsp(?:[su]?)|"); // The RTSP proto.
192-
regex_sb.append("sftp|"); // The SFTP proto.
193-
regex_sb.append("smb(?:s?)|"); // The SAMBA proto.
194-
regex_sb.append("smtp(?:s?)|"); // The SMTP proto.
195-
regex_sb.append("svn(?:(?:\\+ssh)?)|"); // The Subversion proto.
196-
regex_sb.append("tcp|"); // The TCP proto.
197-
regex_sb.append("telnet|"); // The Telnet proto.
198-
regex_sb.append("tftp|"); // The TFTP proto.
199-
regex_sb.append("udp|"); // The UDP proto.
200-
regex_sb.append("vnc|"); // The VNC proto.
201-
regex_sb.append("ws(?:s?)"); // The Websocket proto.
202-
regex_sb.append(")://"); // End scheme group.
203-
regex_sb.append(")"); // End first matching group.
204-
205-
206-
// Begin second matching group.
207-
regex_sb.append("(");
208-
209-
// User name and/or password in format 'user:pass@'.
210-
regex_sb.append("(?:\\S+(?::\\S*)?@)?");
211-
212-
// Begin host group.
213-
regex_sb.append("(?:");
214-
215-
// IP address (from http://www.regular-expressions.info/examples.html).
216-
regex_sb.append("(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|");
217-
218-
// Host name or domain.
219-
regex_sb.append("(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))?|");
220-
221-
// Just path. Used in case of 'file://' scheme.
222-
regex_sb.append("/(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)");
223-
224-
// End host group.
225-
regex_sb.append(")");
226-
227-
// Port number.
228-
regex_sb.append("(?::\\d{1,5})?");
229-
230-
// Resource path with optional query string.
231-
regex_sb.append("(?:/[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?");
232-
233-
// Fragment.
234-
regex_sb.append("(?:#[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?");
235-
236-
// End second matching group.
237-
regex_sb.append(")");
238-
239-
final Pattern urlPattern = Pattern.compile(
240-
regex_sb.toString(),
241-
Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
242-
243-
LinkedHashSet<CharSequence> urlSet = new LinkedHashSet<>();
244-
Matcher matcher = urlPattern.matcher(text);
245-
246-
while (matcher.find()) {
247-
int matchStart = matcher.start(1);
248-
int matchEnd = matcher.end();
249-
String url = text.substring(matchStart, matchEnd);
250-
urlSet.add(url);
251-
}
252-
253-
return urlSet;
254-
}
255-
256168
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
package com.termux.shared.data;
2+
3+
import java.util.LinkedHashSet;
4+
import java.util.regex.Matcher;
5+
import java.util.regex.Pattern;
6+
7+
/**
 * Helpers for locating URLs inside arbitrary text, such as a terminal transcript.
 *
 * <p>The URL regular expression is compiled exactly once, when this class is initialized,
 * and the resulting {@link Pattern} is shared by every caller. {@link Pattern} instances are
 * immutable and safe for use by multiple concurrent threads, so no further synchronization
 * is needed; each call creates its own {@link Matcher}.
 */
public class UrlUtils {

    /**
     * The compiled URL matching pattern.
     *
     * <p>Capturing group 1 is the scheme including the trailing {@code ://}; capturing group 2
     * is everything after it (optional {@code user:pass@}, host, optional port, optional
     * path/query and optional fragment).
     *
     * <p>Declared {@code final} and initialized eagerly so that it can never be observed as
     * {@code null}, cannot be reassigned by other code, and is compiled only once even if
     * several threads use this class for the first time simultaneously.
     */
    public static final Pattern URL_MATCH_REGEX = compileUrlMatchRegex();

    /**
     * Returns the shared, pre-compiled URL matching pattern.
     *
     * @return {@link #URL_MATCH_REGEX}; never {@code null}.
     */
    public static Pattern getUrlMatchRegex() {
        return URL_MATCH_REGEX;
    }

    /**
     * Extracts every distinct URL found in {@code text}.
     *
     * @param text The text to scan. May be {@code null} or empty.
     * @return The matched URLs in order of first appearance with duplicates removed. The set is
     * empty (never {@code null}) if {@code text} is {@code null} or contains no URL.
     */
    public static LinkedHashSet<CharSequence> extractUrls(String text) {
        LinkedHashSet<CharSequence> urlSet = new LinkedHashSet<>();
        if (text == null || text.isEmpty()) {
            return urlSet;
        }

        Matcher matcher = URL_MATCH_REGEX.matcher(text);
        while (matcher.find()) {
            // Group 1 opens at the very start of the expression, so the whole match is exactly
            // the span from the start of the scheme to the end of the URL.
            urlSet.add(matcher.group());
        }

        return urlSet;
    }

    /** Assembles and compiles the URL regular expression; called once during class initialization. */
    private static Pattern compileUrlMatchRegex() {
        StringBuilder regex = new StringBuilder();

        regex.append("(");                       // Begin first matching group.
        regex.append("(?:");                     // Begin scheme group.
        regex.append("dav|");                    // The DAV proto.
        regex.append("dict|");                   // The DICT proto.
        regex.append("dns|");                    // The DNS proto.
        regex.append("file|");                   // File path.
        regex.append("finger|");                 // The Finger proto.
        regex.append("ftp(?:s?)|");              // The FTP proto.
        regex.append("git|");                    // The Git proto.
        regex.append("gopher|");                 // The Gopher proto.
        regex.append("http(?:s?)|");             // The HTTP proto.
        regex.append("imap(?:s?)|");             // The IMAP proto.
        regex.append("irc(?:[6s]?)|");           // The IRC proto.
        regex.append("ip[fn]s|");                // The IPFS proto.
        regex.append("ldap(?:s?)|");             // The LDAP proto.
        regex.append("pop3(?:s?)|");             // The POP3 proto.
        regex.append("redis(?:s?)|");            // The Redis proto.
        regex.append("rsync|");                  // The Rsync proto.
        regex.append("rtsp(?:[su]?)|");          // The RTSP proto.
        regex.append("sftp|");                   // The SFTP proto.
        regex.append("smb(?:s?)|");              // The SAMBA proto.
        regex.append("smtp(?:s?)|");             // The SMTP proto.
        regex.append("svn(?:(?:\\+ssh)?)|");     // The Subversion proto.
        regex.append("tcp|");                    // The TCP proto.
        regex.append("telnet|");                 // The Telnet proto.
        regex.append("tftp|");                   // The TFTP proto.
        regex.append("udp|");                    // The UDP proto.
        regex.append("vnc|");                    // The VNC proto.
        regex.append("ws(?:s?)");                // The Websocket proto.
        regex.append(")://");                    // End scheme group.
        regex.append(")");                       // End first matching group.

        // Begin second matching group.
        regex.append("(");

        // User name and/or password in format 'user:pass@'.
        regex.append("(?:\\S+(?::\\S*)?@)?");

        // Begin host group.
        regex.append("(?:");

        // IP address (from http://www.regular-expressions.info/examples.html).
        regex.append("(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|");

        // Host name or domain.
        regex.append("(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)(?:(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))?|");

        // Just path. Used in case of 'file://' scheme.
        regex.append("/(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)");

        // End host group.
        regex.append(")");

        // Port number.
        regex.append("(?::\\d{1,5})?");

        // Resource path with optional query string.
        regex.append("(?:/[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?");

        // Fragment.
        regex.append("(?:#[a-zA-Z0-9:@%\\-._~!$&()*+,;=?/]*)?");

        // End second matching group.
        regex.append(")");

        return Pattern.compile(
            regex.toString(),
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
    }

}

0 commit comments

Comments
 (0)