mirror of
https://github.com/oxen-io/session-android.git
synced 2025-01-12 21:23:38 +00:00
Update URL homograph rules.
This commit is contained in:
parent
ab2e85f6c7
commit
5b534c8b1a
@ -1,99 +0,0 @@
|
||||
package org.thoughtcrime.securesms.linkpreview;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Character-class patterns used to detect which scripts occur in a URL.
 *
 * <p>Each pattern is a single bracketed character class, so calling
 * {@code matcher(text).find()} answers "does {@code text} contain at least one
 * code point from this script's listed ranges?".
 *
 * <p>NOTE(review): the ranges look like they were transcribed from the Unicode
 * script property tables; confirm against the Unicode Character Database before
 * extending them. The Latin list intentionally stops at U+01BA here, so later
 * Latin blocks are not covered.
 */
public class LegalUrlPatterns {

  /** Matches a single Latin-script letter (ASCII letters plus Latin-1 / Extended-A/B up to U+01BA). */
  public static final Pattern LATIN = Pattern.compile("[" +
      "\\x{0041}-\\x{005A}" +
      "\\x{0061}-\\x{007A}" +
      "\\x{00AA}" +
      "\\x{00BA}" +
      // Ends at U+00D6 so that U+00D7 (multiplication sign) is not treated as a
      // letter; the next range resumes at U+00D8, mirroring the U+00F7 gap below.
      "\\x{00C0}-\\x{00D6}" +
      "\\x{00D8}-\\x{00F6}" +
      "\\x{00F8}-\\x{01BA}" +
      "]");

  /** Matches a single Cyrillic-script code point. */
  public static final Pattern CYRILLIC = Pattern.compile("[" +
      "\\x{0400}-\\x{0481}" +
      "\\x{0482}" +
      "\\x{0483}-\\x{0484}" +
      "\\x{0487}" +
      "\\x{0488}-\\x{0489}" +
      "\\x{048A}-\\x{052F}" +
      "\\x{1C80}-\\x{1C88}" +
      "\\x{1D2B}" +
      "\\x{1D78}" +
      "\\x{2DE0}-\\x{2DFF}" +
      "\\x{A640}-\\x{A66D}" +
      "\\x{A66E}" +
      "\\x{A66F}" +
      "\\x{A670}-\\x{A672}" +
      "\\x{A673}" +
      "\\x{A674}-\\x{A67D}" +
      "\\x{A67E}" +
      "\\x{A67F}" +
      "\\x{A680}-\\x{A69B}" +
      "\\x{A69C}-\\x{A69D}" +
      "\\x{A69E}-\\x{A69F}" +
      "\\x{FE2E}-\\x{FE2F}" +
      "]");

  /** Matches a single Greek-script code point, including supplementary-plane ranges. */
  public static final Pattern GREEK = Pattern.compile("[" +
      "\\x{0370}-\\x{0373}" +
      "\\x{0375}" +
      "\\x{0376}-\\x{0377}" +
      "\\x{037A}" +
      "\\x{037B}-\\x{037D}" +
      "\\x{037F}" +
      "\\x{0384}" +
      "\\x{0386}" +
      "\\x{0388}-\\x{038A}" +
      "\\x{038C}" +
      "\\x{038E}-\\x{03A1}" +
      "\\x{03A3}-\\x{03E1}" +
      "\\x{03F0}-\\x{03F5}" +
      "\\x{03F6}" +
      "\\x{03F7}-\\x{03FF}" +
      "\\x{1D26}-\\x{1D2A}" +
      "\\x{1D5D}-\\x{1D61}" +
      "\\x{1D66}-\\x{1D6A}" +
      "\\x{1DBF}" +
      "\\x{1F00}-\\x{1F15}" +
      "\\x{1F18}-\\x{1F1D}" +
      "\\x{1F20}-\\x{1F45}" +
      "\\x{1F48}-\\x{1F4D}" +
      "\\x{1F50}-\\x{1F57}" +
      "\\x{1F59}" +
      "\\x{1F5B}" +
      "\\x{1F5D}" +
      "\\x{1F5F}-\\x{1F7D}" +
      "\\x{1F80}-\\x{1FB4}" +
      "\\x{1FB6}-\\x{1FBC}" +
      "\\x{1FBD}" +
      "\\x{1FBE}" +
      "\\x{1FBF}-\\x{1FC1}" +
      "\\x{1FC2}-\\x{1FC4}" +
      "\\x{1FC6}-\\x{1FCC}" +
      "\\x{1FCD}-\\x{1FCF}" +
      "\\x{1FD0}-\\x{1FD3}" +
      "\\x{1FD6}-\\x{1FDB}" +
      "\\x{1FDD}-\\x{1FDF}" +
      "\\x{1FE0}-\\x{1FEC}" +
      "\\x{1FED}-\\x{1FEF}" +
      "\\x{1FF2}-\\x{1FF4}" +
      "\\x{1FF6}-\\x{1FFC}" +
      "\\x{1FFD}-\\x{1FFE}" +
      "\\x{2126}" +
      "\\x{AB65}" +
      // Code points above U+FFFF: the \x{h...h} form addresses them directly.
      "\\x{10140}-\\x{10174}" +
      "\\x{10175}-\\x{10178}" +
      "\\x{10179}-\\x{10189}" +
      "\\x{1018A}-\\x{1018B}" +
      "\\x{1018C}-\\x{1018E}" +
      "\\x{101A0}" +
      "\\x{1D200}-\\x{1D241}" +
      "\\x{1D242}-\\x{1D244}" +
      "\\x{1D245}" +
      "]");
}
|
@ -10,11 +10,17 @@ import com.annimon.stream.Stream;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import okhttp3.HttpUrl;
|
||||
|
||||
public final class LinkPreviewUtil {
|
||||
|
||||
// Splits a URL into an optional "http://" or "https://" prefix (group 1) and the
// run of non-slash characters that follows it (group 2); whatever comes after
// the first slash is matched but not captured.
private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$");
|
||||
// Matches only when every character is in the range U+0000-U+007F (an empty string also matches).
private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$");
|
||||
// Matches only when no character is in the range U+0000-U+007F (an empty string also matches).
private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$");
|
||||
|
||||
/**
|
||||
* @return All whitelisted URLs in the source text.
|
||||
*/
|
||||
@ -57,10 +63,16 @@ public final class LinkPreviewUtil {
|
||||
}
|
||||
|
||||
public static boolean isLegalUrl(@NonNull String url) {
|
||||
if (LegalUrlPatterns.LATIN.matcher(url).find()) {
|
||||
return !LegalUrlPatterns.CYRILLIC.matcher(url).find() &&
|
||||
!LegalUrlPatterns.GREEK.matcher(url).find();
|
||||
Matcher matcher = DOMAIN_PATTERN.matcher(url);
|
||||
|
||||
if (matcher.matches()) {
|
||||
String domain = matcher.group(2);
|
||||
String cleanedDomain = domain.replaceAll("\\.", "");
|
||||
|
||||
return ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() ||
|
||||
ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -8,17 +8,62 @@ import static junit.framework.TestCase.assertTrue;
|
||||
public class LinkPreviewUtilTest {
|
||||
|
||||
@Test
|
||||
public void isLegal_allLatin() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("https://signal.org"));
|
||||
public void isLegal_allAscii_noProtocol() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("google.com"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_latinAndCyrillic() {
|
||||
assertFalse(LinkPreviewUtil.isLegalUrl("https://www.аmazon.com"));
|
||||
public void isLegal_allAscii_noProtocol_subdomain() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("foo.google.com"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_latinAndGreek() {
|
||||
assertFalse(LinkPreviewUtil.isLegalUrl("https://www.αpple.com"));
|
||||
public void isLegal_allAscii_subdomain() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("https://foo.google.com"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_allAscii_subdomain_path() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("https://foo.google.com/some/path.html"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_cyrillicHostAsciiTld() {
|
||||
assertFalse(LinkPreviewUtil.isLegalUrl("http://кц.com"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_cyrillicHostAsciiTld_noProtocol() {
|
||||
assertFalse(LinkPreviewUtil.isLegalUrl("кц.com"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_mixedHost_noProtocol() {
|
||||
assertFalse(LinkPreviewUtil.isLegalUrl("http://asĸ.com"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_cyrillicHostAndTld_noProtocol() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("кц.рф"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_cyrillicHostAndTld_asciiPath_noProtocol() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("кц.рф/some/path"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_cyrillicHostAndTld_asciiPath() {
|
||||
assertTrue(LinkPreviewUtil.isLegalUrl("https://кц.рф/some/path"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_asciiSubdomain_cyrillicHostAndTld() {
|
||||
assertFalse(LinkPreviewUtil.isLegalUrl("http://foo.кц.рф"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void isLegal_emptyUrl() {
|
||||
assertFalse(LinkPreviewUtil.isLegalUrl(""));
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user