diff --git a/src/org/thoughtcrime/securesms/linkpreview/LegalUrlPatterns.java b/src/org/thoughtcrime/securesms/linkpreview/LegalUrlPatterns.java
deleted file mode 100644
index 1fe3ef7b22..0000000000
--- a/src/org/thoughtcrime/securesms/linkpreview/LegalUrlPatterns.java
+++ /dev/null
@@ -1,99 +0,0 @@
-package org.thoughtcrime.securesms.linkpreview;
-
-import java.util.regex.Pattern;
-
-public class LegalUrlPatterns {
-
-  public static final Pattern LATIN = Pattern.compile("[" +
-      "\\x{0041}-\\x{005A}" +
-      "\\x{0061}-\\x{007A}" +
-      "\\x{00AA}" +
-      "\\x{00BA}" +
-      "\\x{00C0}-\\x{00DC}" +
-      "\\x{00D8}-\\x{00F6}" +
-      "\\x{00F8}-\\x{01BA}" +
-      "]");
-
-  public static final Pattern CYRILLIC = Pattern.compile("[" +
-      "\\x{0400}-\\x{0481}" +
-      "\\x{0482}" +
-      "\\x{0483}-\\x{0484}" +
-      "\\x{0487}" +
-      "\\x{0488}-\\x{0489}" +
-      "\\x{048A}-\\x{052F}" +
-      "\\x{1C80}-\\x{1C88}" +
-      "\\x{1D2B}" +
-      "\\x{1D78}" +
-      "\\x{2DE0}-\\x{2DFF}" +
-      "\\x{A640}-\\x{A66D}" +
-      "\\x{A66E}" +
-      "\\x{A66F}" +
-      "\\x{A670}-\\x{A672}" +
-      "\\x{A673}" +
-      "\\x{A674}-\\x{A67D}" +
-      "\\x{A67E}" +
-      "\\x{A67F}" +
-      "\\x{A680}-\\x{A69B}" +
-      "\\x{A69C}-\\x{A69D}" +
-      "\\x{A69E}-\\x{A69F}" +
-      "\\x{FE2E}-\\x{FE2F}" +
-      "]");
-
-  public static final Pattern GREEK = Pattern.compile("[" +
-      "\\x{0370}-\\x{0373}" +
-      "\\x{0375}" +
-      "\\x{0376}-\\x{0377}" +
-      "\\x{037A}" +
-      "\\x{037B}-\\x{037D}" +
-      "\\x{037F}" +
-      "\\x{0384}" +
-      "\\x{0386}" +
-      "\\x{0388}-\\x{038A}" +
-      "\\x{038C}" +
-      "\\x{038E}-\\x{03A1}" +
-      "\\x{03A3}-\\x{03E1}" +
-      "\\x{03F0}-\\x{03F5}" +
-      "\\x{03F6}" +
-      "\\x{03F7}-\\x{03FF}" +
-      "\\x{1D26}-\\x{1D2A}" +
-      "\\x{1D5D}-\\x{1D61}" +
-      "\\x{1D66}-\\x{1D6A}" +
-      "\\x{1DBF}" +
-      "\\x{1F00}-\\x{1F15}" +
-      "\\x{1F18}-\\x{1F1D}" +
-      "\\x{1F20}-\\x{1F45}" +
-      "\\x{1F48}-\\x{1F4D}" +
-      "\\x{1F50}-\\x{1F57}" +
-      "\\x{1F59}" +
-      "\\x{1F5B}" +
-      "\\x{1F5D}" +
-      "\\x{1F5F}-\\x{1F7D}" +
-      "\\x{1F80}-\\x{1FB4}" +
-      "\\x{1FB6}-\\x{1FBC}" +
-      "\\x{1FBD}" +
-      "\\x{1FBE}" +
-      "\\x{1FBF}-\\x{1FC1}" +
-      "\\x{1FC2}-\\x{1FC4}" +
-      "\\x{1FC6}-\\x{1FCC}" +
-      "\\x{1FCD}-\\x{1FCF}" +
-      "\\x{1FD0}-\\x{1FD3}" +
-      "\\x{1FD6}-\\x{1FDB}" +
-      "\\x{1FDD}-\\x{1FDF}" +
-      "\\x{1FE0}-\\x{1FEC}" +
-      "\\x{1FED}-\\x{1FEF}" +
-      "\\x{1FF2}-\\x{1FF4}" +
-      "\\x{1FF6}-\\x{1FFC}" +
-      "\\x{1FFD}-\\x{1FFE}" +
-      "\\x{2126}" +
-      "\\x{AB65}" +
-      "\\x{10140}-\\x{10174}"+
-      "\\x{10175}-\\x{10178}"+
-      "\\x{10179}-\\x{10189}"+
-      "\\x{1018A}-\\x{1018B}"+
-      "\\x{1018C}-\\x{1018E}"+
-      "\\x{101A0}"+
-      "\\x{1D200}-\\x{1D241}"+
-      "\\x{1D242}-\\x{1D244}"+
-      "\\x{1D245}"+
-      "]");
-}
diff --git a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java
index 4124d2fff6..cefa024664 100644
--- a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java
+++ b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java
@@ -10,11 +10,18 @@ import com.annimon.stream.Stream;
 
 import java.util.Collections;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import okhttp3.HttpUrl;
 
 public final class LinkPreviewUtil {
 
+  // Group 1: optional http(s) scheme, any letter case. Group 2: the host, i.e. everything before the first '/'.
+  private static final Pattern DOMAIN_PATTERN        = Pattern.compile("^(https?://)?([^/]+).*$", Pattern.CASE_INSENSITIVE);
+  private static final Pattern ALL_ASCII_PATTERN     = Pattern.compile("^[\\x00-\\x7F]*$");
+  private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$");
+
   /**
    * @return All whitelisted URLs in the source text.
    */
@@ -57,10 +64,26 @@ public final class LinkPreviewUtil {
   }
 
+  /**
+   * Rejects look-alike (homograph) hosts, i.e. hosts that mix ASCII and non-ASCII characters.
+   *
+   * @param url A URL with or without a leading http(s) scheme. The scheme is matched
+   *            case-insensitively so that it is never mistaken for the host.
+   * @return True if the host, ignoring dots, is entirely ASCII or entirely non-ASCII.
+   *         False if the host mixes the two, or if no host could be found (e.g. an
+   *         empty string).
+   */
   public static boolean isLegalUrl(@NonNull String url) {
-    if (LegalUrlPatterns.LATIN.matcher(url).find()) {
-      return !LegalUrlPatterns.CYRILLIC.matcher(url).find() &&
-             !LegalUrlPatterns.GREEK.matcher(url).find();
+    Matcher matcher = DOMAIN_PATTERN.matcher(url);
+
+    if (matcher.matches()) {
+      // Dots are ASCII separators even in an all-non-ASCII domain, so drop them before checking.
+      String domain        = matcher.group(2);
+      String cleanedDomain = domain.replaceAll("\\.", "");
+
+      return ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() ||
+             ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches();
+    } else {
+      return false;
     }
-    return true;
   }
 }
diff --git a/test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java b/test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java
index 8d1c972955..b5c2fde306 100644
--- a/test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java
+++ b/test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java
@@ -8,17 +8,67 @@ import static junit.framework.TestCase.assertTrue;
 public class LinkPreviewUtilTest {
 
   @Test
-  public void isLegal_allLatin() {
-    assertTrue(LinkPreviewUtil.isLegalUrl("https://signal.org"));
+  public void isLegal_allAscii_noProtocol() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("google.com"));
   }
 
   @Test
-  public void isLegal_latinAndCyrillic() {
-    assertFalse(LinkPreviewUtil.isLegalUrl("https://www.аmazon.com"));
+  public void isLegal_allAscii_noProtocol_subdomain() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("foo.google.com"));
   }
 
   @Test
-  public void isLegal_latinAndGreek() {
-    assertFalse(LinkPreviewUtil.isLegalUrl("https://www.αpple.com"));
+  public void isLegal_allAscii_subdomain() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("https://foo.google.com"));
+  }
+
+  @Test
+  public void isLegal_allAscii_subdomain_path() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("https://foo.google.com/some/path.html"));
+  }
+
+  @Test
+  public void isLegal_cyrillicHostAsciiTld() {
+    assertFalse(LinkPreviewUtil.isLegalUrl("http://кц.com"));
+  }
+
+  @Test
+  public void isLegal_cyrillicHostAsciiTld_noProtocol() {
+    assertFalse(LinkPreviewUtil.isLegalUrl("кц.com"));
+  }
+
+  @Test
+  public void isLegal_mixedHost() {
+    assertFalse(LinkPreviewUtil.isLegalUrl("http://asĸ.com"));
+  }
+
+  @Test
+  public void isLegal_mixedHost_uppercaseProtocol() {
+    assertFalse(LinkPreviewUtil.isLegalUrl("HTTP://asĸ.com"));
+  }
+
+  @Test
+  public void isLegal_cyrillicHostAndTld_noProtocol() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("кц.рф"));
+  }
+
+  @Test
+  public void isLegal_cyrillicHostAndTld_asciiPath_noProtocol() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("кц.рф/some/path"));
+  }
+
+  @Test
+  public void isLegal_cyrillicHostAndTld_asciiPath() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("https://кц.рф/some/path"));
+  }
+
+  @Test
+  public void isLegal_asciiSubdomain_cyrillicHostAndTld() {
+    assertFalse(LinkPreviewUtil.isLegalUrl("http://foo.кц.рф"));
+  }
+
+  @Test
+  public void isLegal_emptyUrl() {
+    assertFalse(LinkPreviewUtil.isLegalUrl(""));
   }
 }