From b2d4608cdbe3792f3011590ebe4a97c684304422 Mon Sep 17 00:00:00 2001
From: Greyson Parrelli
Date: Wed, 20 Feb 2019 17:00:23 -0800
Subject: [PATCH] Improved handling for URLs that are composed of mixed character sets.

For http(s) URLs only the authority is checked for mixed scripts, so a host
written entirely in one non-Latin script is not rejected just because the
scheme (and possibly the path) is Latin.

---
Review notes (not part of the commit message):
 - isLegalUrl() now inspects only the authority of http(s) URLs.
 - LATIN range U+00C0-U+00DC corrected to U+00C0-U+00D6 (U+00D7 is not a letter).
 - Leading whitespace of context and removed lines was reconstructed; if the
   patch does not apply cleanly, retry with `git apply --ignore-space-change`.

 .../conversation/ConversationItem.java        |   8 ++
 .../linkpreview/LegalUrlPatterns.java         | 109 ++++++++++++++++++
 .../linkpreview/LinkPreviewUtil.java          |  65 +++++++++--
 .../linkpreview/LinkPreviewUtilTest.java      |  39 +++++++
 4 files changed, 213 insertions(+), 8 deletions(-)
 create mode 100644 src/org/thoughtcrime/securesms/linkpreview/LegalUrlPatterns.java
 create mode 100644 test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java

diff --git a/src/org/thoughtcrime/securesms/conversation/ConversationItem.java b/src/org/thoughtcrime/securesms/conversation/ConversationItem.java
index 2108e67bf6..e83ce53770 100644
--- a/src/org/thoughtcrime/securesms/conversation/ConversationItem.java
+++ b/src/org/thoughtcrime/securesms/conversation/ConversationItem.java
@@ -47,6 +47,7 @@ import org.thoughtcrime.securesms.R;
 import org.thoughtcrime.securesms.attachments.Attachment;
 import org.thoughtcrime.securesms.components.LinkPreviewView;
 import org.thoughtcrime.securesms.linkpreview.LinkPreview;
+import org.thoughtcrime.securesms.linkpreview.LinkPreviewUtil;
 import org.thoughtcrime.securesms.logging.Log;
 import android.util.TypedValue;
 import android.view.View;
@@ -55,6 +56,8 @@ import android.widget.LinearLayout;
 import android.widget.TextView;
 import android.widget.Toast;
 
+import com.annimon.stream.Stream;
+
 import org.thoughtcrime.securesms.attachments.DatabaseAttachment;
 import org.thoughtcrime.securesms.components.AlertView;
 import org.thoughtcrime.securesms.components.AudioView;
@@ -660,7 +663,12 @@ public class ConversationItem extends LinearLayout
     boolean hasLinks = Linkify.addLinks(messageBody, shouldLinkifyAllLinks ? linkPattern : 0);
 
     if (hasLinks) {
+      Stream.of(messageBody.getSpans(0, messageBody.length(), URLSpan.class))
+            .filterNot(url -> LinkPreviewUtil.isLegalUrl(url.getURL()))
+            .forEach(messageBody::removeSpan);
+
       URLSpan[] urlSpans = messageBody.getSpans(0, messageBody.length(), URLSpan.class);
+
       for (URLSpan urlSpan : urlSpans) {
         int start = messageBody.getSpanStart(urlSpan);
         int end = messageBody.getSpanEnd(urlSpan);
diff --git a/src/org/thoughtcrime/securesms/linkpreview/LegalUrlPatterns.java b/src/org/thoughtcrime/securesms/linkpreview/LegalUrlPatterns.java
new file mode 100644
index 0000000000..1fe3ef7b22
--- /dev/null
+++ b/src/org/thoughtcrime/securesms/linkpreview/LegalUrlPatterns.java
@@ -0,0 +1,109 @@
+package org.thoughtcrime.securesms.linkpreview;
+
+import java.util.regex.Pattern;
+
+/**
+ * Single-character classes used to detect URLs that mix look-alike scripts.
+ * <p>
+ * Each pattern is an explicit list of code point ranges, so {@code matcher(text).find()} tells
+ * whether the text contains at least one character from that list. The lists are not exhaustive
+ * (LATIN, for example, stops at U+01BA).
+ */
+public class LegalUrlPatterns {
+
+  /** Latin letters, U+0041 through U+01BA; U+00D7 and U+00F7 (the math signs) are left out. */
+  public static final Pattern LATIN = Pattern.compile("[" +
+      "\\x{0041}-\\x{005A}" +
+      "\\x{0061}-\\x{007A}" +
+      "\\x{00AA}" +
+      "\\x{00BA}" +
+      "\\x{00C0}-\\x{00D6}" +
+      "\\x{00D8}-\\x{00F6}" +
+      "\\x{00F8}-\\x{01BA}" +
+      "]");
+
+  /** Cyrillic characters, including the extended and supplement ranges. */
+  public static final Pattern CYRILLIC = Pattern.compile("[" +
+      "\\x{0400}-\\x{0481}" +
+      "\\x{0482}" +
+      "\\x{0483}-\\x{0484}" +
+      "\\x{0487}" +
+      "\\x{0488}-\\x{0489}" +
+      "\\x{048A}-\\x{052F}" +
+      "\\x{1C80}-\\x{1C88}" +
+      "\\x{1D2B}" +
+      "\\x{1D78}" +
+      "\\x{2DE0}-\\x{2DFF}" +
+      "\\x{A640}-\\x{A66D}" +
+      "\\x{A66E}" +
+      "\\x{A66F}" +
+      "\\x{A670}-\\x{A672}" +
+      "\\x{A673}" +
+      "\\x{A674}-\\x{A67D}" +
+      "\\x{A67E}" +
+      "\\x{A67F}" +
+      "\\x{A680}-\\x{A69B}" +
+      "\\x{A69C}-\\x{A69D}" +
+      "\\x{A69E}-\\x{A69F}" +
+      "\\x{FE2E}-\\x{FE2F}" +
+      "]");
+
+  /** Greek characters, including supplementary-plane code points above U+FFFF. */
+  public static final Pattern GREEK = Pattern.compile("[" +
+      "\\x{0370}-\\x{0373}" +
+      "\\x{0375}" +
+      "\\x{0376}-\\x{0377}" +
+      "\\x{037A}" +
+      "\\x{037B}-\\x{037D}" +
+      "\\x{037F}" +
+      "\\x{0384}" +
+      "\\x{0386}" +
+      "\\x{0388}-\\x{038A}" +
+      "\\x{038C}" +
+      "\\x{038E}-\\x{03A1}" +
+      "\\x{03A3}-\\x{03E1}" +
+      "\\x{03F0}-\\x{03F5}" +
+      "\\x{03F6}" +
+      "\\x{03F7}-\\x{03FF}" +
+      "\\x{1D26}-\\x{1D2A}" +
+      "\\x{1D5D}-\\x{1D61}" +
+      "\\x{1D66}-\\x{1D6A}" +
+      "\\x{1DBF}" +
+      "\\x{1F00}-\\x{1F15}" +
+      "\\x{1F18}-\\x{1F1D}" +
+      "\\x{1F20}-\\x{1F45}" +
+      "\\x{1F48}-\\x{1F4D}" +
+      "\\x{1F50}-\\x{1F57}" +
+      "\\x{1F59}" +
+      "\\x{1F5B}" +
+      "\\x{1F5D}" +
+      "\\x{1F5F}-\\x{1F7D}" +
+      "\\x{1F80}-\\x{1FB4}" +
+      "\\x{1FB6}-\\x{1FBC}" +
+      "\\x{1FBD}" +
+      "\\x{1FBE}" +
+      "\\x{1FBF}-\\x{1FC1}" +
+      "\\x{1FC2}-\\x{1FC4}" +
+      "\\x{1FC6}-\\x{1FCC}" +
+      "\\x{1FCD}-\\x{1FCF}" +
+      "\\x{1FD0}-\\x{1FD3}" +
+      "\\x{1FD6}-\\x{1FDB}" +
+      "\\x{1FDD}-\\x{1FDF}" +
+      "\\x{1FE0}-\\x{1FEC}" +
+      "\\x{1FED}-\\x{1FEF}" +
+      "\\x{1FF2}-\\x{1FF4}" +
+      "\\x{1FF6}-\\x{1FFC}" +
+      "\\x{1FFD}-\\x{1FFE}" +
+      "\\x{2126}" +
+      "\\x{AB65}" +
+      "\\x{10140}-\\x{10174}" +
+      "\\x{10175}-\\x{10178}" +
+      "\\x{10179}-\\x{10189}" +
+      "\\x{1018A}-\\x{1018B}" +
+      "\\x{1018C}-\\x{1018E}" +
+      "\\x{101A0}" +
+      "\\x{1D200}-\\x{1D241}" +
+      "\\x{1D242}-\\x{1D244}" +
+      "\\x{1D245}" +
+      "]");
+}
diff --git a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java
index 020868bdf5..4124d2fff6 100644
--- a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java
+++ b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java
@@ -37,10 +37,11 @@ public final class LinkPreviewUtil {
    */
   public static boolean isWhitelistedLinkUrl(@NonNull String linkUrl) {
     HttpUrl url = HttpUrl.parse(linkUrl);
-    return url != null &&
-           !TextUtils.isEmpty(url.scheme()) &&
-           "https".equals(url.scheme()) &&
-           LinkPreviewDomains.LINKS.contains(url.host());
+    return url != null &&
+           !TextUtils.isEmpty(url.scheme()) &&
+           "https".equals(url.scheme()) &&
+           LinkPreviewDomains.LINKS.contains(url.host()) &&
+           isLegalUrl(linkUrl);
   }
 
   /**
@@ -48,9 +49,57 @@ public final class LinkPreviewUtil {
    */
   public static boolean isWhitelistedMediaUrl(@NonNull String mediaUrl) {
     HttpUrl url = HttpUrl.parse(mediaUrl);
-    return url != null &&
-           !TextUtils.isEmpty(url.scheme()) &&
-           "https".equals(url.scheme()) &&
-           LinkPreviewDomains.IMAGES.contains(url.topPrivateDomain());
+    return url != null &&
+           !TextUtils.isEmpty(url.scheme()) &&
+           "https".equals(url.scheme()) &&
+           LinkPreviewDomains.IMAGES.contains(url.topPrivateDomain()) &&
+           isLegalUrl(mediaUrl);
+  }
+
+  /**
+   * Checks whether a URL is free of mixed-script (look-alike) characters.
+   * <p>
+   * For {@code http://} and {@code https://} URLs only the authority (the text between the
+   * scheme and the first {@code /}, {@code ?} or {@code #}) is inspected, so the always-Latin
+   * scheme and the path cannot make a host written entirely in one script look mixed. Any other
+   * input is inspected as a whole.
+   *
+   * @param url The URL text to inspect.
+   * @return false if the inspected text contains a Latin letter together with a Cyrillic or
+   *         Greek character, true otherwise.
+   */
+  public static boolean isLegalUrl(@NonNull String url) {
+    String authority = getAuthority(url);
+
+    if (LegalUrlPatterns.LATIN.matcher(authority).find()) {
+      return !LegalUrlPatterns.CYRILLIC.matcher(authority).find() &&
+             !LegalUrlPatterns.GREEK.matcher(authority).find();
+    }
+    return true;
+  }
+
+  /**
+   * Returns the authority of an http(s) URL, or the input unchanged for anything else.
+   */
+  private static @NonNull String getAuthority(@NonNull String url) {
+    int start;
+
+    if (url.regionMatches(true, 0, "https://", 0, 8)) {
+      start = 8;
+    } else if (url.regionMatches(true, 0, "http://", 0, 7)) {
+      start = 7;
+    } else {
+      return url;
+    }
+
+    for (int i = start; i < url.length(); i++) {
+      char c = url.charAt(i);
+
+      if (c == '/' || c == '?' || c == '#') {
+        return url.substring(start, i);
+      }
+    }
+
+    return url.substring(start);
   }
 }
diff --git a/test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java b/test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java
new file mode 100644
index 0000000000..8d1c972955
--- /dev/null
+++ b/test/unitTest/java/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtilTest.java
@@ -0,0 +1,39 @@
+package org.thoughtcrime.securesms.linkpreview;
+
+import org.junit.Test;
+
+import static junit.framework.TestCase.assertFalse;
+import static junit.framework.TestCase.assertTrue;
+
+public class LinkPreviewUtilTest {
+
+  @Test
+  public void isLegal_allLatin() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("https://signal.org"));
+  }
+
+  @Test
+  public void isLegal_latinAndCyrillic() {
+    assertFalse(LinkPreviewUtil.isLegalUrl("https://www.аmazon.com"));
+  }
+
+  @Test
+  public void isLegal_latinAndGreek() {
+    assertFalse(LinkPreviewUtil.isLegalUrl("https://www.αpple.com"));
+  }
+
+  @Test
+  public void isLegal_allCyrillicHost() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("https://яндекс.рф"));
+  }
+
+  @Test
+  public void isLegal_allGreekHost() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("https://παράδειγμα.ελ"));
+  }
+
+  @Test
+  public void isLegal_cyrillicHostWithLatinPath() {
+    assertTrue(LinkPreviewUtil.isLegalUrl("https://яндекс.рф/search?text=signal"));
+  }
+}