From d6c5892a158905c8ef1e1d4fc66f15b016fdebab Mon Sep 17 00:00:00 2001 From: Ryan ZHAO Date: Wed, 18 Nov 2020 15:17:38 +1100 Subject: [PATCH 1/5] improve link preview to support more sites --- .../linkpreview/LinkPreviewRepository.java | 31 +--- .../linkpreview/LinkPreviewUtil.java | 143 +++++++++++++++++- .../securesms/util/DateUtils.java | 36 +++++ 3 files changed, 183 insertions(+), 27 deletions(-) diff --git a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java index 9dde7e0fea..a849a5e775 100644 --- a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java +++ b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java @@ -10,7 +10,6 @@ import android.text.Html; import android.text.TextUtils; import com.bumptech.glide.load.engine.DiskCacheStrategy; -import com.bumptech.glide.request.FutureTarget; import com.google.android.gms.common.util.IOUtils; import org.thoughtcrime.securesms.ApplicationContext; @@ -18,7 +17,6 @@ import org.thoughtcrime.securesms.attachments.Attachment; import org.thoughtcrime.securesms.attachments.UriAttachment; import org.thoughtcrime.securesms.database.AttachmentDatabase; import org.thoughtcrime.securesms.dependencies.InjectableType; -import org.thoughtcrime.securesms.giph.model.ChunkedImageUrl; import org.thoughtcrime.securesms.logging.Log; import org.thoughtcrime.securesms.mms.GlideApp; import org.thoughtcrime.securesms.net.CallRequestController; @@ -37,6 +35,7 @@ import org.whispersystems.libsignal.util.guava.Optional; import org.whispersystems.signalservice.api.SignalServiceMessageReceiver; import org.whispersystems.signalservice.api.messages.SignalServiceStickerManifest; import org.whispersystems.signalservice.api.messages.SignalServiceStickerManifest.StickerInfo; +import org.thoughtcrime.securesms.linkpreview.LinkPreviewUtil.OpenGraph; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -116,7 +115,8 
@@ public class LinkPreviewRepository implements InjectableType { } private @NonNull RequestController fetchMetadata(@NonNull String url, Callback callback) { - Call call = client.newCall(new Request.Builder().url(url).cacheControl(NO_CACHE).build()); + Call call = client.newCall(new Request.Builder().url(url).removeHeader("User-Agent").addHeader("User-Agent", + "WhatsApp").cacheControl(NO_CACHE).build()); call.enqueue(new okhttp3.Callback() { @Override @@ -138,8 +138,9 @@ public class LinkPreviewRepository implements InjectableType { } String body = response.body().string(); - Optional title = getProperty(body, "title"); - Optional imageUrl = getProperty(body, "image"); + OpenGraph openGraph = LinkPreviewUtil.parseOpenGraphFields(body); + Optional title = openGraph.getTitle(); + Optional imageUrl = openGraph.getImageUrl(); if (imageUrl.isPresent() && !LinkPreviewUtil.isValidMediaUrl(imageUrl.get())) { Log.i(TAG, "Image URL was invalid or for a non-whitelisted domain. Skipping."); @@ -161,6 +162,8 @@ public class LinkPreviewRepository implements InjectableType { try { Response response = call.execute(); if (!response.isSuccessful() || response.body() == null) { + controller.cancel(); + callback.onComplete(Optional.absent()); return; } @@ -184,24 +187,6 @@ public class LinkPreviewRepository implements InjectableType { return controller; } - private @NonNull Optional getProperty(@NonNull String searchText, @NonNull String property) { - Pattern pattern = Pattern.compile("<\\s*meta\\s+property\\s*=\\s*\"\\s*og:" + property + "\\s*\"\\s+[^>]*content\\s*=\\s*\"(.*?)\"[^>]*/?\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); - Matcher matcher = pattern.matcher(searchText); - if (matcher.find()) { - String text = Html.fromHtml(matcher.group(1)).toString(); - if (!TextUtils.isEmpty(text)) { return Optional.of(text); } - } - - pattern = Pattern.compile("<\\s*" + property + "[^>]*>(.*?)<\\s*/" + property + "[^>]*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); - matcher = 
pattern.matcher(searchText); - if (matcher.find()) { - String text = Html.fromHtml(matcher.group(1)).toString(); - if (!TextUtils.isEmpty(text)) { return Optional.of(text); } - } - - return Optional.absent(); - } - private RequestController fetchStickerPackLinkPreview(@NonNull Context context, @NonNull String packUrl, @NonNull Callback> callback) diff --git a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java index c9ca8e3b6c..f3eb3c9046 100644 --- a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java +++ b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java @@ -2,6 +2,9 @@ package org.thoughtcrime.securesms.linkpreview; import androidx.annotation.NonNull; import androidx.annotation.Nullable; + +import android.annotation.SuppressLint; +import android.text.Html; import android.text.SpannableString; import android.text.TextUtils; import android.text.style.URLSpan; @@ -10,9 +13,14 @@ import android.text.util.Linkify; import com.annimon.stream.Stream; import org.thoughtcrime.securesms.stickers.StickerUrl; +import org.thoughtcrime.securesms.util.DateUtils; +import org.thoughtcrime.securesms.util.Util; +import org.whispersystems.libsignal.util.guava.Optional; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -20,10 +28,15 @@ import okhttp3.HttpUrl; public final class LinkPreviewUtil { - private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$"); - private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$"); - private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$"); - private static final Pattern STICKER_URL_PATTERN = Pattern.compile("^.*#pack_id=(.*)&pack_key=(.*)$"); + private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$"); + 
private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$"); + private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$"); + private static final Pattern OPEN_GRAPH_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*og:([^\"]+)\"[^>]*/?\\s*>"); + private static final Pattern ARTICLE_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*article:([^\"]+)\"[^>]*/?\\s*>"); + private static final Pattern OPEN_GRAPH_CONTENT_PATTERN = Pattern.compile("content\\s*=\\s*\"([^\"]*)\""); + private static final Pattern TITLE_PATTERN = Pattern.compile("<\\s*title[^>]*>(.*)<\\s*/title[^>]*>"); + private static final Pattern FAVICON_PATTERN = Pattern.compile("<\\s*link[^>]*rel\\s*=\\s*\".*icon.*\"[^>]*>"); + private static final Pattern FAVICON_HREF_PATTERN = Pattern.compile("href\\s*=\\s*\"([^\"]*)\""); /** * @return All whitelisted URLs in the source text. @@ -82,4 +95,126 @@ public final class LinkPreviewUtil { return false; } } + + public static @NonNull OpenGraph parseOpenGraphFields(@Nullable String html) { + return parseOpenGraphFields(html, text -> Html.fromHtml(text).toString()); + } + + static @NonNull OpenGraph parseOpenGraphFields(@Nullable String html, @NonNull HtmlDecoder htmlDecoder) { + if (html == null) { + return new OpenGraph(Collections.emptyMap(), null, null); + } + + Map openGraphTags = new HashMap<>(); + Matcher openGraphMatcher = OPEN_GRAPH_TAG_PATTERN.matcher(html); + + while (openGraphMatcher.find()) { + String tag = openGraphMatcher.group(); + String property = openGraphMatcher.groupCount() > 0 ? 
openGraphMatcher.group(1) : null; + + if (property != null) { + Matcher contentMatcher = OPEN_GRAPH_CONTENT_PATTERN.matcher(tag); + if (contentMatcher.find() && contentMatcher.groupCount() > 0) { + String content = htmlDecoder.fromEncoded(contentMatcher.group(1)); + openGraphTags.put(property.toLowerCase(), content); + } + } + } + + Matcher articleMatcher = ARTICLE_TAG_PATTERN.matcher(html); + + while (articleMatcher.find()) { + String tag = articleMatcher.group(); + String property = articleMatcher.groupCount() > 0 ? articleMatcher.group(1) : null; + + if (property != null) { + Matcher contentMatcher = OPEN_GRAPH_CONTENT_PATTERN.matcher(tag); + if (contentMatcher.find() && contentMatcher.groupCount() > 0) { + String content = htmlDecoder.fromEncoded(contentMatcher.group(1)); + openGraphTags.put(property.toLowerCase(), content); + } + } + } + + String htmlTitle = ""; + String faviconUrl = ""; + + Matcher titleMatcher = TITLE_PATTERN.matcher(html); + if (titleMatcher.find() && titleMatcher.groupCount() > 0) { + htmlTitle = htmlDecoder.fromEncoded(titleMatcher.group(1)); + } + + Matcher faviconMatcher = FAVICON_PATTERN.matcher(html); + if (faviconMatcher.find()) { + Matcher faviconHrefMatcher = FAVICON_HREF_PATTERN.matcher(faviconMatcher.group()); + if (faviconHrefMatcher.find() && faviconHrefMatcher.groupCount() > 0) { + faviconUrl = faviconHrefMatcher.group(1); + } + } + + return new OpenGraph(openGraphTags, htmlTitle, faviconUrl); + } + + private static @Nullable String parseTopLevelDomain(@NonNull String domain) { + int periodIndex = domain.lastIndexOf("."); + + if (periodIndex >= 0 && periodIndex < domain.length() - 1) { + return domain.substring(periodIndex + 1); + } else { + return null; + } + } + + + public static final class OpenGraph { + + private final Map values; + + private final @Nullable String htmlTitle; + private final @Nullable String faviconUrl; + + private static final String KEY_TITLE = "title"; + private static final String KEY_DESCRIPTION_URL =
"description"; + private static final String KEY_IMAGE_URL = "image"; + private static final String KEY_PUBLISHED_TIME_1 = "published_time"; + private static final String KEY_PUBLISHED_TIME_2 = "article:published_time"; + private static final String KEY_MODIFIED_TIME_1 = "modified_time"; + private static final String KEY_MODIFIED_TIME_2 = "article:modified_time"; + + public OpenGraph(@NonNull Map values, @Nullable String htmlTitle, @Nullable String faviconUrl) { + this.values = values; + this.htmlTitle = htmlTitle; + this.faviconUrl = faviconUrl; + } + + public @NonNull Optional getTitle() { + return Optional.of(Util.getFirstNonEmpty(values.get(KEY_TITLE), htmlTitle)); + } + + public @NonNull Optional getImageUrl() { + return Optional.of(Util.getFirstNonEmpty(values.get(KEY_IMAGE_URL), faviconUrl)); + } + + @SuppressLint("ObsoleteSdkInt") + public long getDate() { + return Stream.of(values.get(KEY_PUBLISHED_TIME_1), + values.get(KEY_PUBLISHED_TIME_2), + values.get(KEY_MODIFIED_TIME_1), + values.get(KEY_MODIFIED_TIME_2)) + .map(DateUtils::parseIso8601) + .filter(time -> time > 0) + .findFirst() + .orElse(0L); + } + + public @NonNull + Optional getDescription() { + return Optional.fromNullable(values.get(KEY_DESCRIPTION_URL)); /* absent when the page has no og:description; Optional.of(null) would throw */ + } + } + + public interface HtmlDecoder { + @NonNull String fromEncoded(@NonNull String html); + } + } diff --git a/src/org/thoughtcrime/securesms/util/DateUtils.java b/src/org/thoughtcrime/securesms/util/DateUtils.java index a7fc61fb4f..901aba7dc2 100644 --- a/src/org/thoughtcrime/securesms/util/DateUtils.java +++ b/src/org/thoughtcrime/securesms/util/DateUtils.java @@ -16,10 +16,17 @@ */ package org.thoughtcrime.securesms.util; +import android.annotation.SuppressLint; import android.content.Context; import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import android.os.Build; import android.text.format.DateFormat; +import org.thoughtcrime.securesms.logging.Log; + +import java.text.ParseException; import
java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; @@ -142,4 +149,33 @@ public class DateUtils extends android.text.format.DateUtils { private static String getLocalizedPattern(String template, Locale locale) { return DateFormat.getBestDateTimePattern(locale, template); } + + /** + * e.g. 2020-09-04T19:17:51Z + * https://www.iso.org/iso-8601-date-and-time-format.html + * + * Note: SDK_INT == 0 check needed to pass unit tests due to JVM date parser differences. + * + * @return The timestamp if able to be parsed, otherwise -1 (also for null or empty input). + */ + @SuppressLint("ObsoleteSdkInt") + public static long parseIso8601(@Nullable String date) { + SimpleDateFormat format; + if (Build.VERSION.SDK_INT == 0 || Build.VERSION.SDK_INT >= 24) { + format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX", Locale.getDefault()); + } else { + format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.getDefault()); + } + + if (date == null || date.isEmpty()) { + return -1; + } + + try { + return format.parse(date).getTime(); + } catch (ParseException e) { + Log.w(TAG, "Failed to parse date.", e); + return -1; + } + } } From 53d0689cf7c4ab72502d219f7b748719c495728a Mon Sep 17 00:00:00 2001 From: Ryan ZHAO Date: Thu, 19 Nov 2020 16:58:11 +1100 Subject: [PATCH 2/5] make pattern case insensitive --- .../securesms/linkpreview/LinkPreviewUtil.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java index f3eb3c9046..abe9018061 100644 --- a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java +++ b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java @@ -28,15 +28,15 @@ import okhttp3.HttpUrl; public final class LinkPreviewUtil { - private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$"); - private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$"); - private
static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$"); - private static final Pattern OPEN_GRAPH_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*og:([^\"]+)\"[^>]*/?\\s*>"); - private static final Pattern ARTICLE_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*article:([^\"]+)\"[^>]*/?\\s*>"); - private static final Pattern OPEN_GRAPH_CONTENT_PATTERN = Pattern.compile("content\\s*=\\s*\"([^\"]*)\""); - private static final Pattern TITLE_PATTERN = Pattern.compile("<\\s*title[^>]*>(.*)<\\s*/title[^>]*>"); - private static final Pattern FAVICON_PATTERN = Pattern.compile("<\\s*link[^>]*rel\\s*=\\s*\".*icon.*\"[^>]*>"); - private static final Pattern FAVICON_HREF_PATTERN = Pattern.compile("href\\s*=\\s*\"([^\"]*)\""); + private static final Pattern DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$", Pattern.CASE_INSENSITIVE); + private static final Pattern ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$", Pattern.CASE_INSENSITIVE); + private static final Pattern ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$", Pattern.CASE_INSENSITIVE); + private static final Pattern OPEN_GRAPH_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*og:([^\"]+)\"[^>]*/?\\s*>", Pattern.CASE_INSENSITIVE); + private static final Pattern ARTICLE_TAG_PATTERN = Pattern.compile("<\\s*meta[^>]*property\\s*=\\s*\"\\s*article:([^\"]+)\"[^>]*/?\\s*>", Pattern.CASE_INSENSITIVE); + private static final Pattern OPEN_GRAPH_CONTENT_PATTERN = Pattern.compile("content\\s*=\\s*\"([^\"]*)\"", Pattern.CASE_INSENSITIVE); + private static final Pattern TITLE_PATTERN = Pattern.compile("<\\s*title[^>]*>(.*)<\\s*/title[^>]*>", Pattern.CASE_INSENSITIVE); + private static final Pattern FAVICON_PATTERN = Pattern.compile("<\\s*link[^>]*rel\\s*=\\s*\".*icon.*\"[^>]*>", Pattern.CASE_INSENSITIVE); + private static final Pattern FAVICON_HREF_PATTERN = Pattern.compile("href\\s*=\\s*\"([^\"]*)\"", 
Pattern.CASE_INSENSITIVE); /** * @return All whitelisted URLs in the source text. From 5e47d3b9acfbbc19c258d3fe983de0d69a56a4f2 Mon Sep 17 00:00:00 2001 From: Ryan ZHAO Date: Thu, 19 Nov 2020 17:09:24 +1100 Subject: [PATCH 3/5] show link preview cancel button --- res/layout/link_preview.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/res/layout/link_preview.xml b/res/layout/link_preview.xml index 09d371485a..493f65a41f 100644 --- a/res/layout/link_preview.xml +++ b/res/layout/link_preview.xml @@ -79,11 +79,11 @@ android:layout_marginEnd="6dp" android:layout_marginTop="4dp" android:src="@drawable/ic_close_white_18dp" - android:tint="@color/gray70" - android:visibility="gone" + android:visibility="visible" app:layout_constraintEnd_toEndOf="parent" app:layout_constraintTop_toTopOf="parent" - tools:visibility="visible" /> + app:tint="@color/gray70" + tools:ignore="MissingPrefix" /> Date: Fri, 20 Nov 2020 11:41:50 +1100 Subject: [PATCH 4/5] fix link preview close button --- res/layout/link_preview.xml | 4 ++-- .../securesms/components/LinkPreviewView.java | 11 +++++++++++ .../securesms/conversation/ConversationItem.java | 5 ++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/res/layout/link_preview.xml b/res/layout/link_preview.xml index 493f65a41f..c4215b810a 100644 --- a/res/layout/link_preview.xml +++ b/res/layout/link_preview.xml @@ -79,11 +79,11 @@ android:layout_marginEnd="6dp" android:layout_marginTop="4dp" android:src="@drawable/ic_close_white_18dp" - android:visibility="visible" + android:visibility="gone" app:layout_constraintEnd_toEndOf="parent" app:layout_constraintTop_toTopOf="parent" app:tint="@color/gray70" - tools:ignore="MissingPrefix" /> + tools:visibility="visible" /> Date: Mon, 23 Nov 2020 12:00:18 +1100 Subject: [PATCH 5/5] temporarily disable image with extension like '.ico' to make it the same iOS (just accept jpg, png and gif) --- .../securesms/linkpreview/LinkPreviewRepository.java | 5 +++++ 
.../securesms/linkpreview/LinkPreviewUtil.java | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java index a849a5e775..9bc08332fb 100644 --- a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java +++ b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewRepository.java @@ -147,6 +147,11 @@ public class LinkPreviewRepository implements InjectableType { imageUrl = Optional.absent(); } + if (imageUrl.isPresent() && !LinkPreviewUtil.isVaildMimeType(imageUrl.get())) { + Log.i(TAG, "Image URL was invalid mime type. Skipping."); + imageUrl = Optional.absent(); + } + callback.onComplete(new Metadata(title, imageUrl)); } }); diff --git a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java index abe9018061..be7a586e7a 100644 --- a/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java +++ b/src/org/thoughtcrime/securesms/linkpreview/LinkPreviewUtil.java @@ -17,6 +17,7 @@ import org.thoughtcrime.securesms.util.DateUtils; import org.thoughtcrime.securesms.util.Util; import org.whispersystems.libsignal.util.guava.Optional; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -96,6 +97,15 @@ public final class LinkPreviewUtil { } } + public static boolean isVaildMimeType(@NonNull String url) { + String[] validExtensions = {"jpg", "png", "gif", "jpeg"}; + String path = url.split("[?#]", 2)[0]; /* judge the extension on the path only, ignoring any query string or fragment */ + if (path.contains(".")) { + return Arrays.asList(validExtensions).contains(path.substring(path.lastIndexOf(".") + 1).toLowerCase()); + } + return true; + } + public static @NonNull OpenGraph parseOpenGraphFields(@Nullable String html) { return parseOpenGraphFields(html, text -> Html.fromHtml(text).toString()); }