From 37bcac40bbe65837d3760ee867538b16f92068aa Mon Sep 17 00:00:00 2001 From: Alan Evans Date: Tue, 6 Aug 2019 16:02:12 -0400 Subject: [PATCH] URL encoded scrubber. * Replace scrubber and tests. * Improves email regex performance. --- .../logsubmit/SubmitLogFragment.java | 10 +- .../securesms/logsubmit/util/Scrubber.java | 111 ++++++++++++------ .../securesms/logsubmit/ScrubberTest.java | 57 --------- .../logsubmit/util/ScrubberTest.java | 86 ++++++++++++++ 4 files changed, 166 insertions(+), 98 deletions(-) delete mode 100644 test/unitTest/java/org/thoughtcrime/securesms/logsubmit/ScrubberTest.java create mode 100644 test/unitTest/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java diff --git a/src/org/thoughtcrime/securesms/logsubmit/SubmitLogFragment.java b/src/org/thoughtcrime/securesms/logsubmit/SubmitLogFragment.java index d6f5cfd3d3..eed5c4c96d 100644 --- a/src/org/thoughtcrime/securesms/logsubmit/SubmitLogFragment.java +++ b/src/org/thoughtcrime/securesms/logsubmit/SubmitLogFragment.java @@ -353,16 +353,14 @@ public class SubmitLogFragment extends Fragment { Context context = weakContext.get(); if (context == null) return null; - Scrubber scrubber = new Scrubber(); - - String newLogs; + CharSequence newLogs; try { long t1 = System.currentTimeMillis(); String logs = ApplicationContext.getInstance(context).getPersistentLogger().getLogs().get(); Log.i(TAG, "Fetch our logs : " + (System.currentTimeMillis() - t1) + " ms"); long t2 = System.currentTimeMillis(); - newLogs = scrubber.scrub(logs); + newLogs = Scrubber.scrub(logs); Log.i(TAG, "Scrub our logs: " + (System.currentTimeMillis() - t2) + " ms"); } catch (InterruptedException | ExecutionException e) { Log.w(TAG, "Failed to retrieve new logs.", e); @@ -374,7 +372,7 @@ public class SubmitLogFragment extends Fragment { Log.i(TAG, "Fetch logcat: " + (System.currentTimeMillis() - t3) + " ms"); long t4 = System.currentTimeMillis(); - String scrubbedLogcat = scrubber.scrub(logcat); + CharSequence scrubbedLogcat = Scrubber.scrub(logcat); Log.i(TAG, "Scrub logcat: " + (System.currentTimeMillis() - t4) + " ms"); @@ -386,7 +384,7 @@ public class SubmitLogFragment extends Fragment { .append("\n\n\n") .append(HEADER_JOBS) .append("\n\n") - .append(scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo())) + .append(Scrubber.scrub(ApplicationContext.getInstance(context).getJobManager().getDebugInfo())) .append("\n\n\n"); if (VERSION.SDK_INT >= 28) { diff --git a/src/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java b/src/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java index d94d97df07..5b8893ea21 100644 --- a/src/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java +++ b/src/org/thoughtcrime/securesms/logsubmit/util/Scrubber.java @@ -17,54 +17,95 @@ package org.thoughtcrime.securesms.logsubmit.util; +import androidx.annotation.NonNull; + import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * Scrub data for possibly sensitive information + * Scrub data for possibly sensitive information. */ -public class Scrubber { - private static final String TAG = Scrubber.class.getSimpleName(); +public final class Scrubber { - private static final Pattern E164_PATTERN = Pattern.compile("\\+\\d{10,15}"); - private static final Pattern GROUPID_PATTERN = Pattern.compile("__textsecure_group__![^\\s]+"); - private static final Pattern EMAIL_PATTERN = Pattern.compile("[^\\s]+@[^\\s]+"); - - private static final Pattern[] DEFAULTS = new Pattern[] { - E164_PATTERN, - GROUPID_PATTERN, - EMAIL_PATTERN - }; - - private final Pattern[] patterns; - public Scrubber(Pattern... patterns) { - this.patterns = patterns; + private Scrubber() { } - public Scrubber() { - this(DEFAULTS); + /** + * The middle group will be censored. + * Handles URL encoded +, %2B + */ + private static final Pattern E164_PATTERN = Pattern.compile("(\\+|%2B)(\\d{8,13})(\\d{2})"); + private static final String E164_CENSOR = "*************"; + + /** + * The second group will be censored. + */ + private static final Pattern CRUDE_EMAIL_PATTERN = Pattern.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)"); + private static final String EMAIL_CENSOR = "...@..."; + + /** + * The middle group will be censored. + */ + private static final Pattern GROUP_ID_PATTERN = Pattern.compile("(__)(textsecure_group__![^\\s]+)([^\\s]{2})"); + private static final String GROUP_ID_CENSOR = "...group..."; + + public static CharSequence scrub(@NonNull CharSequence in) { + + in = scrubE164(in); + in = scrubEmail(in); + in = scrubGroups(in); + + return in; } - public String scrub(final String in) { - String out = in; - for (Pattern pattern : patterns) { - Matcher matcher = pattern.matcher(out); - StringBuilder builder = new StringBuilder(); - int lastEndingPos = 0; + private static CharSequence scrubE164(@NonNull CharSequence in) { + return scrub(in, + E164_PATTERN, + (matcher, output) -> output.append(matcher.group(1)) + .append(E164_CENSOR, 0, matcher.group(2).length()) + .append(matcher.group(3))); + } - while (matcher.find()) { - builder.append(out.substring(lastEndingPos, matcher.start())); + private static CharSequence scrubEmail(@NonNull CharSequence in) { + return scrub(in, + CRUDE_EMAIL_PATTERN, + (matcher, output) -> output.append(matcher.group(1)) + .append(EMAIL_CENSOR)); + } - final String censored = matcher.group().substring(0,1) + - new String(new char[matcher.group().length()-3]).replace("\0", "*") + - matcher.group().substring(matcher.group().length()-2); - builder.append(censored); + private static CharSequence scrubGroups(@NonNull CharSequence in) { + return scrub(in, + GROUP_ID_PATTERN, + (matcher, output) -> output.append(matcher.group(1)) + .append(GROUP_ID_CENSOR) + .append(matcher.group(3))); + } - lastEndingPos = matcher.end(); - } - builder.append(out.substring(lastEndingPos)); - out = builder.toString(); + private static CharSequence scrub(@NonNull CharSequence in, @NonNull Pattern pattern, @NonNull ProcessMatch processMatch) { + final StringBuilder output = new StringBuilder(in.length()); + final Matcher matcher = pattern.matcher(in); + + int lastEndingPos = 0; + + while (matcher.find()) { + output.append(in, lastEndingPos, matcher.start()); + + processMatch.scrubMatch(matcher, output); + + lastEndingPos = matcher.end(); } - return out; + + if (lastEndingPos == 0) { + // there were no matches, save copying all the data + return in; + } else { + output.append(in, lastEndingPos, in.length()); + + return output; + } + } + + private interface ProcessMatch { + void scrubMatch(@NonNull Matcher matcher, @NonNull StringBuilder output); } } diff --git a/test/unitTest/java/org/thoughtcrime/securesms/logsubmit/ScrubberTest.java b/test/unitTest/java/org/thoughtcrime/securesms/logsubmit/ScrubberTest.java deleted file mode 100644 index 1578e26863..0000000000 --- a/test/unitTest/java/org/thoughtcrime/securesms/logsubmit/ScrubberTest.java +++ /dev/null @@ -1,57 +0,0 @@ -package org.thoughtcrime.securesms.logsubmit; - -import org.junit.Test; -import org.thoughtcrime.securesms.logsubmit.util.Scrubber; - -import static org.junit.Assert.assertEquals; - -public class ScrubberTest { - - @Test - public void scrub_phoneNumber_solo() { - Scrubber scrubber = new Scrubber(); - String output = scrubber.scrub("+16101234567"); - - assertEquals("+*********67", output); - } - - @Test - public void scrub_phoneNumber_surrounded() { - Scrubber scrubber = new Scrubber(); - String output = scrubber.scrub("Spider-Man's phone number is +16101234567 -- isn't that crazy?"); - - assertEquals("Spider-Man's phone number is +*********67 -- isn't that crazy?", output); - } - - @Test - public void scrub_email_solo() { - Scrubber scrubber = new Scrubber(); - String output = scrubber.scrub("jonah@dailybugle.com"); - - assertEquals("j*****************om", output); - } - - @Test - public void scrub_email_surrounded() { - Scrubber scrubber = new Scrubber(); - String output = scrubber.scrub("Email tips to jonah@dailybugle.com -- it's your civic duty"); - - assertEquals("Email tips to j*****************om -- it's your civic duty", output); - } - - @Test - public void scrub_groupId_solo() { - Scrubber scrubber = new Scrubber(); - String output = scrubber.scrub("__textsecure_group__!abcdefg1234567890"); - - assertEquals("_***********************************90", output); - } - - @Test - public void scrub_groupId_surrounded() { - Scrubber scrubber = new Scrubber(); - String output = scrubber.scrub("The group id is __textsecure_group__!abcdefg1234567890 and don't forget it"); - - assertEquals("The group id is _***********************************90 and don't forget it", output); - } -} diff --git a/test/unitTest/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java b/test/unitTest/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java new file mode 100644 index 0000000000..a8ad046f24 --- /dev/null +++ b/test/unitTest/java/org/thoughtcrime/securesms/logsubmit/util/ScrubberTest.java @@ -0,0 +1,86 @@ +package org.thoughtcrime.securesms.logsubmit.util; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; + +import static org.junit.Assert.assertEquals; + +@RunWith(Parameterized.class) +public final class ScrubberTest { + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][]{ + + { "An E164 number +15551234567", + "An E164 number +*********67" }, + + { "A UK number +447700900000", + "A UK number +**********00" }, + + { "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B447700900099", + "An avatar filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/%2B**********99" }, + + { "Multiple numbers +447700900001 +447700900002", + "Multiple numbers +**********01 +**********02" }, + + { "One less than shortest number +155556789", + "One less than shortest number +155556789" }, + + { "Shortest number +1555567890", + "Shortest number +********90" }, + + { "Longest number +155556789012345", + "Longest number +*************45" }, + + { "One more than longest number +1234567890123456", + "One more than longest number +*************456" }, + + { "abc@def.com", + "a...@..." }, + + { "An email abc@def.com", + "An email a...@..." }, + + { "A short email a@def.com", + "A short email a...@..." }, + + { "A email with multiple parts before the @ d.c+b.a@mulitpart.domain.com and a multipart domain", + "A email with multiple parts before the @ d...@... and a multipart domain" }, + + { "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/abc@signal.org", + "An avatar email filename: file:///data/user/0/org.thoughtcrime.securesms/files/avatars/a...@..." }, + + { "An email and a number abc@def.com +155556789012345", + "An email and a number a...@... +*************45" }, + + { "__textsecure_group__!abcdefg1234567890", + "__...group...90" }, + + { "A group id __textsecure_group__!abcdefg0987654321 surrounded with text", + "A group id __...group...21 surrounded with text" }, + + { "All patterns in a row __textsecure_group__!abcdefg1234567890 +1234567890123456 abc@def.com with text after", + "All patterns in a row __...group...90 +*************456 a...@... with text after" + } + + }); + } + + private final String input; + private final String expected; + + public ScrubberTest(String input, String expected) { + this.input = input; + this.expected = expected; + } + + @Test + public void scrub() { + assertEquals(expected, Scrubber.scrub(input).toString()); + } +}