Restrict group names to 32 graphemes.

Uses some code from #10132 hence co-author:

Co-authored-by: Fumiaki Yoshimatsu <fumiakiy@gmail.com>
This commit is contained in:
Alan Evans
2021-01-13 19:31:37 -04:00
committed by Greyson Parrelli
parent 0ccc7e3c06
commit 2d39e43677
8 changed files with 346 additions and 15 deletions

View File

@@ -0,0 +1,146 @@
package org.signal.core.util;
import android.os.Build;
import androidx.annotation.NonNull;
import androidx.annotation.RequiresApi;
import java.util.Iterator;
/**
 * Character-boundary ("grapheme cluster") iterator that hides which platform break iterator is
 * in use. Obtain an instance with {@link #getInstance()}, supply text with
 * {@link #setText(CharSequence)}, then count, iterate, or truncate by grapheme.
 * <p>
 * All operations share the cursor of the single underlying break iterator, so
 * {@link #countBreaks()}, {@link #iterator()} and {@link #take(int)} must not be interleaved on
 * the same instance; each of them rewinds to the start before it begins.
 */
public abstract class BreakIteratorCompat implements Iterable<CharSequence> {

  /** Value returned by {@link #next()} once there are no further boundaries. */
  public static final int DONE = -1;

  /** Text being analysed; defaults to empty so {@link #take(int)} is safe before {@link #setText}. */
  private CharSequence charSequence = "";

  /** Moves to the first boundary of the text and returns its offset. */
  public abstract int first();

  /** Advances to the next boundary and returns its offset, or {@link #DONE} if there is none. */
  public abstract int next();

  /**
   * Sets the text to analyse. Subclasses override this to also hand the text to their delegate.
   *
   * @param charSequence the new text
   */
  public void setText(CharSequence charSequence) {
    this.charSequence = charSequence;
  }

  /**
   * Creates the implementation suited to the running API level: the ICU-backed iterator on
   * API 24 and above, otherwise the {@code java.text} fallback.
   */
  public static BreakIteratorCompat getInstance() {
    if (Build.VERSION.SDK_INT >= 24) {
      return new AndroidIcuBreakIterator();
    } else {
      return new FallbackBreakIterator();
    }
  }

  /**
   * Counts the boundaries after the start of the current text, which is the number of
   * graphemes it contains. Leaves the cursor at the end, so a following {@link #next()}
   * returns {@link #DONE}.
   */
  public int countBreaks() {
    int breakCount = 0;

    first();

    while (next() != DONE) {
      breakCount++;
    }

    return breakCount;
  }

  /**
   * Returns an iterator over the graphemes of the current text, starting from the beginning.
   * The iterator drives this object's cursor, so only one iteration may be active at a time.
   */
  @Override
  public @NonNull Iterator<CharSequence> iterator() {
    return new Iterator<CharSequence>() {

      // [index1, index2) always spans the grapheme that the next call to next() will return.
      int index1 = BreakIteratorCompat.this.first();
      int index2 = BreakIteratorCompat.this.next();

      @Override
      public boolean hasNext() {
        return index2 != DONE;
      }

      @Override
      public CharSequence next() {
        // Honour the Iterator contract instead of silently handing back an empty sequence.
        if (index2 == DONE) {
          throw new java.util.NoSuchElementException();
        }

        CharSequence c = charSequence.subSequence(index1, index2);

        index1 = index2;
        index2 = BreakIteratorCompat.this.next();

        return c;
      }
    };
  }

  /**
   * Takes at most {@code atMost} graphemes from the start of the current text.
   *
   * @param atMost maximum number of graphemes to keep; zero or negative yields an empty result
   * @return the leading graphemes, or the whole text if it has fewer than {@code atMost}
   */
  public final CharSequence take(int atMost) {
    if (atMost <= 0) return "";

    StringBuilder stringBuilder = new StringBuilder(charSequence.length());
    int           count         = 0;

    for (CharSequence grapheme : this) {
      stringBuilder.append(grapheme);

      count++;

      if (count >= atMost) break;
    }

    return stringBuilder.toString();
  }

  /**
   * A BreakIteratorCompat implementation that delegates calls to `android.icu.text.BreakIterator`.
   * This class handles grapheme clusters fine but requires Android API >= 24.
   */
  @RequiresApi(24)
  private static class AndroidIcuBreakIterator extends BreakIteratorCompat {

    private final android.icu.text.BreakIterator breakIterator = android.icu.text.BreakIterator.getCharacterInstance();

    @Override
    public int first() {
      return breakIterator.first();
    }

    @Override
    public int next() {
      return breakIterator.next();
    }

    @Override
    public void setText(CharSequence charSequence) {
      super.setText(charSequence);

      // The CharSequence overload is only called on API 29+; older levels get a String copy.
      if (Build.VERSION.SDK_INT >= 29) {
        breakIterator.setText(charSequence);
      } else {
        breakIterator.setText(charSequence.toString());
      }
    }
  }

  /**
   * A BreakIteratorCompat implementation that delegates calls to `java.text.BreakIterator`.
   * This class may or may not handle grapheme clusters well depending on the underlying implementation.
   * In the emulator, API 23 implements ICU version of the BreakIterator so that it handles grapheme
   * clusters fine. But API 21 implements RuleBasedIterator which does not handle grapheme clusters.
   * <p>
   * If it doesn't handle grapheme clusters correctly, in most cases the combined characters are
   * broken up into pieces when the code tries to trim a string. For example, an emoji that is
   * a combination of a person, gender and skin tone, trimming the character using this class may result
   * in trimming the parts of the character, e.g. a dark skin frowning woman emoji may result in
   * a neutral skin frowning woman emoji.
   */
  private static class FallbackBreakIterator extends BreakIteratorCompat {

    private final java.text.BreakIterator breakIterator = java.text.BreakIterator.getCharacterInstance();

    @Override
    public int first() {
      return breakIterator.first();
    }

    @Override
    public int next() {
      return breakIterator.next();
    }

    @Override
    public void setText(CharSequence charSequence) {
      super.setText(charSequence);
      breakIterator.setText(charSequence.toString());
    }
  }
}

View File

@@ -0,0 +1,20 @@
package org.signal.core.util;
import android.text.InputFilter;
import android.widget.EditText;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Static helpers for configuring {@link EditText} widgets.
 */
public final class EditTextUtil {

  private EditTextUtil() {
    // Utility class; not instantiable.
  }

  /**
   * Appends a {@link GraphemeClusterLimitFilter} to the filters already installed on the given
   * field, keeping the existing filters and their order.
   *
   * @param text             the field to constrain
   * @param maximumGraphemes the limit passed to the new filter
   */
  public static void addGraphemeClusterLimitFilter(EditText text, int maximumGraphemes) {
    InputFilter[] current  = text.getFilters();
    int           oldCount = current.length;

    // Copy into a plain InputFilter[] so the extra slot can hold any filter type.
    InputFilter[] extended = Arrays.copyOf(current, oldCount + 1, InputFilter[].class);
    extended[oldCount] = new GraphemeClusterLimitFilter(maximumGraphemes);

    text.setFilters(extended);
  }
}

View File

@@ -0,0 +1,53 @@
package org.signal.core.util;
import android.text.InputFilter;
import android.text.Spanned;
import org.signal.core.util.logging.Log;
/**
 * This filter will constrain edits not to make the number of character breaks of the text
 * greater than the specified maximum.
 * <p>
 * This means it will limit to a maximum number of grapheme clusters.
 */
public final class GraphemeClusterLimitFilter implements InputFilter {

  private static final String TAG = Log.tag(GraphemeClusterLimitFilter.class);

  private final BreakIteratorCompat breakIteratorCompat;
  private final int                 max;

  /**
   * @param max the largest number of grapheme clusters the edited text may contain
   */
  public GraphemeClusterLimitFilter(int max) {
    this.breakIteratorCompat = BreakIteratorCompat.getInstance();
    this.max                 = max;
  }

  /**
   * Checks the text that would result from replacing {@code dest[dstart, dend)} with
   * {@code source[start, end)} and trims the inserted fragment if the result would exceed the
   * limit.
   *
   * @return {@code null} to accept the edit unchanged, a shortened replacement when trimming the
   *         fragment brings the result within the limit, or an empty string to reject the
   *         insertion when trimming cannot achieve that
   */
  @Override
  public CharSequence filter(CharSequence source, int start, int end, Spanned dest, int dstart, int dend) {
    CharSequence sourceFragment = source.subSequence(start, end);

    CharSequence head = dest.subSequence(0, dstart);
    CharSequence tail = dest.subSequence(dend, dest.length());

    breakIteratorCompat.setText(String.format("%s%s%s", head, sourceFragment, tail));
    int length = breakIteratorCompat.countBreaks();

    if (length > max) {
      // Drop as many trailing graphemes from the inserted fragment as the result is over by.
      breakIteratorCompat.setText(sourceFragment);
      int          sourceLength  = breakIteratorCompat.countBreaks();
      CharSequence trimmedSource = breakIteratorCompat.take(sourceLength - (length - max));

      // Re-count the combined text: joining pieces can change where the breaks fall.
      breakIteratorCompat.setText(String.format("%s%s%s", head, trimmedSource, tail));
      int newExpectedCount = breakIteratorCompat.countBreaks();
      if (newExpectedCount > max) {
        Log.w(TAG, "Failed to create string under the required length " + newExpectedCount);
        return "";
      }

      return trimmedSource;
    }

    // null means "accept the original replacement". Returning source itself would make the
    // caller insert the entire source, ignoring the [start, end) range it asked about.
    return null;
  }
}

View File

@@ -0,0 +1,104 @@
package org.signal.core.util;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
/**
 * Unit tests for {@link BreakIteratorCompat}: raw boundary stepping, grapheme counting,
 * iteration, and prefix truncation, including text whose graphemes span several code points.
 */
public final class BreakIteratorCompatTest {

  /** With no text there is no boundary to advance to. */
  @Test
  public void empty() {
    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText("");

    assertEquals(BreakIteratorCompat.DONE, breakIterator.next());
  }

  /** A single character gives one boundary at offset 1, then DONE. */
  @Test
  public void single() {
    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText("a");

    assertEquals(1, breakIterator.next());
    assertEquals(BreakIteratorCompat.DONE, breakIterator.next());
  }

  @Test
  public void count_empty() {
    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText("");

    assertEquals(0, breakIterator.countBreaks());
    assertEquals(BreakIteratorCompat.DONE, breakIterator.next());
  }

  @Test
  public void count_simple_text() {
    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText("abc");

    assertEquals(3, breakIterator.countBreaks());
    assertEquals(BreakIteratorCompat.DONE, breakIterator.next());
  }

  /** Counting twice gives the same answer because each count rewinds to the start. */
  @Test
  public void two_counts() {
    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText("abc");

    assertEquals(3, breakIterator.countBreaks());
    assertEquals(BreakIteratorCompat.DONE, breakIterator.next());
    assertEquals(3, breakIterator.countBreaks());
  }

  /** Ten code points that form seven graphemes. */
  @Test
  public void count_multi_character_graphemes() {
    String hindi = "समाजो गयेग";

    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText(hindi);

    assertEquals(7, breakIterator.countBreaks());
    assertEquals(BreakIteratorCompat.DONE, breakIterator.next());
  }

  /** Iteration yields each grapheme in order, with vowel signs kept attached to their consonant. */
  @Test
  public void iterate_multi_character_graphemes() {
    String hindi = "समाजो गयेग";

    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText(hindi);

    assertEquals(asList("स", "मा", "जो", " ", "ग", "ये", "ग"), toList(breakIterator));
    assertEquals(BreakIteratorCompat.DONE, breakIterator.next());
  }

  /** take(n) keeps whole graphemes only, and clamps at both ends of the range. */
  @Test
  public void split_multi_character_graphemes() {
    String hindi = "समाजो गयेग";

    BreakIteratorCompat breakIterator = BreakIteratorCompat.getInstance();
    breakIterator.setText(hindi);

    assertEquals("समाजो गयेग", breakIterator.take(8));
    assertEquals("समाजो गयेग", breakIterator.take(7));
    assertEquals("समाजो गये", breakIterator.take(6));
    assertEquals("समाजो ग", breakIterator.take(5));
    assertEquals("समाजो ", breakIterator.take(4));
    assertEquals("समाजो", breakIterator.take(3));
    assertEquals("समा", breakIterator.take(2));
    assertEquals("स", breakIterator.take(1));
    assertEquals("", breakIterator.take(0));
    assertEquals("", breakIterator.take(-1));
  }

  /** Collects every grapheme the iterator produces into a list. */
  private List<CharSequence> toList(BreakIteratorCompat breakIterator) {
    List<CharSequence> list = new ArrayList<>();
    breakIterator.forEach(list::add);
    return list;
  }
}