/*
 * Copyright (C) 2014 Open Whisper Systems
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
18 package org
.asamk
.signal
.logging
;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scrub data for possibly sensitive information.
 */
30 public final class Scrubber
{
36 * The middle group will be censored.
37 * Supposedly, the shortest international phone numbers in use contain seven digits.
38 * Handles URL encoded +, %2B
40 private static final Pattern E164_PATTERN
= Pattern
.compile("(\\+|%2B)(\\d{5,13})(\\d{2})");
41 private static final String E164_CENSOR
= "*************";
44 * The second group will be censored.
46 private static final Pattern CRUDE_EMAIL_PATTERN
= Pattern
.compile("\\b([^\\s/])([^\\s/]*@[^\\s]+)");
47 private static final String EMAIL_CENSOR
= "...@...";
50 * The middle group will be censored.
52 private static final Pattern UUID_PATTERN
= Pattern
.compile(
53 "(JOB::)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{10})([0-9a-f]{2})",
54 Pattern
.CASE_INSENSITIVE
);
55 private static final String UUID_CENSOR
= "********-****-****-****-**********";
58 * The entire string is censored.
60 private static final Pattern IPV4_PATTERN
= Pattern
.compile("\\b"
61 + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\."
62 + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\."
63 + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\."
64 + "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
66 private static final String IPV4_CENSOR
= "...ipv4...";
69 * The domain name except for TLD will be censored.
71 private static final Pattern DOMAIN_PATTERN
= Pattern
.compile("([a-z0-9]+\\.)+([a-z0-9\\-]*[a-z\\-][a-z0-9\\-]*)",
72 Pattern
.CASE_INSENSITIVE
);
73 private static final String DOMAIN_CENSOR
= "***.";
74 private static final Set
<String
> TOP_100_TLDS
= new HashSet
<>(Arrays
.asList("com",
173 public static CharSequence
scrub(CharSequence
in) {
178 in = scrubDomains(in);
184 private static CharSequence
scrubE164(CharSequence
in) {
187 (matcher
, output
) -> output
.append(matcher
.group(1))
188 .append(E164_CENSOR
, 0, matcher
.group(2).length())
189 .append(matcher
.group(3)));
192 private static CharSequence
scrubEmail(CharSequence
in) {
195 (matcher
, output
) -> output
.append(matcher
.group(1)).append(EMAIL_CENSOR
));
198 private static CharSequence
scrubUuids(CharSequence
in) {
199 return scrub(in, UUID_PATTERN
, (matcher
, output
) -> {
200 if (matcher
.group(1) != null && !matcher
.group(1).isEmpty()) {
201 output
.append(matcher
.group(1)).append(matcher
.group(2)).append(matcher
.group(3));
203 output
.append(UUID_CENSOR
).append(matcher
.group(3));
208 private static CharSequence
scrubDomains(CharSequence
in) {
209 return scrub(in, DOMAIN_PATTERN
, (matcher
, output
) -> {
210 String match
= matcher
.group(0);
211 if (matcher
.groupCount() == 2
212 && TOP_100_TLDS
.contains(matcher
.group(2).toLowerCase(Locale
.US
))
213 && !match
.endsWith("whispersystems.org")
214 && !match
.endsWith("signal.org")) {
215 output
.append(DOMAIN_CENSOR
).append(matcher
.group(2));
217 output
.append(match
);
222 private static CharSequence
scrubIpv4(CharSequence
in) {
223 return scrub(in, IPV4_PATTERN
, (matcher
, output
) -> output
.append(IPV4_CENSOR
));
226 private static CharSequence
scrub(
227 CharSequence
in, Pattern pattern
, ProcessMatch processMatch
229 final StringBuilder output
= new StringBuilder(in.length());
230 final Matcher matcher
= pattern
.matcher(in);
232 int lastEndingPos
= 0;
234 while (matcher
.find()) {
235 output
.append(in, lastEndingPos
, matcher
.start());
237 processMatch
.scrubMatch(matcher
, output
);
239 lastEndingPos
= matcher
.end();
242 if (lastEndingPos
== 0) {
243 // there were no matches, save copying all the data
246 output
.append(in, lastEndingPos
, in.length());
252 private interface ProcessMatch
{
254 void scrubMatch(Matcher matcher
, StringBuilder output
);