From b5e026575853ac7a7d984c7937320d54a96d5436 Mon Sep 17 00:00:00 2001 From: Matthew Chen Date: Tue, 12 Jun 2018 16:40:28 -0400 Subject: [PATCH] Filter search index text. --- Signal/test/util/SearcherTest.swift | 6 +++ .../src/Storage/FullTextSearchFinder.swift | 52 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/Signal/test/util/SearcherTest.swift b/Signal/test/util/SearcherTest.swift index 0fcba4d57..2472a754e 100644 --- a/Signal/test/util/SearcherTest.swift +++ b/Signal/test/util/SearcherTest.swift @@ -341,4 +341,10 @@ class SearcherTest: XCTestCase { XCTAssert(searcher.matches(item: stinkingLizaveta, query: "Liza 323")) XCTAssertFalse(searcher.matches(item: regularLizaveta, query: "Liza 323")) } + + func testTextSanitization() { + XCTAssertEqual(FullTextSearchFinder.normalize(text: "Liza"), "Liza") + XCTAssertEqual(FullTextSearchFinder.normalize(text: "Liza +1-323"), "Liza 1 323") + XCTAssertEqual(FullTextSearchFinder.normalize(text: "\"\\'!&@#$%^&*()Liza +1-323"), "Liza 1 323") + } } diff --git a/SignalServiceKit/src/Storage/FullTextSearchFinder.swift b/SignalServiceKit/src/Storage/FullTextSearchFinder.swift index e7bca46cf..b830d32c8 100644 --- a/SignalServiceKit/src/Storage/FullTextSearchFinder.swift +++ b/SignalServiceKit/src/Storage/FullTextSearchFinder.swift @@ -28,6 +28,7 @@ public class SearchIndexer { normalized = String(String.UnicodeScalarView(nonformattingScalars)) return normalized +// return FullTextSearchFinder.filterIndexOrQueryText(text: indexingText) } } @@ -64,6 +65,57 @@ public class FullTextSearchFinder: NSObject { } } + // MARK: Filtering + +// private class func characterSet(fromCharacter: UInt32, toCharacter: UInt32) -> CharacterSet { +// var string = "" +// // Add 1 to include the last character.
+// for character in fromCharacter ..< toCharacter + 1 { +// guard let chr = Unicode.Scalar(character) else { +// assertionFailure("\(self.logTag) could not parse character.") +// continue +// } +// string += String(chr) +// } +// return CharacterSet(charactersIn: string) +// } +// +// private static var kFilterCharacters: CharacterSet = { +// var set = CharacterSet() +// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 0, toCharacter: 31)) +// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 33, toCharacter: 47)) +// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 58, toCharacter: 64)) +// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 91, toCharacter: 96)) +// set.formUnion(FullTextSearchFinder.characterSet(fromCharacter: 123, toCharacter: 126)) +// return set +// }() +// +// public class func filterIndexOrQueryText(text: String) -> String { +// let filteredScalars = String(text.unicodeScalars.lazy.map { +// if kFilterCharacters.contains($0) { +// return " " +// } else { +// return Character($0) +// } +// }) +// +// // Remove any phone number formatting from the search terms +// let nonformattingScalars = filteredScalars.unicodeScalars.lazy.filter { +// !CharacterSet.punctuationCharacters.contains($0) +// } +// +// var normalized = String(String.UnicodeScalarView(nonformattingScalars)) +// +// // Simplify the normalized text by combining adjacent whitespace. +// while normalized.contains("  ") { +// normalized = normalized.replacingOccurrences(of: "  ", with: " ") +// } +// +// // We strip leading & trailing whitespace last, since we may replace +// // filtered characters with whitespace. +// return normalized.trimmingCharacters(in: .whitespacesAndNewlines) +// } + private func normalize(queryText: String) -> String { var normalized: String = queryText.trimmingCharacters(in: .whitespacesAndNewlines)