diff --git a/Signal/test/util/SearcherTest.swift b/Signal/test/util/SearcherTest.swift index 0fcba4d57..d8e1aa5c0 100644 --- a/Signal/test/util/SearcherTest.swift +++ b/Signal/test/util/SearcherTest.swift @@ -64,6 +64,12 @@ class FakeContactsManager: NSObject, ContactsManagerProtocol { func isSystemContact(withSignalAccount recipientId: String) -> Bool { return true } + + func compare(signalAccount left: SignalAccount, with right: SignalAccount) -> ComparisonResult { + owsFail("if this method ends up being used by the tests, we should provide a better implementation.") + + return .orderedAscending + } } let bobRecipientId = "+49030183000" @@ -131,6 +137,12 @@ class ConversationSearcherTest: XCTestCase { let goodbyeBookClub = TSOutgoingMessage(in: bookClubGroupThread, messageBody: "Goodbye Book Club", attachmentId: nil) goodbyeBookClub.save(with: transaction) + + let bobsPhoneNumber = TSOutgoingMessage(in: bookClubGroupThread, messageBody: "My phone number is: 321-321-4321", attachmentId: nil) + bobsPhoneNumber.save(with: transaction) + + let bobsFaxNumber = TSOutgoingMessage(in: bookClubGroupThread, messageBody: "My fax is: 222-333-4444", attachmentId: nil) + bobsFaxNumber.save(with: transaction) } } @@ -181,17 +193,17 @@ class ConversationSearcherTest: XCTestCase { // Exact match threads = searchConversations(searchText: aliceRecipientId) XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) // Partial match threads = searchConversations(searchText: "+123456") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) // Prefixes threads = searchConversations(searchText: "12345678900") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) threads = searchConversations(searchText: "49") XCTAssertEqual(1, threads.count) @@ -199,15 +211,19 @@ class ConversationSearcherTest: XCTestCase { threads = searchConversations(searchText: "1-234-56") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) threads = searchConversations(searchText: "123456") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) threads = searchConversations(searchText: "1.234.56") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) + + threads = searchConversations(searchText: "1 234 56") + XCTAssertEqual(3, threads.count) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) } func testSearchContactByNumberWithoutCountryCode() { @@ -215,11 +231,11 @@ class ConversationSearcherTest: XCTestCase { // Phone Number formatting should be forgiving threads = searchConversations(searchText: "234.56") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) threads = searchConversations(searchText: "234 56") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) } func testSearchConversationByContactByName() { @@ -227,7 +243,7 @@ class ConversationSearcherTest: XCTestCase { threads = searchConversations(searchText: "Alice") XCTAssertEqual(3, threads.count) - XCTAssertEqual([bookClubThread, snackClubThread, aliceThread], threads) + XCTAssertEqual([bookClubThread, aliceThread, snackClubThread], threads) threads = searchConversations(searchText: "Bob") XCTAssertEqual(1, threads.count) @@ -255,6 +271,86 @@ class ConversationSearcherTest: XCTestCase { XCTAssert(resultSet.messages.map { $0.thread }.contains(bookClubThread)) } + func testSearchEdgeCases() { + var resultSet: SearchResultSet = .empty + + resultSet = getResultSet(searchText: "Hello Alice") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "hello alice") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "Hel") + XCTAssertEqual(2, resultSet.messages.count) + XCTAssertEqual(["Hello Alice", "Hello Book Club"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "Hel Ali") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "Hel Ali Alic") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "Ali Hel") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "CLU") + XCTAssertEqual(2, resultSet.messages.count) + XCTAssertEqual(["Goodbye Book Club", "Hello Book Club"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "hello !@##!@#!$^@!@#! alice") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["Hello Alice"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "3213 phone") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["My phone number is: 321-321-4321"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "PHO 3213") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["My phone number is: 321-321-4321"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "fax") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["My fax is: 222-333-4444"], bodies(forMessageResults: resultSet.messages)) + + resultSet = getResultSet(searchText: "fax 2223") + XCTAssertEqual(1, resultSet.messages.count) + XCTAssertEqual(["My fax is: 222-333-4444"], bodies(forMessageResults: resultSet.messages)) + } + + func bodies(forMessageResults messageResults: [ConversationSearchResult]) -> [String] { + var result = [String]() + + self.dbConnection.read { transaction in + for messageResult in messageResults { + guard let messageId = messageResult.messageId else { + owsFail("message result missing message id") + continue + } + guard let interaction = TSInteraction.fetch(uniqueId: messageId, transaction: transaction) else { + owsFail("couldn't load interaction for message result") + continue + } + guard let message = interaction as? TSMessage else { + owsFail("invalid message for message result") + continue + } + guard let messageBody = message.body else { + owsFail("message result missing message body") + continue + } + result.append(messageBody) + } + } + + return result.sorted() + } + // Mark: Helpers private func searchConversations(searchText: String) -> [ThreadViewModel] { @@ -265,7 +361,7 @@ class ConversationSearcherTest: XCTestCase { private func getResultSet(searchText: String) -> SearchResultSet { var results: SearchResultSet! self.dbConnection.read { transaction in - results = self.searcher.results(searchText: searchText, transaction: transaction) + results = self.searcher.results(searchText: searchText, transaction: transaction, contactsManager: TextSecureKitEnv.shared().contactsManager) } return results } @@ -341,4 +437,24 @@ class SearcherTest: XCTestCase { XCTAssert(searcher.matches(item: stinkingLizaveta, query: "Liza 323")) XCTAssertFalse(searcher.matches(item: regularLizaveta, query: "Liza 323")) } + + func testSearchQuery() { + XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza"), "\"Liza\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza +1-323"), "\"1323\"* \"Liza\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "\"\\ `~!@#$%^&*()_+-={}|[]:;'<>?,./Liza +1-323"), "\"1323\"* \"Liza\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "renaldo RENALDO reñaldo REÑALDO"), "\"RENALDO\"* \"REÑALDO\"* \"renaldo\"* \"reñaldo\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "😏"), "\"😏\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "alice 123 bob 456"), "\"123456\"* \"alice\"* \"bob\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "Li!za"), "\"Liza\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza Liza"), "\"Liza\"*") + XCTAssertEqual(FullTextSearchFinder.query(searchText: "Liza liza"), "\"Liza\"* \"liza\"*") + } + + func testTextNormalization() { + XCTAssertEqual(FullTextSearchFinder.normalize(text: "Liza"), "Liza") + XCTAssertEqual(FullTextSearchFinder.normalize(text: "Liza +1-323"), "Liza 1323") + XCTAssertEqual(FullTextSearchFinder.normalize(text: "\"\\ `~!@#$%^&*()_+-={}|[]:;'<>?,./Liza +1-323"), "Liza 1323") + XCTAssertEqual(FullTextSearchFinder.normalize(text: "renaldo RENALDO reñaldo REÑALDO"), "renaldo RENALDO reñaldo REÑALDO") + XCTAssertEqual(FullTextSearchFinder.normalize(text: "😏"), "😏") + } } diff --git a/SignalMessaging/contacts/OWSContactsManager.h b/SignalMessaging/contacts/OWSContactsManager.h index 7dc66d88c..3a34898ba 100644 --- a/SignalMessaging/contacts/OWSContactsManager.h +++ b/SignalMessaging/contacts/OWSContactsManager.h @@ -71,7 +71,6 @@ extern NSString *const OWSContactsManagerSignalAccountsDidChangeNotification; * Used for sorting, respects system contacts name sort order preference. */ - (NSString *)comparableNameForSignalAccount:(SignalAccount *)signalAccount; -- (NSComparisonResult)compareSignalAccount:(SignalAccount *)left withSignalAccount:(SignalAccount *)right; // Generally we prefer the formattedProfileName over the raw profileName so as to // distinguish a profile name apart from a name pulled from the system's contacts. diff --git a/SignalMessaging/utils/ConversationSearcher.swift b/SignalMessaging/utils/ConversationSearcher.swift index ff72a4d55..70cb56c63 100644 --- a/SignalMessaging/utils/ConversationSearcher.swift +++ b/SignalMessaging/utils/ConversationSearcher.swift @@ -39,13 +39,13 @@ public class ConversationSearchResult: Comparable { public class ContactSearchResult: Comparable { public let signalAccount: SignalAccount - public let contactsManager: OWSContactsManager + public let contactsManager: ContactsManagerProtocol public var recipientId: String { return signalAccount.recipientId } - init(signalAccount: SignalAccount, contactsManager: OWSContactsManager) { + init(signalAccount: SignalAccount, contactsManager: ContactsManagerProtocol) { self.signalAccount = signalAccount self.contactsManager = contactsManager } @@ -53,7 +53,7 @@ public class ContactSearchResult: Comparable { // Mark: Comparable public static func < (lhs: ContactSearchResult, rhs: ContactSearchResult) -> Bool { - return lhs.contactsManager.compareSignalAccount(lhs.signalAccount, with: rhs.signalAccount) == .orderedAscending + return lhs.contactsManager.compare(signalAccount: lhs.signalAccount, with: rhs.signalAccount) == .orderedAscending } // MARK: Equatable @@ -99,7 +99,7 @@ public class ConversationSearcher: NSObject { public func results(searchText: String, transaction: YapDatabaseReadTransaction, - contactsManager: OWSContactsManager) -> SearchResultSet { + contactsManager: ContactsManagerProtocol) -> SearchResultSet { var conversations: [ConversationSearchResult] = [] var contacts: [ContactSearchResult] = [] diff --git a/SignalServiceKit/src/Protocols/ContactsManagerProtocol.h b/SignalServiceKit/src/Protocols/ContactsManagerProtocol.h index 64d7e0335..7a6e5cc69 100644 --- a/SignalServiceKit/src/Protocols/ContactsManagerProtocol.h +++ b/SignalServiceKit/src/Protocols/ContactsManagerProtocol.h @@ -17,6 +17,9 @@ NS_ASSUME_NONNULL_BEGIN - (BOOL)isSystemContact:(NSString *)recipientId; - (BOOL)isSystemContactWithSignalAccount:(NSString *)recipientId; +- (NSComparisonResult)compareSignalAccount:(SignalAccount *)left + withSignalAccount:(SignalAccount *)right NS_SWIFT_NAME(compare(signalAccount:with:)); + @end NS_ASSUME_NONNULL_END diff --git a/SignalServiceKit/src/Storage/FullTextSearchFinder.swift b/SignalServiceKit/src/Storage/FullTextSearchFinder.swift index e7bca46cf..ab1042aad 100644 --- a/SignalServiceKit/src/Storage/FullTextSearchFinder.swift +++ b/SignalServiceKit/src/Storage/FullTextSearchFinder.swift @@ -18,23 +18,66 @@ public class SearchIndexer { } private func normalize(indexingText: String) -> String { - var normalized: String = indexingText.trimmingCharacters(in: .whitespacesAndNewlines) - - // Remove any punctuation from the search index - let nonformattingScalars = normalized.unicodeScalars.lazy.filter { - !CharacterSet.punctuationCharacters.contains($0) - } - - normalized = String(String.UnicodeScalarView(nonformattingScalars)) - - return normalized + return FullTextSearchFinder.normalize(text: indexingText) } } @objc public class FullTextSearchFinder: NSObject { - // Mark: Querying + // MARK: - Querying + + // We want to match by prefix for "search as you type" functionality. + // SQLite does not support suffix or contains matches. + public class func query(searchText: String) -> String { + // 1. Normalize the search text. + // + // TODO: We could arguably convert to lowercase since the search + // is case-insensitive. + let normalizedSearchText = FullTextSearchFinder.normalize(text: searchText) + + // 2. Split the non-numeric text into query terms (or tokens). + let nonNumericText = String(String.UnicodeScalarView(normalizedSearchText.unicodeScalars.lazy.map { + if CharacterSet.decimalDigits.contains($0) { + return " " + } else { + return $0 + } + })) + var queryTerms = nonNumericText.split(separator: " ") + + // 3. Add an additional numeric-only query term. + let digitsOnlyScalars = normalizedSearchText.unicodeScalars.lazy.filter { + CharacterSet.decimalDigits.contains($0) + } + let digitsOnly: Substring = Substring(String(String.UnicodeScalarView(digitsOnlyScalars))) + queryTerms.append(digitsOnly) + + // 4. De-duplicate and sort query terms. + // Duplicate terms are redundant. + // Sorting terms makes the output of this method deterministic and easier to test, + // and the order won't affect the search results. + queryTerms = Array(Set(queryTerms)).sorted() + + // 5. Filter the query terms. + let filteredQueryTerms = queryTerms.filter { + // Ignore empty terms. + $0.count > 0 + }.map { + // Allow partial match of each term. + // + // Note that we use double-quotes to enclose each search term. + // Quoted search terms can include a few more characters than + // "bareword" (non-quoted) search terms. This shouldn't matter, + // since we're filtering all of the affected characters, but + // quoting protects us from any bugs in that logic. + "\"\($0)\"*" + } + + // 6. Join terms into query string. + let query = filteredQueryTerms.joined(separator: " ") + return query + } public func enumerateObjects(searchText: String, transaction: YapDatabaseReadTransaction, block: @escaping (Any, String) -> Void) { guard let ext: YapDatabaseFullTextSearchTransaction = ext(transaction: transaction) else { @@ -42,50 +85,73 @@ public class FullTextSearchFinder: NSObject { return } - let normalized = normalize(queryText: searchText) + let query = FullTextSearchFinder.query(searchText: searchText) - // We want to match by prefix for "search as you type" functionality. - // SQLite does not support suffix or contains matches. - let prefixQuery = "\(normalized)*" + Logger.verbose("\(logTag) query: \(query)") let maxSearchResults = 500 var searchResultCount = 0 let snippetOptions = YapDatabaseFullTextSearchSnippetOptions() snippetOptions.startMatchText = "" snippetOptions.endMatchText = "" - ext.enumerateKeysAndObjects(matching: prefixQuery, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer) in + ext.enumerateKeysAndObjects(matching: query, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer) in guard searchResultCount < maxSearchResults else { stop.pointee = true return } - searchResultCount = searchResultCount + 1 + searchResultCount += 1 block(object, snippet) } } - private func normalize(queryText: String) -> String { - var normalized: String = queryText.trimmingCharacters(in: .whitespacesAndNewlines) - - // Remove any punctuation from the search terms - let nonformattingScalars = normalized.unicodeScalars.lazy.filter { - !CharacterSet.punctuationCharacters.contains($0) - } - let normalizedChars = String(String.UnicodeScalarView(nonformattingScalars)) - - let digitsOnlyScalars = normalized.unicodeScalars.lazy.filter { - CharacterSet.decimalDigits.contains($0) + // MARK: - Normalization + + fileprivate static var charactersToRemove: CharacterSet = { + // * We want to strip punctuation - and our definition of "punctuation" + // is broader than `CharacterSet.punctuationCharacters`. + // * FTS should be robust to (i.e. ignore) illegal and control characters, + // but it's safer if we filter them ourselves as well. + var charactersToFilter = CharacterSet.punctuationCharacters + charactersToFilter.formUnion(CharacterSet.illegalCharacters) + charactersToFilter.formUnion(CharacterSet.controlCharacters) + + // We want to strip all ASCII characters except: + // * Letters a-z, A-Z + // * Numerals 0-9 + // * Whitespace + var asciiToFilter = CharacterSet(charactersIn: UnicodeScalar(0x0)!.. String { + // 1. Filter out invalid characters. + let filtered = text.unicodeScalars.lazy.filter({ + !charactersToRemove.contains($0) + }) + + // 2. Simplify whitespace. + let simplifyingFunction: (UnicodeScalar) -> UnicodeScalar = { + if CharacterSet.whitespacesAndNewlines.contains($0) { + return UnicodeScalar(" ") + } else { + return $0 + } } - let normalizedDigits = String(String.UnicodeScalarView(digitsOnlyScalars)) + let simplified = filtered.map(simplifyingFunction) - if normalizedDigits.count > 0 { - return "\(normalizedChars) OR \(normalizedDigits)" - } else { - return "\(normalizedChars)" - } + // 3. Strip leading & trailing whitespace last, since we may replace + // filtered characters with whitespace. + let result = String(String.UnicodeScalarView(simplified)) + return result.trimmingCharacters(in: .whitespacesAndNewlines) } - // Mark: Index Building + // MARK: - Index Building private class var contactsManager: ContactsManagerProtocol { return TextSecureKitEnv.shared().contactsManager @@ -153,7 +219,7 @@ public class FullTextSearchFinder: NSObject { // MARK: - Extension Registration - private static let dbExtensionName: String = "FullTextSearchFinderExtension)" + private static let dbExtensionName: String = "FullTextSearchFinderExtension" private func ext(transaction: YapDatabaseReadTransaction) -> YapDatabaseFullTextSearchTransaction? { return transaction.ext(FullTextSearchFinder.dbExtensionName) as? YapDatabaseFullTextSearchTransaction @@ -179,8 +245,11 @@ public class FullTextSearchFinder: NSObject { } // update search index on contact name changes? - // update search index on message insertion? - return YapDatabaseFullTextSearch(columnNames: ["content"], handler: handler) + return YapDatabaseFullTextSearch(columnNames: ["content"], + options: nil, + handler: handler, + ftsVersion: YapDatabaseFullTextSearchFTS5Version, + versionTag: "1") } } diff --git a/SignalServiceKit/src/Storage/OWSStorage.m b/SignalServiceKit/src/Storage/OWSStorage.m index 8a61f5b2c..9f0d1ee11 100644 --- a/SignalServiceKit/src/Storage/OWSStorage.m +++ b/SignalServiceKit/src/Storage/OWSStorage.m @@ -542,10 +542,13 @@ NSString *const kNSUserDefaults_DatabaseExtensionVersionMap = @"kNSUserDefaults_ YapDatabaseFullTextSearch *fullTextSearch = (YapDatabaseFullTextSearch *)extension; NSString *versionTag = [self appendSuffixToDatabaseExtensionVersionIfNecessary:fullTextSearch.versionTag extensionName:extensionName]; - YapDatabaseFullTextSearch *fullTextSearchCopy = [[YapDatabaseFullTextSearch alloc] initWithColumnNames:fullTextSearch->columnNames.array - handler:fullTextSearch->handler - versionTag:versionTag]; - + YapDatabaseFullTextSearch *fullTextSearchCopy = + [[YapDatabaseFullTextSearch alloc] initWithColumnNames:fullTextSearch->columnNames.array + options:fullTextSearch->options + handler:fullTextSearch->handler + ftsVersion:fullTextSearch->ftsVersion + versionTag:versionTag]; + return fullTextSearchCopy; } else if ([extension isKindOfClass:[YapDatabaseCrossProcessNotification class]]) { // versionTag doesn't matter for YapDatabaseCrossProcessNotification.