add thread reconstruction: simplified JWZ with merge support
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,101 @@
|
||||
import Foundation
|
||||
import GRDB
|
||||
|
||||
/// Simplified JWZ thread reconstruction.
///
/// Links messages by their `Message-ID`, `In-Reply-To`, and `References` headers.
/// There is deliberately no subject-based fallback, because it produces false matches.
public struct ThreadReconstructor: Sendable {
    private let store: MailStore

    /// Creates a reconstructor that reads and writes thread data through `store`.
    public init(store: MailStore) {
        self.store = store
    }

    /// Process newly inserted messages and assign them to threads.
    ///
    /// Messages are handled one at a time, in array order.
    ///
    /// - Parameter messages: The messages to thread.
    /// - Throws: Any error thrown by the store; processing stops at the first failure.
    public func processMessages(_ messages: [MessageRecord]) throws {
        for message in messages {
            try processOneMessage(message)
        }
    }

    /// Assigns a single message to a thread: a new thread when nothing matches,
    /// the existing thread when one matches, or a merged thread when several match.
    private func processOneMessage(_ message: MessageRecord) throws {
        let relatedIds = relatedMessageIds(for: message)

        // Find existing threads that contain any of these Message-IDs.
        let matchingThreadIds = try store.findThreadsByMessageIds(relatedIds)

        let threadId: String
        if let existingThreadId = matchingThreadIds.first {
            if matchingThreadIds.count > 1 {
                // The new message connects several threads, so merge them first.
                // NOTE(review): the first id is treated as the surviving thread —
                // confirm that `mergeThreads` folds the others into it.
                try store.mergeThreads(matchingThreadIds)
            }
            threadId = existingThreadId
            try updateThreadMetadata(threadId: threadId, newMessage: message)
        } else {
            // No existing thread — start a new one seeded from this message.
            threadId = UUID().uuidString
            try store.insertThread(ThreadRecord(
                id: threadId,
                accountId: message.accountId,
                subject: stripReplyPrefix(message.subject),
                lastDate: message.date,
                messageCount: 1
            ))
        }

        // Link message to thread.
        try store.linkMessageToThread(threadId: threadId, messageId: message.id)
    }

    /// Collects every Message-ID that ties `message` to a thread: the ids listed in
    /// `In-Reply-To` and `References`, plus the message's own `Message-ID`.
    private func relatedMessageIds(for message: MessageRecord) -> Set<String> {
        var ids = Set<String>()
        ids.formUnion(Self.messageIdTokens(in: message.inReplyTo))
        ids.formUnion(Self.messageIdTokens(in: message.refs))
        // The message's own id is used verbatim, exactly as it is stored.
        if let ownId = message.messageId, !ownId.isEmpty {
            ids.insert(ownId)
        }
        return ids
    }

    /// Splits a header value into its individual msg-id tokens.
    ///
    /// Any run of whitespace separates tokens, including the tabs and line breaks
    /// left behind by header folding, not just a single space. A `nil` or blank
    /// header yields no tokens.
    private static func messageIdTokens(in header: String?) -> [String] {
        guard let header = header else { return [] }
        return header.split(whereSeparator: \.isWhitespace).map(String.init)
    }

    /// Updates the message count and last date of an existing thread to account
    /// for `newMessage`, leaving the subject untouched (`subject: nil`).
    ///
    /// NOTE(review): the count is "currently linked messages + 1", which presumes
    /// `newMessage` has not been linked to this thread yet — confirm callers never
    /// reprocess an already-threaded message.
    private func updateThreadMetadata(threadId: String, newMessage: MessageRecord) throws {
        let linkedCount = try store.threadMessageIds(threadId: threadId).count
        let currentThread = try store.threads(accountId: newMessage.accountId)
            .first { $0.id == threadId }
        // Dates are compared as strings; a missing thread falls back to the message date.
        let lastDate = max(currentThread?.lastDate ?? "", newMessage.date)
        try store.updateThread(
            id: threadId,
            lastDate: lastDate,
            messageCount: linkedCount + 1,
            subject: nil
        )
    }

    /// Strip Re:, Fwd:, and Fw: prefixes for thread subject normalization.
    ///
    /// Prefixes are matched case-insensitively and removed repeatedly, so
    /// "Re: Fwd: Hello" becomes "Hello". Surrounding whitespace is trimmed.
    ///
    /// - Returns: The normalized subject, or `nil` when `subject` is `nil`.
    private func stripReplyPrefix(_ subject: String?) -> String? {
        guard let subject = subject else { return nil }
        let prefixes = ["re:", "fwd:", "fw:"]
        var remainder = subject.trimmingCharacters(in: .whitespaces)
        while true {
            // Lowercase once per pass instead of once per candidate prefix.
            let lowered = remainder.lowercased()
            guard let prefix = prefixes.first(where: { lowered.hasPrefix($0) }) else { break }
            remainder = String(remainder.dropFirst(prefix.count))
                .trimmingCharacters(in: .whitespaces)
        }
        return remainder
    }
}
|
||||
@@ -0,0 +1,121 @@
|
||||
import Testing
|
||||
import GRDB
|
||||
@testable import MailStore
|
||||
|
||||
/// Behavioral tests for `ThreadReconstructor`, run against an in-memory store.
@Suite("ThreadReconstructor")
struct ThreadReconstructorTests {
    /// Opens a fresh in-memory store for a single test.
    func makeStore() throws -> MailStore {
        try MailStore(dbWriter: DatabaseSetup.openInMemoryDatabase())
    }

    /// Inserts the account ("acc1") and mailbox ("mb1") that every test message refers to.
    func seedAccount(_ store: MailStore) throws {
        try store.insertAccount(AccountRecord(
            id: "acc1", name: "Test", email: "me@example.com",
            imapHost: "imap.example.com", imapPort: 993
        ))
        try store.upsertMailbox(MailboxRecord(
            id: "mb1", accountId: "acc1", name: "INBOX", uidValidity: 1, uidNext: 100
        ))
    }

    /// Builds a message in account "acc1" / mailbox "mb1".
    ///
    /// The UID is passed explicitly rather than randomized so fixtures are
    /// reproducible and two messages in one test can never share a UID by chance.
    func makeMessage(
        id: String, uid: Int, messageId: String?, inReplyTo: String? = nil,
        refs: String? = nil, subject: String = "Test", date: String = "2024-03-08T10:00:00Z"
    ) -> MessageRecord {
        MessageRecord(
            id: id, accountId: "acc1", mailboxId: "mb1", uid: uid,
            messageId: messageId, inReplyTo: inReplyTo, refs: refs,
            subject: subject, fromAddress: "alice@example.com", fromName: "Alice",
            toAddresses: nil, ccAddresses: nil,
            date: date, snippet: nil, bodyText: nil, bodyHtml: nil,
            isRead: false, isFlagged: false, size: 100
        )
    }

    @Test("creates new thread for standalone message")
    func standaloneMessage() throws {
        let store = try makeStore()
        try seedAccount(store)
        let msg = makeMessage(id: "m1", uid: 1, messageId: "msg001@example.com")
        try store.insertMessages([msg])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        // `#require` fails the test cleanly instead of trapping on an empty array.
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 1)
    }

    @Test("groups reply into same thread via In-Reply-To")
    func replyByInReplyTo() throws {
        let store = try makeStore()
        try seedAccount(store)
        let msg1 = makeMessage(
            id: "m1", uid: 1, messageId: "msg001@example.com", date: "2024-03-08T10:00:00Z"
        )
        let msg2 = makeMessage(
            id: "m2", uid: 2, messageId: "msg002@example.com",
            inReplyTo: "msg001@example.com",
            subject: "Re: Test", date: "2024-03-08T11:00:00Z"
        )
        try store.insertMessages([msg1, msg2])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg1])
        try reconstructor.processMessages([msg2])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 2)
    }

    @Test("groups reply into same thread via References")
    func replyByReferences() throws {
        let store = try makeStore()
        try seedAccount(store)
        let msg1 = makeMessage(
            id: "m1", uid: 1, messageId: "msg001@example.com", date: "2024-03-08T10:00:00Z"
        )
        // References an id that was never stored (msg002) alongside one that was.
        let msg2 = makeMessage(
            id: "m2", uid: 2, messageId: "msg003@example.com",
            refs: "msg001@example.com msg002@example.com",
            date: "2024-03-08T12:00:00Z"
        )
        try store.insertMessages([msg1, msg2])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg1])
        try reconstructor.processMessages([msg2])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 2)
    }

    @Test("merges threads when new message connects them")
    func mergeThreads() throws {
        let store = try makeStore()
        try seedAccount(store)
        let msg1 = makeMessage(
            id: "m1", uid: 1, messageId: "msg001@example.com", date: "2024-03-08T10:00:00Z"
        )
        let msg2 = makeMessage(
            id: "m2", uid: 2, messageId: "msg002@example.com", date: "2024-03-08T11:00:00Z"
        )
        try store.insertMessages([msg1, msg2])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg1])
        try reconstructor.processMessages([msg2])
        // Two unrelated messages start out as two separate threads.
        #expect(try store.threads(accountId: "acc1").count == 2)

        // msg3 references both, merging the threads.
        let msg3 = makeMessage(
            id: "m3", uid: 3, messageId: "msg003@example.com",
            refs: "msg001@example.com msg002@example.com",
            date: "2024-03-08T12:00:00Z"
        )
        try store.insertMessages([msg3])
        try reconstructor.processMessages([msg3])

        let merged = try store.threads(accountId: "acc1")
        #expect(merged.count == 1)
        let thread = try #require(merged.first)
        #expect(thread.messageCount == 3)
    }

    @Test("message without messageId gets its own thread")
    func noMessageId() throws {
        let store = try makeStore()
        try seedAccount(store)
        let msg = makeMessage(id: "m1", uid: 1, messageId: nil)
        try store.insertMessages([msg])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 1)
    }
}
|
||||
Reference in New Issue
Block a user