add thread reconstruction: simplified JWZ with merge support

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-13 18:21:15 +01:00
parent abee491cec
commit ca3817c683
2 changed files with 222 additions and 0 deletions

View File

@@ -0,0 +1,101 @@
import Foundation
import GRDB
/// Simplified JWZ thread reconstruction.
///
/// Links messages by their Message-ID, In-Reply-To, and References headers.
/// There is deliberately no subject-based fallback, because it produces false matches.
public struct ThreadReconstructor: Sendable {
    private let store: MailStore

    /// Creates a reconstructor that reads and writes thread data through `store`.
    public init(store: MailStore) {
        self.store = store
    }

    /// Processes newly inserted messages and assigns each of them to a thread.
    ///
    /// Messages are handled one at a time, in array order. This method does not
    /// open a transaction of its own, so if a store call throws part-way through,
    /// whatever was written for earlier messages is left as is.
    ///
    /// - Parameter messages: The messages to thread.
    /// - Throws: Any error thrown by the underlying store.
    public func processMessages(_ messages: [MessageRecord]) throws {
        for message in messages {
            try processOneMessage(message)
        }
    }

    /// Assigns a single message to a new, existing, or merged thread and links it.
    private func processOneMessage(_ message: MessageRecord) throws {
        let relatedIds = relatedMessageIds(for: message)

        // Find existing threads that contain any of these Message-IDs.
        let matchingThreadIds = try store.findThreadsByMessageIds(relatedIds)

        let threadId: String
        if let firstMatch = matchingThreadIds.first {
            if matchingThreadIds.count > 1 {
                // Multiple matching threads - merge them, then add the message.
                // NOTE(review): this relies on mergeThreads keeping the first id
                // as the surviving thread - confirm against MailStore.
                try store.mergeThreads(matchingThreadIds)
            }
            // One (possibly merged) thread - add the message to it.
            threadId = firstMatch
            try updateThreadMetadata(threadId: threadId, newMessage: message)
        } else {
            // No existing thread - create a new one seeded from this message.
            threadId = UUID().uuidString
            try store.insertThread(ThreadRecord(
                id: threadId,
                accountId: message.accountId,
                subject: stripReplyPrefix(message.subject),
                lastDate: message.date,
                messageCount: 1
            ))
        }

        // Link message to thread.
        try store.linkMessageToThread(threadId: threadId, messageId: message.id)
    }

    /// Collects every Message-ID that ties `message` to a conversation: the ids
    /// listed in In-Reply-To and References, plus the message's own Message-ID.
    ///
    /// Both list headers are tokenized on any whitespace (spaces, tabs, line
    /// breaks), so folded header values and In-Reply-To values carrying more
    /// than one id are split into individual ids instead of being treated as
    /// one opaque string.
    private func relatedMessageIds(for message: MessageRecord) -> Set<String> {
        var ids = Set<String>()

        for header in [message.inReplyTo, message.refs].compactMap({ $0 }) {
            // split(whereSeparator:) omits empty subsequences by default, so
            // runs of whitespace never produce empty ids.
            for token in header.split(whereSeparator: \.isWhitespace) {
                ids.insert(String(token))
            }
        }

        if let ownId = message.messageId, !ownId.isEmpty {
            ids.insert(ownId)
        }
        return ids
    }

    /// Refreshes the message count and last date of an existing thread to
    /// account for `newMessage`. The thread subject is left untouched.
    private func updateThreadMetadata(threadId: String, newMessage: MessageRecord) throws {
        let linkedMessageIds = try store.threadMessageIds(threadId: threadId)

        // Do not count the message twice when it is processed again after it
        // has already been linked to this thread.
        // NOTE(review): assumes threadMessageIds returns the same record ids
        // that linkMessageToThread stores - confirm against MailStore.
        let alreadyLinked = linkedMessageIds.contains(newMessage.id)
        let newCount = linkedMessageIds.count + (alreadyLinked ? 0 : 1)

        let currentThread = try store.threads(accountId: newMessage.accountId)
            .first { $0.id == threadId }
        // Dates are compared as strings; a thread that cannot be found falls
        // back to "" so the new message's date is used.
        let lastDate = max(currentThread?.lastDate ?? "", newMessage.date)

        try store.updateThread(
            id: threadId,
            lastDate: lastDate,
            messageCount: newCount,
            subject: nil
        )
    }

    /// Strips leading `Re:`, `Fwd:` and `Fw:` prefixes (case-insensitive, possibly
    /// stacked) for thread subject normalization.
    ///
    /// - Parameter subject: The raw subject, or `nil`.
    /// - Returns: The subject without reply/forward prefixes and without
    ///   surrounding whitespace, or `nil` when `subject` is `nil`.
    private func stripReplyPrefix(_ subject: String?) -> String? {
        guard let subject = subject else { return nil }

        let prefixes = ["re:", "fwd:", "fw:"]
        var remainder = subject.trimmingCharacters(in: .whitespaces)

        while true {
            // Lowercase once per pass rather than once per candidate prefix.
            let lowered = remainder.lowercased()
            guard let prefix = prefixes.first(where: { lowered.hasPrefix($0) }) else {
                break
            }
            remainder = String(remainder.dropFirst(prefix.count))
                .trimmingCharacters(in: .whitespaces)
        }
        return remainder
    }
}

View File

@@ -0,0 +1,121 @@
import Testing
import GRDB
@testable import MailStore
/// Behavioral tests for `ThreadReconstructor`, run against an in-memory store.
@Suite("ThreadReconstructor")
struct ThreadReconstructorTests {
    /// Opens a fresh in-memory store so each test starts from its own database.
    func makeStore() throws -> MailStore {
        try MailStore(dbWriter: DatabaseSetup.openInMemoryDatabase())
    }

    /// Inserts the account ("acc1") and mailbox ("mb1") that `makeMessage` refers to.
    func seedAccount(_ store: MailStore) throws {
        try store.insertAccount(AccountRecord(
            id: "acc1", name: "Test", email: "me@example.com",
            imapHost: "imap.example.com", imapPort: 993
        ))
        try store.upsertMailbox(MailboxRecord(
            id: "mb1", accountId: "acc1", name: "INBOX", uidValidity: 1, uidNext: 100
        ))
    }

    /// Builds a message in account "acc1" / mailbox "mb1" with the given threading headers.
    ///
    /// - Parameter uid: Mailbox UID of the message. It is a fixed value rather
    ///   than a random one so fixtures are reproducible; tests that insert
    ///   several messages pass distinct values.
    func makeMessage(
        id: String, uid: Int = 1, messageId: String?, inReplyTo: String? = nil,
        refs: String? = nil, subject: String = "Test", date: String = "2024-03-08T10:00:00Z"
    ) -> MessageRecord {
        MessageRecord(
            id: id, accountId: "acc1", mailboxId: "mb1", uid: uid,
            messageId: messageId, inReplyTo: inReplyTo, refs: refs,
            subject: subject, fromAddress: "alice@example.com", fromName: "Alice",
            toAddresses: nil, ccAddresses: nil,
            date: date, snippet: nil, bodyText: nil, bodyHtml: nil,
            isRead: false, isFlagged: false, size: 100
        )
    }

    @Test("creates new thread for standalone message")
    func standaloneMessage() throws {
        let store = try makeStore()
        try seedAccount(store)

        let msg = makeMessage(id: "m1", messageId: "msg001@example.com")
        try store.insertMessages([msg])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        // #require fails the test instead of trapping when the list is empty.
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 1)
    }

    @Test("groups reply into same thread via In-Reply-To")
    func replyByInReplyTo() throws {
        let store = try makeStore()
        try seedAccount(store)

        let msg1 = makeMessage(
            id: "m1", uid: 1, messageId: "msg001@example.com", date: "2024-03-08T10:00:00Z"
        )
        let msg2 = makeMessage(
            id: "m2", uid: 2, messageId: "msg002@example.com",
            inReplyTo: "msg001@example.com",
            subject: "Re: Test", date: "2024-03-08T11:00:00Z"
        )
        try store.insertMessages([msg1, msg2])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg1])
        try reconstructor.processMessages([msg2])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 2)
    }

    @Test("groups reply into same thread via References")
    func replyByReferences() throws {
        let store = try makeStore()
        try seedAccount(store)

        let msg1 = makeMessage(
            id: "m1", uid: 1, messageId: "msg001@example.com", date: "2024-03-08T10:00:00Z"
        )
        // msg002 is referenced but never stored; the link to msg001 must still be found.
        let msg2 = makeMessage(
            id: "m2", uid: 2, messageId: "msg003@example.com",
            refs: "msg001@example.com msg002@example.com",
            date: "2024-03-08T12:00:00Z"
        )
        try store.insertMessages([msg1, msg2])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg1])
        try reconstructor.processMessages([msg2])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 2)
    }

    @Test("merges threads when new message connects them")
    func mergeThreads() throws {
        let store = try makeStore()
        try seedAccount(store)

        let msg1 = makeMessage(
            id: "m1", uid: 1, messageId: "msg001@example.com", date: "2024-03-08T10:00:00Z"
        )
        let msg2 = makeMessage(
            id: "m2", uid: 2, messageId: "msg002@example.com", date: "2024-03-08T11:00:00Z"
        )
        try store.insertMessages([msg1, msg2])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg1])
        try reconstructor.processMessages([msg2])

        // Two unrelated messages: two separate threads.
        #expect(try store.threads(accountId: "acc1").count == 2)

        // msg3 references both, merging the threads.
        let msg3 = makeMessage(
            id: "m3", uid: 3, messageId: "msg003@example.com",
            refs: "msg001@example.com msg002@example.com",
            date: "2024-03-08T12:00:00Z"
        )
        try store.insertMessages([msg3])
        try reconstructor.processMessages([msg3])

        let mergedThreads = try store.threads(accountId: "acc1")
        #expect(mergedThreads.count == 1)
        let merged = try #require(mergedThreads.first)
        #expect(merged.messageCount == 3)
    }

    @Test("message without messageId gets its own thread")
    func noMessageId() throws {
        let store = try makeStore()
        try seedAccount(store)

        let msg = makeMessage(id: "m1", messageId: nil)
        try store.insertMessages([msg])

        let reconstructor = ThreadReconstructor(store: store)
        try reconstructor.processMessages([msg])

        let threads = try store.threads(accountId: "acc1")
        #expect(threads.count == 1)
        let thread = try #require(threads.first)
        #expect(thread.messageCount == 1)
    }
}