Files
MagnumOpus/Packages/MagnumOpusCore/Sources/MIMEParser/MIMEParser.swift
Felix Förtsch 31ab18cb2a fix remaining code review issues, add HTML email rendering, charset-aware MIME decoding
- add charset-aware string decoding in MIMEParser (supports UTF-8, Latin-1, Windows-1252, etc.)
- fix prefetchBodies: remove broken ISO8601 date filter that prevented body fetching
- fix ensureBodyLoaded to use fetchFullMessage + MIMEParser instead of broken fetchBody
- add N+1 query fix: inboxMessagesExcludingDeferred uses SQL LEFT JOIN instead of per-message deferral check
- add inboxMessageCountExcludingDeferred for efficient perspective counts
- add unreadMessageCount, totalMessageCount queries to MailStore
- wire mailbox unread/total counts in loadMailboxes (were hardcoded to 0)
- add flag sync: reconcileFlags fetches flags for existing UIDs, updates local read/flagged state
- move account config from UserDefaults to Application Support file, auto-migrate existing config
- render HTML emails by default (toggle to plain text), render plain text as HTML for proper Unicode/emoji
- replace print() with os_log Logger in SyncCoordinator

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 10:05:19 +01:00

436 lines
15 KiB
Swift

import Foundation
public enum MIMEParser {
// MARK: - Public API
/// Parses a raw MIME message into a `MIMEMessage`.
///
/// Multipart messages are split on their boundary, every part is parsed
/// recursively, and bodies/attachments are then collected from the part tree.
/// Single-part messages are transfer-decoded and converted to text using the
/// charset declared in `Content-Type`.
///
/// - Parameter rawMessage: The complete message text (headers, blank line, body).
/// - Returns: The parsed message. A multipart message without a usable
///   boundary is returned with its whole body as trimmed plain text.
public static func parse(_ rawMessage: String) -> MIMEMessage {
    let (headers, body) = splitHeadersAndBody(rawMessage)
    // A missing Content-Type header is treated as text/plain.
    let contentType = headers["content-type"] ?? "text/plain"
    let normalizedType = contentType.lowercased()

    // Single-part message: decode the transfer encoding, then the charset.
    guard normalizedType.contains("multipart/") else {
        let payload = decodeContent(
            body,
            encoding: parseTransferEncoding(headers["content-transfer-encoding"])
        )
        let text = decodeString(payload, charset: extractParameter(contentType, name: "charset"))
        if normalizedType.contains("text/html") {
            // HTML is passed through untrimmed.
            return MIMEMessage(headers: headers, htmlBody: text)
        }
        return MIMEMessage(
            headers: headers,
            textBody: text?.trimmingCharacters(in: .whitespacesAndNewlines)
        )
    }

    // Malformed: multipart without a boundary, so treat the body as plain text.
    guard let boundary = extractBoundary(contentType) else {
        return MIMEMessage(
            headers: headers,
            textBody: body.trimmingCharacters(in: .whitespacesAndNewlines)
        )
    }

    // Top-level parts are numbered from 1 with an empty section prefix.
    let parsedParts = splitOnBoundary(body, boundary: boundary)
        .enumerated()
        .map { offset, partString in
            parsePart(partString, sectionPrefix: "", index: offset + 1)
        }
    var message = MIMEMessage(headers: headers, parts: parsedParts)
    extractBodiesAndAttachments(from: parsedParts, contentType: contentType, into: &message, sectionPrefix: "")
    return message
}
/// Reverses a Content-Transfer-Encoding and returns the resulting bytes.
///
/// - Parameters:
///   - content: The encoded body text.
///   - encoding: The transfer encoding that was applied to `content`.
/// - Returns: The decoded bytes. For base64 that cannot be decoded, and for
///   the identity encodings (7bit, 8bit, binary), the UTF-8 bytes of
///   `content` are returned unchanged.
public static func decodeContent(_ content: String, encoding: TransferEncoding) -> Data {
    switch encoding {
    case .sevenBit, .eightBit, .binary:
        // Identity encodings: nothing to undo.
        return Data(content.utf8)
    case .quotedPrintable:
        return decodeQuotedPrintable(content)
    case .base64:
        // Base64 bodies are line-wrapped; remove all whitespace before decoding.
        let compact = content.filter { !$0.isWhitespace }
        guard let decoded = Data(base64Encoded: compact) else {
            // Best effort: hand back the raw text rather than nothing.
            return Data(content.utf8)
        }
        return decoded
    }
}
/// Creates a fresh MIME boundary string.
///
/// - Returns: The fixed prefix `=_MagnumOpus_` followed by a newly generated
///   UUID string.
public static func generateBoundary() -> String {
    let uniqueSuffix = UUID().uuidString
    return "=_MagnumOpus_" + uniqueSuffix
}
// MARK: - Header Parsing
/// Splits a raw message (or a single MIME part) into parsed headers and body.
///
/// The header section ends at the first blank line. Both `CRLF CRLF` and
/// `LF LF` are accepted, and whichever occurs FIRST in `raw` is used. Picking
/// the separator by mere presence would let a `CRLF CRLF` deep inside the
/// body win over an earlier `LF LF` that actually terminates the headers,
/// silently discarding the beginning of the body.
///
/// - Parameter raw: Header block, blank line, body.
/// - Returns: The parsed headers (keys lowercased by `parseHeaders`) and the
///   untouched body text. When no blank line exists, the headers are empty
///   and the whole input is returned as the body.
private static func splitHeadersAndBody(_ raw: String) -> ([String: String], String) {
    // Locate every kind of blank line and keep the one that starts earliest.
    let candidates = [raw.range(of: "\r\n\r\n"), raw.range(of: "\n\n")].compactMap { $0 }
    guard let blankLine = candidates.min(by: { $0.lowerBound < $1.lowerBound }) else {
        return ([:], raw)
    }
    let headerSection = String(raw[..<blankLine.lowerBound])
    let bodySection = String(raw[blankLine.upperBound...])
    return (parseHeaders(headerSection), bodySection)
}
/// Parses a header block into a dictionary keyed by lowercased header name.
///
/// Folded continuation lines (starting with a space or tab) are appended to
/// the preceding header's value, separated by a single space. Lines that are
/// neither a continuation nor contain a colon are ignored. When a header name
/// occurs more than once, the last occurrence wins.
///
/// - Parameter section: The header text without the terminating blank line.
/// - Returns: Header values with surrounding whitespace removed.
private static func parseHeaders(_ section: String) -> [String: String] {
    let terminator = section.contains("\r\n") ? "\r\n" : "\n"
    // Headers in order of appearance; folded lines extend the last entry.
    var fields: [(name: String, value: String)] = []

    for line in section.components(separatedBy: terminator) where !line.isEmpty {
        let isContinuation = line.first == " " || line.first == "\t"
        if isContinuation {
            // A continuation with no header before it has nothing to extend.
            if let lastIndex = fields.indices.last {
                fields[lastIndex].value += " " + line.trimmingCharacters(in: .whitespaces)
            }
            continue
        }
        guard let colon = line.firstIndex(of: ":") else { continue }
        let name = String(line[..<colon]).trimmingCharacters(in: .whitespaces)
        let value = String(line[line.index(after: colon)...]).trimmingCharacters(in: .whitespaces)
        fields.append((name: name, value: value))
    }

    var headers: [String: String] = [:]
    for field in fields {
        // Later duplicates overwrite earlier ones.
        headers[field.name.lowercased()] = field.value
    }
    return headers
}
// MARK: - Boundary / Part Splitting
/// Extracts the `boundary` parameter from a multipart `Content-Type` value.
///
/// Accepts both `boundary="value"` and `boundary=value`; the parameter name
/// is matched case-insensitively. The search runs on `contentType` itself
/// rather than on a lowercased copy: indices taken from a lowercased copy are
/// not valid for the original string when lowercasing changes its encoded
/// length (for example "İ"), which would yield a shifted boundary.
///
/// - Parameter contentType: The raw `Content-Type` header value.
/// - Returns: The boundary with its original casing, or `nil` when the
///   parameter is missing or empty. An empty boundary would make the
///   delimiter just `--`.
private static func extractBoundary(_ contentType: String) -> String? {
    guard let marker = contentType.range(of: "boundary=", options: .caseInsensitive) else {
        return nil
    }
    let remainder = contentType[marker.upperBound...]
    let boundary: Substring
    if remainder.hasPrefix("\"") {
        // Quoted: take everything up to the closing quote (or the end if unterminated).
        let quoted = remainder.dropFirst()
        boundary = quoted.firstIndex(of: "\"").map { quoted[..<$0] } ?? quoted
    } else {
        // Unquoted: stop at the next parameter separator or whitespace.
        boundary = remainder.firstIndex(where: { $0 == ";" || $0.isWhitespace })
            .map { remainder[..<$0] } ?? remainder
    }
    return boundary.isEmpty ? nil : String(boundary)
}
/// Splits a multipart body into the raw text of its parts.
///
/// A line is a delimiter only if, after trimming surrounding spaces and tabs,
/// it is exactly `--boundary`; it is the closing delimiter only if it is
/// exactly `--boundary--`. Prefix matching is deliberately not used: nested
/// multiparts often use a boundary that extends the outer one
/// (`abc` / `abc_inner`), and a prefix match would cut the outer part apart
/// at the inner delimiters.
///
/// Text before the first delimiter (preamble) and after the closing
/// delimiter (epilogue) is discarded. If the closing delimiter is missing,
/// for example in a truncated message, the part collected so far is still
/// returned instead of being dropped.
///
/// - Parameters:
///   - body: The multipart body text.
///   - boundary: The boundary value without the leading `--`.
/// - Returns: One string per part, without the delimiter lines, joined with
///   the line break style detected in `body`.
private static func splitOnBoundary(_ body: String, boundary: String) -> [String] {
    let delimiter = "--\(boundary)"
    let terminator = "--\(boundary)--"
    let lineBreak = body.contains("\r\n") ? "\r\n" : "\n"

    var parts: [String] = []
    // nil until the first delimiter has been seen (i.e. while in the preamble).
    var currentLines: [String]? = nil

    for line in body.components(separatedBy: lineBreak) {
        let candidate = line.trimmingCharacters(in: .whitespaces)
        if candidate == terminator {
            break
        } else if candidate == delimiter {
            if let finished = currentLines {
                parts.append(finished.joined(separator: lineBreak))
            }
            currentLines = []
        } else {
            // No-op while still in the preamble.
            currentLines?.append(line)
        }
    }

    // Flush the last part, reached via the closing delimiter or end of input.
    if let finished = currentLines {
        parts.append(finished.joined(separator: lineBreak))
    }
    return parts
}
// MARK: - Part Parsing
/// Parses one MIME part, recursing into nested multiparts.
///
/// - Parameters:
///   - partString: Raw text of the part (headers, blank line, body).
///   - sectionPrefix: Dotted section path of the enclosing part, or an empty
///     string at the top level.
///   - index: 1-based position of this part within its parent.
/// - Returns: The parsed part. A multipart part with a boundary carries its
///   children in `subparts` and an empty `body`; every other part carries
///   its transfer-decoded bytes in `body` and no subparts.
private static func parsePart(_ partString: String, sectionPrefix: String, index: Int) -> MIMEPart {
    let (headers, body) = splitHeadersAndBody(partString)
    let rawContentType = headers["content-type"] ?? "text/plain"
    let transferEncoding = parseTransferEncoding(headers["content-transfer-encoding"])

    // Media type without parameters, e.g. "text/html".
    let mediaType = rawContentType.components(separatedBy: ";").first?
        .trimmingCharacters(in: .whitespaces).lowercased() ?? rawContentType

    // Prefer the Content-Disposition filename and fall back to the
    // Content-Type `name` parameter.
    var filename = extractParameter(headers["content-disposition"] ?? "", name: "filename")
        ?? extractParameter(rawContentType, name: "name")
    if let encodedName = filename {
        // Filenames may be RFC 2047 encoded words.
        filename = RFC2047Decoder.decode(encodedName)
    }

    let section = sectionPrefix.isEmpty ? "\(index)" : "\(sectionPrefix).\(index)"

    // Only a multipart type that also has a boundary is treated as a
    // container; anything else is decoded as a leaf.
    let nestedBoundary = rawContentType.lowercased().contains("multipart/")
        ? extractBoundary(rawContentType)
        : nil

    let children: [MIMEPart]
    let payload: Data
    if let boundary = nestedBoundary {
        children = splitOnBoundary(body, boundary: boundary).enumerated().map { offset, child in
            parsePart(child, sectionPrefix: section, index: offset + 1)
        }
        payload = Data()
    } else {
        children = []
        payload = decodeContent(body, encoding: transferEncoding)
    }

    return MIMEPart(
        headers: headers,
        contentType: mediaType,
        charset: extractParameter(rawContentType, name: "charset"),
        transferEncoding: transferEncoding,
        disposition: parseDisposition(headers["content-disposition"]),
        filename: filename,
        contentId: extractContentId(headers["content-id"]),
        body: payload,
        subparts: children
    )
}
// MARK: - Body & Attachment Extraction
/// Collects text/HTML bodies, inline resources and attachments from one
/// level of the parsed part tree into `message`, recursing into nested
/// multiparts.
///
/// Handling depends on the multipart subtype found in `contentType`:
/// - `multipart/alternative`: the first `text/plain` and the first
///   `text/html` leaf fill `textBody` / `htmlBody` if they are still unset.
/// - `multipart/related`: the first part provides the body, every following
///   part is recorded as an inline resource.
/// - anything else (e.g. `multipart/mixed`): the first non-attachment text
///   part becomes the body, remaining parts are recorded as attachments or
///   inline resources according to their disposition.
///
/// Nested multiparts are visited with their own section path as the prefix,
/// so that the `sectionPath` of a nested attachment (e.g. "1.2") does not
/// collide with a top-level part (e.g. "2").
///
/// - Parameters:
///   - parts: The sibling parts of one multipart container.
///   - contentType: Content type of that container.
///   - message: The message being filled in.
///   - sectionPrefix: Dotted section path of the container; empty at the top
///     level. (Presumably an IMAP-style body section path; confirm against
///     the code that consumes `sectionPath`.)
private static func extractBodiesAndAttachments(
    from parts: [MIMEPart],
    contentType: String,
    into message: inout MIMEMessage,
    sectionPrefix: String
) {
    // Dotted, 1-based path of the sibling at `offset`, built the same way as in `parsePart`.
    func sectionPath(at offset: Int) -> String {
        sectionPrefix.isEmpty ? "\(offset + 1)" : "\(sectionPrefix).\(offset + 1)"
    }

    // Charset-decoded, whitespace-trimmed text of a leaf part.
    func text(of part: MIMEPart) -> String? {
        decodeString(part.body, charset: part.charset)?.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    let lowerType = contentType.lowercased()
    if lowerType.contains("multipart/alternative") {
        for (offset, part) in parts.enumerated() {
            if !part.subparts.isEmpty {
                extractBodiesAndAttachments(
                    from: part.subparts,
                    contentType: part.contentType,
                    into: &message,
                    sectionPrefix: sectionPath(at: offset)
                )
            } else if part.contentType == "text/plain" && message.textBody == nil {
                message.textBody = text(of: part)
            } else if part.contentType == "text/html" && message.htmlBody == nil {
                message.htmlBody = text(of: part)
            }
        }
    } else if lowerType.contains("multipart/related") {
        // The first part is the body; the rest are inline resources.
        for (offset, part) in parts.enumerated() {
            if offset == 0 {
                if !part.subparts.isEmpty {
                    extractBodiesAndAttachments(
                        from: part.subparts,
                        contentType: part.contentType,
                        into: &message,
                        sectionPrefix: sectionPath(at: offset)
                    )
                } else if part.contentType == "text/html" {
                    message.htmlBody = text(of: part)
                } else if part.contentType == "text/plain" {
                    message.textBody = text(of: part)
                }
            } else {
                let attachment = MIMEAttachment(
                    filename: part.filename ?? "inline-\(offset)",
                    mimeType: part.contentType,
                    size: estimateDecodedSize(part),
                    contentId: part.contentId,
                    sectionPath: sectionPath(at: offset),
                    isInline: true
                )
                message.inlineImages.append(attachment)
            }
        }
    } else {
        // multipart/mixed or an unknown multipart subtype.
        var bodyFound = false
        for (offset, part) in parts.enumerated() {
            if !part.subparts.isEmpty {
                // Nested multipart: recurse with this part's path as the prefix.
                extractBodiesAndAttachments(
                    from: part.subparts,
                    contentType: part.contentType,
                    into: &message,
                    sectionPrefix: sectionPath(at: offset)
                )
                bodyFound = true
            } else if !bodyFound && part.disposition != .attachment && part.contentType.hasPrefix("text/") {
                if part.contentType == "text/html" {
                    message.htmlBody = text(of: part)
                } else {
                    message.textBody = text(of: part)
                }
                bodyFound = true
            } else if part.disposition == .attachment || part.filename != nil || !part.contentType.hasPrefix("text/") {
                let attachment = MIMEAttachment(
                    filename: part.filename ?? "attachment-\(offset + 1)",
                    mimeType: part.contentType,
                    size: estimateDecodedSize(part),
                    contentId: part.contentId,
                    sectionPath: sectionPath(at: offset),
                    isInline: part.disposition == .inline
                )
                if part.disposition == .inline {
                    message.inlineImages.append(attachment)
                } else {
                    message.attachments.append(attachment)
                }
            }
        }
    }
}
// MARK: - String Decoding
/// Converts bytes to text, preferring the declared charset.
///
/// Decoding is attempted with the encoding mapped from `charset`, then with
/// UTF-8, then with ISO Latin-1; the first attempt that succeeds wins.
///
/// - Parameters:
///   - data: The bytes to decode.
///   - charset: Charset name from the `Content-Type` header, if any.
/// - Returns: The decoded text, or `nil` if every attempt fails.
public static func decodeString(_ data: Data, charset: String?) -> String? {
    let attempts: [String.Encoding] = [charsetToEncoding(charset), .utf8, .isoLatin1]
    for encoding in attempts {
        if let text = String(data: data, encoding: encoding) {
            return text
        }
    }
    return nil
}
/// Maps a MIME charset name to a `String.Encoding`.
///
/// The name is compared case-insensitively after trimming surrounding
/// whitespace. Besides UTF-8/16, ASCII and Latin-1/2, the table covers the
/// Windows code pages, the Japanese encodings, Mac OS Roman and UTF-32, all of
/// which Foundation supports natively; previously these fell through to UTF-8
/// and were usually rendered as mojibake by the caller's fallback.
///
/// - Parameter charset: The charset parameter value, or `nil` if absent.
/// - Returns: The matching encoding; `.utf8` when `charset` is `nil` or not
///   recognised.
private static func charsetToEncoding(_ charset: String?) -> String.Encoding {
    guard let name = charset?.lowercased().trimmingCharacters(in: .whitespaces) else {
        return .utf8
    }
    switch name {
    case "utf-8", "utf8": return .utf8
    case "us-ascii", "ascii": return .ascii
    case "iso-8859-1", "iso_8859-1", "latin1": return .isoLatin1
    case "iso-8859-2", "iso_8859-2", "latin2": return .isoLatin2
    // Approximation: Latin-9 differs from Latin-1 in eight code points (e.g. the euro sign).
    case "iso-8859-15": return .isoLatin1
    case "windows-1250", "cp1250": return .windowsCP1250
    case "windows-1251", "cp1251": return .windowsCP1251
    case "windows-1252", "cp1252": return .windowsCP1252
    case "windows-1253", "cp1253": return .windowsCP1253
    case "windows-1254", "cp1254": return .windowsCP1254
    case "shift_jis", "shift-jis", "sjis": return .shiftJIS
    case "euc-jp": return .japaneseEUC
    case "iso-2022-jp": return .iso2022JP
    case "macintosh", "macroman": return .macOSRoman
    case "utf-16", "utf16": return .utf16
    case "utf-16be": return .utf16BigEndian
    case "utf-16le": return .utf16LittleEndian
    case "utf-32", "utf32": return .utf32
    case "utf-32be": return .utf32BigEndian
    case "utf-32le": return .utf32LittleEndian
    default: return .utf8
    }
}
// MARK: - Helper Functions
/// Interprets a `Content-Transfer-Encoding` header value.
///
/// - Parameter value: The raw header value, or `nil` if the header is absent.
/// - Returns: The encoding whose raw value equals the trimmed, lowercased
///   header value; `.sevenBit` when the header is missing or unrecognised.
private static func parseTransferEncoding(_ value: String?) -> TransferEncoding {
    guard let value = value else { return .sevenBit }
    let token = value.trimmingCharacters(in: .whitespaces).lowercased()
    return TransferEncoding(rawValue: token) ?? .sevenBit
}
/// Interprets the disposition type of a `Content-Disposition` header value.
///
/// - Parameter value: The raw header value, or `nil` if the header is absent.
/// - Returns: `.inline` or `.attachment` when the lowercased value starts
///   with that word, otherwise `nil`.
private static func parseDisposition(_ value: String?) -> ContentDisposition? {
    guard let normalized = value?.lowercased() else { return nil }
    // Checked in order; the first matching prefix decides.
    let known: [(prefix: String, kind: ContentDisposition)] = [
        ("inline", .inline),
        ("attachment", .attachment),
    ]
    return known.first { normalized.hasPrefix($0.prefix) }?.kind
}
/// Extracts a `name=value` parameter from a structured header value such as
/// `Content-Type` or `Content-Disposition`.
///
/// The parameter name is matched case-insensitively and must start at a
/// parameter boundary (start of the string, or preceded by `;` or
/// whitespace), so asking for `name` does not match inside `filename=`.
/// The search runs on `headerValue` itself: indices taken from a lowercased
/// copy are not valid for the original string when lowercasing changes its
/// encoded length.
///
/// - Parameters:
///   - headerValue: The raw header value.
///   - name: The parameter name without the trailing `=`.
/// - Returns: The value with surrounding quotes removed and original casing
///   preserved, or `nil` when the parameter is missing or empty.
private static func extractParameter(_ headerValue: String, name: String) -> String? {
    let needle = "\(name)="
    var searchStart = headerValue.startIndex

    while let match = headerValue.range(
        of: needle,
        options: .caseInsensitive,
        range: searchStart..<headerValue.endIndex
    ) {
        // Accept the match only if it begins a parameter.
        let startsParameter: Bool
        if match.lowerBound == headerValue.startIndex {
            startsParameter = true
        } else {
            let previous = headerValue[headerValue.index(before: match.lowerBound)]
            startsParameter = previous == ";" || previous.isWhitespace
        }
        guard startsParameter else {
            // Matched the tail of a longer name; keep looking further right.
            searchStart = match.upperBound
            continue
        }

        let remainder = headerValue[match.upperBound...]
        let value: Substring
        if remainder.hasPrefix("\"") {
            // Quoted: up to the closing quote (or the end if unterminated).
            let quoted = remainder.dropFirst()
            value = quoted.firstIndex(of: "\"").map { quoted[..<$0] } ?? quoted
        } else {
            // Unquoted: up to the next separator or whitespace.
            value = remainder.firstIndex(where: { $0 == ";" || $0.isWhitespace })
                .map { remainder[..<$0] } ?? remainder
        }
        return value.isEmpty ? nil : String(value)
    }
    return nil
}
/// Normalises a `Content-ID` header value.
///
/// - Parameter value: The raw header value, or `nil` if the header is absent.
/// - Returns: The value with surrounding whitespace, a leading `<` and a
///   trailing `>` removed; `nil` when the header is absent or nothing remains.
private static func extractContentId(_ value: String?) -> String? {
    guard let raw = value else { return nil }
    var identifier = Substring(raw.trimmingCharacters(in: .whitespaces))
    if identifier.hasPrefix("<") {
        identifier = identifier.dropFirst()
    }
    if identifier.hasSuffix(">") {
        identifier = identifier.dropLast()
    }
    return identifier.isEmpty ? nil : String(identifier)
}
/// Reports the size of a part's content in bytes.
///
/// - Parameter part: The part to measure.
/// - Returns: The number of bytes stored in `part.body`.
private static func estimateDecodedSize(_ part: MIMEPart) -> Int {
    // `parsePart` stores the transfer-decoded bytes in `body`, so its length
    // is already the decoded size.
    part.body.count
}
/// Decodes quoted-printable text (RFC 2045, section 6.7) into bytes.
///
/// - Trailing spaces, tabs and carriage returns on each line are removed
///   before decoding. Such whitespace can only be transport padding, and
///   removing it lets `=` followed by padding be recognised as a soft line
///   break.
/// - A line ending in `=` is a soft line break: it is joined with the next
///   line and no line break is emitted.
/// - Every other line break is emitted as CRLF, regardless of the line
///   ending used in the input.
///
/// - Parameter input: The quoted-printable encoded text.
/// - Returns: The decoded bytes.
private static func decodeQuotedPrintable(_ input: String) -> Data {
    let carriageReturn = UInt8(ascii: "\r")
    let space = UInt8(ascii: " ")
    let tab = UInt8(ascii: "\t")

    // Work on UTF-8 bytes so that "\r\n" (a single Character in Swift) and
    // combining sequences cannot hide a line ending or a trailing "=".
    let lines = input.utf8.split(separator: UInt8(ascii: "\n"), omittingEmptySubsequences: false)
    var output = Data()

    for (lineIndex, rawLine) in lines.enumerated() {
        var line = rawLine
        while let last = line.last, last == carriageReturn || last == space || last == tab {
            line = line.dropLast()
        }

        let isSoftBreak = line.last == UInt8(ascii: "=")
        if isSoftBreak {
            line = line.dropLast()
        }
        output.append(decodeQPLine(String(decoding: line, as: UTF8.self)))

        // Hard line break: re-emit it, except after the final line.
        if !isSoftBreak && lineIndex < lines.count - 1 {
            output.append(contentsOf: "\r\n".utf8)
        }
    }
    return output
}

/// Decodes the `=XX` escapes within a single quoted-printable line.
///
/// An `=` followed by two hexadecimal digits (either case) becomes the byte
/// with that value. Any other `=`, whether at the end of the line or followed
/// by non-hex characters (including signs such as `+`), is copied through
/// literally, as are all other bytes.
///
/// - Parameter line: One line without its line terminator or soft-break `=`.
/// - Returns: The decoded bytes of the line.
private static func decodeQPLine(_ line: String) -> Data {
    // Numeric value of an ASCII hex digit, or nil for anything else.
    func hexValue(_ byte: UInt8) -> UInt8? {
        switch byte {
        case 0x30...0x39: return byte - 0x30        // "0"..."9"
        case 0x41...0x46: return byte - 0x41 + 10   // "A"..."F"
        case 0x61...0x66: return byte - 0x61 + 10   // "a"..."f"
        default: return nil
        }
    }

    let bytes = Array(line.utf8)
    var output = Data()
    output.reserveCapacity(bytes.count)

    var index = 0
    while index < bytes.count {
        let byte = bytes[index]
        if byte == UInt8(ascii: "="),
           index + 2 < bytes.count,
           let high = hexValue(bytes[index + 1]),
           let low = hexValue(bytes[index + 2]) {
            output.append(high << 4 | low)
            index += 3
        } else {
            // Ordinary byte, or a malformed escape that is kept as-is.
            output.append(byte)
            index += 1
        }
    }
    return output
}
}