From aac7c7c0af31d26692904e35a5664b598e57a62e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20F=C3=B6rtsch?= Date: Sat, 14 Mar 2026 12:09:10 +0100 Subject: [PATCH] add MIMEParser: multipart parsing, content decoding, boundary generation Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Sources/MIMEParser/MIMEParser.swift | 406 +++++++++++++++++- .../MIMEParserTests/MIMEParserTests.swift | 134 +++++- 2 files changed, 538 insertions(+), 2 deletions(-) diff --git a/Packages/MagnumOpusCore/Sources/MIMEParser/MIMEParser.swift b/Packages/MagnumOpusCore/Sources/MIMEParser/MIMEParser.swift index 5847c3a..26ae450 100644 --- a/Packages/MagnumOpusCore/Sources/MIMEParser/MIMEParser.swift +++ b/Packages/MagnumOpusCore/Sources/MIMEParser/MIMEParser.swift @@ -1 +1,405 @@ -// MIMEParser module +import Foundation + +public enum MIMEParser { + + // MARK: - Public API + + /// Parse a raw MIME message into a structured tree of parts + public static func parse(_ rawMessage: String) -> MIMEMessage { + let (headers, body) = splitHeadersAndBody(rawMessage) + let contentType = headers["content-type"] ?? "text/plain" + + if contentType.lowercased().contains("multipart/") { + guard let boundary = extractBoundary(contentType) else { + // Malformed: multipart without boundary — treat as plain text + return MIMEMessage( + headers: headers, + textBody: body.trimmingCharacters(in: .whitespacesAndNewlines) + ) + } + let parts = splitOnBoundary(body, boundary: boundary) + let parsedParts = parts.enumerated().map { (index, partString) in + parsePart(partString, sectionPrefix: "", index: index + 1) + } + + var message = MIMEMessage(headers: headers, parts: parsedParts) + extractBodiesAndAttachments(from: parsedParts, contentType: contentType, into: &message, sectionPrefix: "") + return message + } else { + // Single-part message + let transferEncoding = parseTransferEncoding(headers["content-transfer-encoding"]) + let decoded = decodeContent(body, encoding: transferEncoding) + + if contentType.lowercased().contains("text/html") { + return MIMEMessage(headers: headers, htmlBody: String(data: decoded, encoding: .utf8)) + } else { + return MIMEMessage( + headers: headers, + textBody: String(data: decoded, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) + ) + } + } + } + + /// Decode content based on Content-Transfer-Encoding + public static func decodeContent(_ content: String, encoding: TransferEncoding) -> Data { + switch encoding { + case .base64: + let cleaned = content.filter { !$0.isWhitespace } + return Data(base64Encoded: cleaned) ?? Data(content.utf8) + case .quotedPrintable: + return decodeQuotedPrintable(content) + case .sevenBit, .eightBit, .binary: + return Data(content.utf8) + } + } + + /// Generate a unique MIME boundary string + public static func generateBoundary() -> String { + "=_MagnumOpus_\(UUID().uuidString)" + } + + // MARK: - Header Parsing + + private static func splitHeadersAndBody(_ raw: String) -> ([String: String], String) { + // Split on first blank line (CRLF CRLF or LF LF) + let separator: String + if raw.contains("\r\n\r\n") { + separator = "\r\n\r\n" + } else if raw.contains("\n\n") { + separator = "\n\n" + } else { + return ([:], raw) + } + + guard let range = raw.range(of: separator) else { + return ([:], raw) + } + + let headerSection = String(raw[raw.startIndex.. [String: String] { + var headers: [String: String] = [:] + let lineBreak = section.contains("\r\n") ? "\r\n" : "\n" + let lines = section.components(separatedBy: lineBreak) + + var currentKey: String? + var currentValue: String = "" + + for line in lines { + if line.isEmpty { continue } + + if line.first == " " || line.first == "\t" { + // Continuation of previous header (folded) + currentValue += " " + line.trimmingCharacters(in: .whitespaces) + } else if let colonIndex = line.firstIndex(of: ":") { + // Save previous header + if let key = currentKey { + headers[key.lowercased()] = currentValue + } + currentKey = String(line[.. String? { + // Look for boundary="value" or boundary=value + let lower = contentType.lowercased() + guard let boundaryRange = lower.range(of: "boundary=") else { return nil } + var value = String(contentType[boundaryRange.upperBound...]) + + // Strip leading quote + if value.hasPrefix("\"") { + value = String(value.dropFirst()) + if let endQuote = value.firstIndex(of: "\"") { + value = String(value[.. [String] { + let delimiter = "--\(boundary)" + let terminator = "--\(boundary)--" + let lineBreak = body.contains("\r\n") ? "\r\n" : "\n" + + var parts: [String] = [] + let lines = body.components(separatedBy: lineBreak) + var currentPart: [String]? = nil + + for line in lines { + let trimmed = line.trimmingCharacters(in: .whitespaces) + if trimmed == terminator || trimmed.hasPrefix(terminator) { + if let part = currentPart { + parts.append(part.joined(separator: lineBreak)) + } + break + } else if trimmed == delimiter || trimmed.hasPrefix(delimiter) { + if let part = currentPart { + parts.append(part.joined(separator: lineBreak)) + } + currentPart = [] + } else if currentPart != nil { + currentPart!.append(line) + } + } + + return parts + } + + // MARK: - Part Parsing + + private static func parsePart(_ partString: String, sectionPrefix: String, index: Int) -> MIMEPart { + let (headers, body) = splitHeadersAndBody(partString) + let contentType = headers["content-type"] ?? "text/plain" + let transferEncoding = parseTransferEncoding(headers["content-transfer-encoding"]) + let charset = extractParameter(contentType, name: "charset") + let disposition = parseDisposition(headers["content-disposition"]) + let contentId = extractContentId(headers["content-id"]) + + var filename = extractParameter(headers["content-disposition"] ?? "", name: "filename") + if filename == nil { + filename = extractParameter(contentType, name: "name") + } + // Decode RFC 2047 encoded filenames + if let encoded = filename { + filename = RFC2047Decoder.decode(encoded) + } + + let section = sectionPrefix.isEmpty ? "\(index)" : "\(sectionPrefix).\(index)" + + // Check for nested multipart + if contentType.lowercased().contains("multipart/") { + if let boundary = extractBoundary(contentType) { + let subparts = splitOnBoundary(body, boundary: boundary) + let parsedSubparts = subparts.enumerated().map { (i, s) in + parsePart(s, sectionPrefix: section, index: i + 1) + } + return MIMEPart( + headers: headers, + contentType: contentType.components(separatedBy: ";").first?.trimmingCharacters(in: .whitespaces).lowercased() ?? contentType, + charset: charset, + transferEncoding: transferEncoding, + disposition: disposition, + filename: filename, + contentId: contentId, + body: Data(), + subparts: parsedSubparts + ) + } + } + + let decodedBody = decodeContent(body, encoding: transferEncoding) + let baseContentType = contentType.components(separatedBy: ";").first?.trimmingCharacters(in: .whitespaces).lowercased() ?? contentType + + return MIMEPart( + headers: headers, + contentType: baseContentType, + charset: charset, + transferEncoding: transferEncoding, + disposition: disposition, + filename: filename, + contentId: contentId, + body: decodedBody, + subparts: [] + ) + } + + // MARK: - Body & Attachment Extraction + + private static func extractBodiesAndAttachments( + from parts: [MIMEPart], + contentType: String, + into message: inout MIMEMessage, + sectionPrefix: String + ) { + let lowerType = contentType.lowercased() + + if lowerType.contains("multipart/alternative") { + for part in parts { + if !part.subparts.isEmpty { + extractBodiesAndAttachments(from: part.subparts, contentType: part.contentType, into: &message, sectionPrefix: "") + } else if part.contentType == "text/plain" && message.textBody == nil { + message.textBody = String(data: part.body, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) + } else if part.contentType == "text/html" && message.htmlBody == nil { + message.htmlBody = String(data: part.body, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) + } + } + } else if lowerType.contains("multipart/related") { + // First part is the HTML body, rest are inline resources + for (index, part) in parts.enumerated() { + if index == 0 { + if !part.subparts.isEmpty { + extractBodiesAndAttachments(from: part.subparts, contentType: part.contentType, into: &message, sectionPrefix: "") + } else if part.contentType == "text/html" { + message.htmlBody = String(data: part.body, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) + } else if part.contentType == "text/plain" { + message.textBody = String(data: part.body, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) + } + } else { + let sectionIndex = sectionPrefix.isEmpty ? "\(index + 1)" : "\(sectionPrefix).\(index + 1)" + let attachment = MIMEAttachment( + filename: part.filename ?? "inline-\(index)", + mimeType: part.contentType, + size: estimateDecodedSize(part), + contentId: part.contentId, + sectionPath: sectionIndex, + isInline: true + ) + message.inlineImages.append(attachment) + } + } + } else { + // multipart/mixed or unknown multipart + var bodyFound = false + for (index, part) in parts.enumerated() { + let sectionIndex = sectionPrefix.isEmpty ? "\(index + 1)" : "\(sectionPrefix).\(index + 1)" + + if !part.subparts.isEmpty { + // Nested multipart — recurse + extractBodiesAndAttachments(from: part.subparts, contentType: part.contentType, into: &message, sectionPrefix: "") + bodyFound = true + } else if !bodyFound && part.disposition != .attachment && part.contentType.hasPrefix("text/") { + if part.contentType == "text/html" { + message.htmlBody = String(data: part.body, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) + } else { + message.textBody = String(data: part.body, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) + } + bodyFound = true + } else if part.disposition == .attachment || part.filename != nil || !part.contentType.hasPrefix("text/") { + let attachment = MIMEAttachment( + filename: part.filename ?? "attachment-\(index + 1)", + mimeType: part.contentType, + size: estimateDecodedSize(part), + contentId: part.contentId, + sectionPath: sectionIndex, + isInline: part.disposition == .inline + ) + if part.disposition == .inline { + message.inlineImages.append(attachment) + } else { + message.attachments.append(attachment) + } + } + } + } + } + + // MARK: - Helper Functions + + private static func parseTransferEncoding(_ value: String?) -> TransferEncoding { + guard let value = value?.trimmingCharacters(in: .whitespaces).lowercased() else { return .sevenBit } + return TransferEncoding(rawValue: value) ?? .sevenBit + } + + private static func parseDisposition(_ value: String?) -> ContentDisposition? { + guard let value = value?.lowercased() else { return nil } + if value.hasPrefix("inline") { return .inline } + if value.hasPrefix("attachment") { return .attachment } + return nil + } + + private static func extractParameter(_ headerValue: String, name: String) -> String? { + let lower = headerValue.lowercased() + let search = "\(name.lowercased())=" + guard let range = lower.range(of: search) else { return nil } + var value = String(headerValue[range.upperBound...]) + if value.hasPrefix("\"") { + value = String(value.dropFirst()) + if let endQuote = value.firstIndex(of: "\"") { + value = String(value[.. String? { + guard var cid = value?.trimmingCharacters(in: .whitespaces) else { return nil } + if cid.hasPrefix("<") { cid = String(cid.dropFirst()) } + if cid.hasSuffix(">") { cid = String(cid.dropLast()) } + return cid.isEmpty ? nil : cid + } + + private static func estimateDecodedSize(_ part: MIMEPart) -> Int { + // Body is already decoded at this point, so use body.count directly + return part.body.count + } + + private static func decodeQuotedPrintable(_ input: String) -> Data { + var data = Data() + let lines = input.components(separatedBy: "\n") + + for (lineIndex, line) in lines.enumerated() { + var processedLine = line + if processedLine.hasSuffix("\r") { + processedLine = String(processedLine.dropLast()) + } + + // Check for soft line break + if processedLine.hasSuffix("=") { + processedLine = String(processedLine.dropLast()) + data.append(contentsOf: decodeQPLine(processedLine)) + } else { + data.append(contentsOf: decodeQPLine(processedLine)) + if lineIndex < lines.count - 1 { + data.append(contentsOf: "\r\n".utf8) + } + } + } + + return data + } + + private static func decodeQPLine(_ line: String) -> Data { + var data = Data() + var i = line.startIndex + while i < line.endIndex { + if line[i] == "=" { + let next1 = line.index(after: i) + guard next1 < line.endIndex else { + data.append(contentsOf: "=".utf8) + break + } + let next2 = line.index(after: next1) + guard next2 < line.endIndex else { + data.append(contentsOf: String(line[i...]).utf8) + break + } + let hex = String(line[next1...next2]) + if let byte = UInt8(hex, radix: 16) { + data.append(byte) + i = line.index(after: next2) + } else { + data.append(contentsOf: "=".utf8) + i = next1 + } + } else { + data.append(contentsOf: String(line[i]).utf8) + i = line.index(after: i) + } + } + return data + } +} diff --git a/Packages/MagnumOpusCore/Tests/MIMEParserTests/MIMEParserTests.swift b/Packages/MagnumOpusCore/Tests/MIMEParserTests/MIMEParserTests.swift index 88c5faa..15c7618 100644 --- a/Packages/MagnumOpusCore/Tests/MIMEParserTests/MIMEParserTests.swift +++ b/Packages/MagnumOpusCore/Tests/MIMEParserTests/MIMEParserTests.swift @@ -1 +1,133 @@ -// MIMEParser tests +import Testing +import Foundation +@testable import MIMEParser + +@Suite("MIMEParser") +struct MIMEParserTests { + + // MARK: - Content Decoding + + @Test("decode base64 content") + func decodeBase64() { + let encoded = "SGVsbG8gV29ybGQ=" + let data = MIMEParser.decodeContent(encoded, encoding: .base64) + #expect(String(data: data, encoding: .utf8) == "Hello World") + } + + @Test("decode quoted-printable content") + func decodeQuotedPrintable() { + let encoded = "Gr=C3=BC=C3=9Fe" + let data = MIMEParser.decodeContent(encoded, encoding: .quotedPrintable) + #expect(String(data: data, encoding: .utf8) == "Grüße") + } + + @Test("decode 7bit content passes through") + func decode7bit() { + let text = "Hello World" + let data = MIMEParser.decodeContent(text, encoding: .sevenBit) + #expect(String(data: data, encoding: .utf8) == "Hello World") + } + + @Test("boundary generation produces unique strings with =_ prefix") + func boundaryGeneration() { + let b1 = MIMEParser.generateBoundary() + let b2 = MIMEParser.generateBoundary() + #expect(b1 != b2) + #expect(b1.hasPrefix("=_MagnumOpus_")) + #expect(b2.hasPrefix("=_MagnumOpus_")) + } + + // MARK: - Single-part Parsing + + @Test("parse single-part text/plain message") + func parseSinglePartText() { + let raw = "Content-Type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: 7bit\r\n\r\nHello, this is the body." + let message = MIMEParser.parse(raw) + #expect(message.textBody == "Hello, this is the body.") + #expect(message.htmlBody == nil) + #expect(message.attachments.isEmpty) + } + + // MARK: - Multipart Parsing + + @Test("parse multipart/mixed with text and one attachment") + func parseMultipartMixed() { + let raw = "Content-Type: multipart/mixed; boundary=\"----boundary123\"\r\n\r\n------boundary123\r\nContent-Type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: 7bit\r\n\r\nHello from the body.\r\n------boundary123\r\nContent-Type: application/pdf; name=\"report.pdf\"\r\nContent-Disposition: attachment; filename=\"report.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nSGVsbG8=\r\n------boundary123--" + let message = MIMEParser.parse(raw) + #expect(message.textBody == "Hello from the body.") + #expect(message.attachments.count == 1) + #expect(message.attachments.first?.filename == "report.pdf") + #expect(message.attachments.first?.mimeType == "application/pdf") + #expect(message.attachments.first?.sectionPath == "2") + #expect(message.attachments.first?.isInline == false) + } + + @Test("parse multipart/alternative extracts text and html bodies") + func parseMultipartAlternative() { + let raw = "Content-Type: multipart/alternative; boundary=\"alt-boundary\"\r\n\r\n--alt-boundary\r\nContent-Type: text/plain; charset=utf-8\r\n\r\nPlain text body\r\n--alt-boundary\r\nContent-Type: text/html; charset=utf-8\r\n\r\n

HTML body

\r\n--alt-boundary--" + let message = MIMEParser.parse(raw) + #expect(message.textBody == "Plain text body") + #expect(message.htmlBody == "

HTML body

") + #expect(message.attachments.isEmpty) + } + + @Test("parse multipart/related with inline image") + func parseMultipartRelated() { + let raw = "Content-Type: multipart/related; boundary=\"rel-boundary\"\r\n\r\n--rel-boundary\r\nContent-Type: text/html; charset=utf-8\r\n\r\n

Image:

\r\n--rel-boundary\r\nContent-Type: image/png\r\nContent-ID: \r\nContent-Disposition: inline\r\nContent-Transfer-Encoding: base64\r\n\r\niVBORw0KGgo=\r\n--rel-boundary--" + let message = MIMEParser.parse(raw) + #expect(message.htmlBody == "

Image:

") + #expect(message.inlineImages.count == 1) + #expect(message.inlineImages.first?.contentId == "img001") + #expect(message.inlineImages.first?.isInline == true) + } + + @Test("parse nested multipart/mixed containing multipart/alternative") + func parseNestedMultipart() { + let raw = "Content-Type: multipart/mixed; boundary=\"outer\"\r\n\r\n--outer\r\nContent-Type: multipart/alternative; boundary=\"inner\"\r\n\r\n--inner\r\nContent-Type: text/plain\r\n\r\nPlain text\r\n--inner\r\nContent-Type: text/html\r\n\r\n

HTML

\r\n--inner--\r\n--outer\r\nContent-Type: application/pdf; name=\"doc.pdf\"\r\nContent-Disposition: attachment; filename=\"doc.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--outer--" + let message = MIMEParser.parse(raw) + #expect(message.textBody == "Plain text") + #expect(message.htmlBody == "

HTML

") + #expect(message.attachments.count == 1) + #expect(message.attachments.first?.filename == "doc.pdf") + } + + @Test("section paths assigned correctly for nested parts") + func sectionPaths() { + let raw = "Content-Type: multipart/mixed; boundary=\"outer\"\r\n\r\n--outer\r\nContent-Type: text/plain\r\n\r\nBody text\r\n--outer\r\nContent-Type: application/pdf; name=\"a.pdf\"\r\nContent-Disposition: attachment; filename=\"a.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--outer\r\nContent-Type: image/jpeg; name=\"b.jpg\"\r\nContent-Disposition: attachment; filename=\"b.jpg\"\r\nContent-Transfer-Encoding: base64\r\n\r\nBBBB\r\n--outer--" + let message = MIMEParser.parse(raw) + #expect(message.attachments.count == 2) + #expect(message.attachments[0].sectionPath == "2") + #expect(message.attachments[1].sectionPath == "3") + } + + @Test("extract filename from Content-Type name parameter when no Content-Disposition") + func filenameFromContentType() { + let raw = "Content-Type: multipart/mixed; boundary=\"bound\"\r\n\r\n--bound\r\nContent-Type: text/plain\r\n\r\nBody\r\n--bound\r\nContent-Type: application/octet-stream; name=\"data.bin\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--bound--" + let message = MIMEParser.parse(raw) + #expect(message.attachments.count == 1) + #expect(message.attachments.first?.filename == "data.bin") + } + + @Test("estimate decoded size from base64 content") + func base64SizeEstimate() { + // 8 base64 chars = 6 decoded bytes + let raw = "Content-Type: multipart/mixed; boundary=\"bound\"\r\n\r\n--bound\r\nContent-Type: text/plain\r\n\r\nBody\r\n--bound\r\nContent-Type: application/pdf; name=\"f.pdf\"\r\nContent-Disposition: attachment; filename=\"f.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAAAAAA\r\n--bound--" + let message = MIMEParser.parse(raw) + #expect(message.attachments.first?.size == 6) + } + + @Test("handle malformed MIME gracefully — missing boundary") + func malformedMissingBoundary() { + let raw = "Content-Type: multipart/mixed\r\n\r\nSome text without proper boundary markers." + let message = MIMEParser.parse(raw) + // Should not crash; treat as single-part + #expect(message.attachments.isEmpty) + } + + @Test("RFC 2047 encoded filename decoded") + func rfc2047Filename() { + let raw = "Content-Type: multipart/mixed; boundary=\"bound\"\r\n\r\n--bound\r\nContent-Type: text/plain\r\n\r\nBody\r\n--bound\r\nContent-Type: application/pdf; name=\"=?utf-8?B?QmVyaWNodC5wZGY=?=\"\r\nContent-Disposition: attachment; filename=\"=?utf-8?B?QmVyaWNodC5wZGY=?=\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--bound--" + let message = MIMEParser.parse(raw) + #expect(message.attachments.first?.filename == "Bericht.pdf") + } +}