add MIMEParser: multipart parsing, content decoding, boundary generation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-14 12:09:10 +01:00
parent cd7d39de9e
commit aac7c7c0af
2 changed files with 538 additions and 2 deletions

View File

@@ -1 +1,405 @@
// MIMEParser module
import Foundation
public enum MIMEParser {
// MARK: - Public API
/// Parses a raw MIME message into a `MIMEMessage`.
///
/// The message is split into headers and body. A missing `Content-Type`
/// header is treated as `text/plain`.
///
/// - Single-part messages are decoded according to their
///   `Content-Transfer-Encoding` and interpreted as UTF-8. The result is
///   stored as `htmlBody` (untrimmed) when the content type mentions
///   `text/html`, otherwise as a whitespace-trimmed `textBody`.
/// - Multipart messages are split on their boundary, each part is parsed
///   recursively, and bodies/attachments are collected into the message.
/// - A multipart message without a `boundary` parameter is malformed; its
///   whole body is returned as trimmed plain text without decoding.
///
/// - Parameter rawMessage: The complete message text (headers + body).
/// - Returns: The parsed message.
public static func parse(_ rawMessage: String) -> MIMEMessage {
    let (headers, body) = splitHeadersAndBody(rawMessage)
    let contentType = headers["content-type"] ?? "text/plain"
    let normalizedType = contentType.lowercased()
    let isMultipart = normalizedType.contains("multipart/")

    if !isMultipart {
        // Single-part message: decode the payload and classify it as HTML or text.
        let encoding = parseTransferEncoding(headers["content-transfer-encoding"])
        let payload = decodeContent(body, encoding: encoding)
        let text = String(data: payload, encoding: .utf8)
        if normalizedType.contains("text/html") {
            return MIMEMessage(headers: headers, htmlBody: text)
        }
        return MIMEMessage(
            headers: headers,
            textBody: text?.trimmingCharacters(in: .whitespacesAndNewlines)
        )
    }

    guard let boundary = extractBoundary(contentType) else {
        // Malformed: multipart without boundary, so treat the body as plain text.
        return MIMEMessage(
            headers: headers,
            textBody: body.trimmingCharacters(in: .whitespacesAndNewlines)
        )
    }

    // Top-level parts are numbered from 1 with an empty section prefix.
    var parsedParts: [MIMEPart] = []
    for (offset, rawPart) in splitOnBoundary(body, boundary: boundary).enumerated() {
        parsedParts.append(parsePart(rawPart, sectionPrefix: "", index: offset + 1))
    }

    var message = MIMEMessage(headers: headers, parts: parsedParts)
    extractBodiesAndAttachments(from: parsedParts, contentType: contentType, into: &message, sectionPrefix: "")
    return message
}
/// Decodes a body according to its `Content-Transfer-Encoding`.
///
/// - Parameters:
///   - content: The encoded body text.
///   - encoding: The transfer encoding declared for the body.
/// - Returns: The decoded bytes. Base64 input has all whitespace removed
///   before decoding; if it still is not valid base64, the UTF-8 bytes of
///   the original `content` are returned instead. 7bit, 8bit and binary
///   content is passed through as its UTF-8 bytes.
public static func decodeContent(_ content: String, encoding: TransferEncoding) -> Data {
    switch encoding {
    case .quotedPrintable:
        return decodeQuotedPrintable(content)
    case .base64:
        // Line breaks and padding whitespace are not part of the base64 alphabet.
        var compact = content
        compact.removeAll(where: { $0.isWhitespace })
        guard let decoded = Data(base64Encoded: compact) else {
            return Data(content.utf8)
        }
        return decoded
    case .sevenBit, .eightBit, .binary:
        return Data(content.utf8)
    }
}
/// Creates a fresh MIME boundary string.
///
/// - Returns: The fixed prefix `=_MagnumOpus_` followed by a newly
///   generated UUID string.
public static func generateBoundary() -> String {
    let uniqueSuffix = UUID().uuidString
    return "=_MagnumOpus_" + uniqueSuffix
}
// MARK: - Header Parsing
/// Splits a message or part into its parsed headers and its raw body.
///
/// The header section ends at the first blank line, i.e. the earliest
/// occurrence of either `CRLF CRLF` or `LF LF`.
///
/// - Parameter raw: The raw text of a message or of a single MIME part.
/// - Returns: The parsed headers (keys lowercased by `parseHeaders`) and
///   the body text following the blank line. When `raw` starts with a line
///   break the header section is empty and everything after that line break
///   is the body. When no blank line exists at all, the headers are empty
///   and the whole input is returned as the body.
private static func splitHeadersAndBody(_ raw: String) -> ([String: String], String) {
    // A leading line break means "no headers". Without this check the first
    // paragraph of the body would be mistaken for the header section and lost.
    // Swift treats "\r\n" as a single Character, so dropFirst() removes it whole.
    if let first = raw.first, first == "\r\n" || first == "\n" {
        return ([:], String(raw.dropFirst()))
    }

    // Pick whichever blank line comes first, so a CRLF CRLF sequence inside
    // the body cannot shadow an earlier LF LF header terminator (or vice versa).
    let crlfBreak = raw.range(of: "\r\n\r\n")
    let lfBreak = raw.range(of: "\n\n")
    let separator: Range<String.Index>
    switch (crlfBreak, lfBreak) {
    case let (crlf?, lf?):
        separator = crlf.lowerBound <= lf.lowerBound ? crlf : lf
    case let (crlf?, nil):
        separator = crlf
    case let (nil, lf?):
        separator = lf
    case (nil, nil):
        return ([:], raw)
    }

    let headerSection = String(raw[..<separator.lowerBound])
    let bodySection = String(raw[separator.upperBound...])
    return (parseHeaders(headerSection), bodySection)
}
/// Parses a header section into a dictionary keyed by lowercased header name.
///
/// Lines are split on CRLF when the section contains one, otherwise on LF.
/// A line starting with a space or tab is a folded continuation and is
/// appended to the previous header's value, separated by a single space.
/// Lines without a colon are ignored, and a continuation that appears
/// before any header is discarded. When a header name repeats, the last
/// occurrence wins.
///
/// - Parameter section: The raw header text, without the terminating blank line.
/// - Returns: Header values with surrounding whitespace trimmed.
private static func parseHeaders(_ section: String) -> [String: String] {
    let newline = section.contains("\r\n") ? "\r\n" : "\n"

    // Collect fields in order first; folding mutates the most recent one.
    var fields: [(name: String, value: String)] = []
    for line in section.components(separatedBy: newline) where !line.isEmpty {
        if let lead = line.first, lead == " " || lead == "\t" {
            guard !fields.isEmpty else { continue }
            fields[fields.count - 1].value += " " + line.trimmingCharacters(in: .whitespaces)
        } else if let colon = line.firstIndex(of: ":") {
            let name = String(line[..<colon]).trimmingCharacters(in: .whitespaces)
            let value = String(line[line.index(after: colon)...]).trimmingCharacters(in: .whitespaces)
            fields.append((name: name, value: value))
        }
    }

    // Later duplicates overwrite earlier ones.
    var headers: [String: String] = [:]
    for field in fields {
        headers[field.name.lowercased()] = field.value
    }
    return headers
}
// MARK: - Boundary / Part Splitting
/// Extracts the `boundary` parameter from a `Content-Type` header value.
///
/// Both `boundary="value"` and `boundary=value` are accepted and the
/// parameter name is matched case-insensitively. A quoted value ends at the
/// next double quote (or at the end of the string when the closing quote is
/// missing); an unquoted value ends at the first semicolon or whitespace.
///
/// - Parameter contentType: The full `Content-Type` header value.
/// - Returns: The boundary string, or `nil` when the parameter is absent or
///   empty. An empty boundary would turn every line starting with `--`
///   into a delimiter, so it is reported as missing.
private static func extractBoundary(_ contentType: String) -> String? {
    // Search the original string case-insensitively. Indices obtained from a
    // lowercased copy are not guaranteed to be valid for the original.
    guard let marker = contentType.range(of: "boundary=", options: .caseInsensitive) else {
        return nil
    }
    let remainder = contentType[marker.upperBound...]

    let value: Substring
    if remainder.hasPrefix("\"") {
        value = remainder.dropFirst().prefix(while: { $0 != "\"" })
    } else {
        value = remainder.prefix(while: { $0 != ";" && !$0.isWhitespace })
    }
    return value.isEmpty ? nil : String(value)
}
/// Splits a multipart body into the raw text of its parts.
///
/// A delimiter line is `--boundary` and the closing delimiter is
/// `--boundary--`, each optionally surrounded by spaces or tabs. Lines are
/// compared for equality, not by prefix, so a boundary that is a prefix of
/// another one (e.g. `b1` and `b10` in nested multiparts) does not split
/// the wrong container. Text before the first delimiter (preamble) and
/// after the closing delimiter (epilogue) is ignored.
///
/// - Parameters:
///   - body: The multipart body text.
///   - boundary: The boundary value from the `Content-Type` header.
/// - Returns: One string per part, with lines rejoined using the line break
///   detected in `body`. If the closing delimiter is missing (truncated
///   message), the part that was still open is returned as well. An empty
///   boundary yields no parts.
private static func splitOnBoundary(_ body: String, boundary: String) -> [String] {
    // "--" alone would match signature separators and similar lines.
    guard !boundary.isEmpty else { return [] }

    let delimiter = "--\(boundary)"
    let terminator = delimiter + "--"
    let lineBreak = body.contains("\r\n") ? "\r\n" : "\n"

    var parts: [String] = []
    var openPart: [String]? = nil  // nil until the first delimiter is seen

    for line in body.components(separatedBy: lineBreak) {
        let candidate = line.trimmingCharacters(in: .whitespaces)
        if candidate == terminator {
            break
        }
        if candidate == delimiter {
            if let finished = openPart {
                parts.append(finished.joined(separator: lineBreak))
            }
            openPart = []
        } else {
            openPart?.append(line)
        }
    }

    // Flush the last part, whether the loop ended on the closing delimiter
    // or ran out of input.
    if let finished = openPart {
        parts.append(finished.joined(separator: lineBreak))
    }
    return parts
}
// MARK: - Part Parsing
/// Parses one raw MIME part, recursing into nested multipart containers.
///
/// The filename is taken from the `filename` parameter of
/// `Content-Disposition`, falling back to the `name` parameter of
/// `Content-Type`, and is then passed through `RFC2047Decoder.decode`.
///
/// - Parameters:
///   - partString: The raw text of the part (headers, blank line, body).
///   - sectionPrefix: Section path of the enclosing container, or `""` at top level.
///   - index: 1-based position of this part within its container.
/// - Returns: A `MIMEPart` whose `contentType` is the lowercased media type
///   without parameters. A multipart part with a boundary has an empty body
///   and its children in `subparts`; any other part carries its decoded body
///   and no subparts.
private static func parsePart(_ partString: String, sectionPrefix: String, index: Int) -> MIMEPart {
    let (headers, body) = splitHeadersAndBody(partString)
    let contentType = headers["content-type"] ?? "text/plain"
    let dispositionHeader = headers["content-disposition"]
    let transferEncoding = parseTransferEncoding(headers["content-transfer-encoding"])

    // Prefer the Content-Disposition filename; fall back to Content-Type's name.
    var filename = extractParameter(dispositionHeader ?? "", name: "filename")
        ?? extractParameter(contentType, name: "name")
    if let encoded = filename {
        filename = RFC2047Decoder.decode(encoded)
    }

    // Media type without parameters, e.g. "text/plain".
    let baseContentType = contentType.components(separatedBy: ";").first?.trimmingCharacters(in: .whitespaces).lowercased() ?? contentType
    let section = sectionPrefix.isEmpty ? String(index) : sectionPrefix + "." + String(index)

    var payload = Data()
    var children: [MIMEPart] = []
    if contentType.lowercased().contains("multipart/"), let boundary = extractBoundary(contentType) {
        // Container part: children are numbered beneath this part's section path.
        children = splitOnBoundary(body, boundary: boundary).enumerated().map { entry in
            parsePart(entry.element, sectionPrefix: section, index: entry.offset + 1)
        }
    } else {
        // Leaf part, or a multipart without a boundary that is handled as a leaf.
        payload = decodeContent(body, encoding: transferEncoding)
    }

    return MIMEPart(
        headers: headers,
        contentType: baseContentType,
        charset: extractParameter(contentType, name: "charset"),
        transferEncoding: transferEncoding,
        disposition: parseDisposition(dispositionHeader),
        filename: filename,
        contentId: extractContentId(headers["content-id"]),
        body: payload,
        subparts: children
    )
}
// MARK: - Body & Attachment Extraction
/// Walks a list of parsed parts and fills in the message's text body, HTML
/// body, attachments and inline images.
///
/// Behaviour depends on the container type:
/// - `multipart/alternative`: the first `text/plain` and first `text/html`
///   leaf become the text and HTML bodies; nested containers are recursed into.
/// - `multipart/related`: the first part supplies the body (recursing if it
///   is itself a container); every following part is recorded as an inline
///   resource.
/// - anything else (e.g. `multipart/mixed`): the first non-attachment
///   `text/*` leaf or nested container supplies the body; remaining parts
///   that are attachments, have a filename, or are not `text/*` are recorded
///   as attachments (or inline images when their disposition is inline).
///
/// - Parameters:
///   - parts: The parts of one multipart container.
///   - contentType: The container's content type.
///   - message: The message being populated.
///   - sectionPrefix: Section path of the container, or `""` at top level.
private static func extractBodiesAndAttachments(
    from parts: [MIMEPart],
    contentType: String,
    into message: inout MIMEMessage,
    sectionPrefix: String
) {
    let lowerType = contentType.lowercased()

    /// Section path of the part at the given 0-based index in this container.
    func sectionPath(at index: Int) -> String {
        sectionPrefix.isEmpty ? "\(index + 1)" : "\(sectionPrefix).\(index + 1)"
    }

    /// The part's body as trimmed UTF-8 text, or nil if it is not valid UTF-8.
    func decodedText(of part: MIMEPart) -> String? {
        String(data: part.body, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // Every recursive call passes the nested container's own section path.
    // Passing an empty prefix would number nested attachments as if they
    // were top-level parts (e.g. "2" instead of "1.2").
    if lowerType.contains("multipart/alternative") {
        for (index, part) in parts.enumerated() {
            if !part.subparts.isEmpty {
                extractBodiesAndAttachments(from: part.subparts, contentType: part.contentType, into: &message, sectionPrefix: sectionPath(at: index))
            } else if part.contentType == "text/plain" && message.textBody == nil {
                message.textBody = decodedText(of: part)
            } else if part.contentType == "text/html" && message.htmlBody == nil {
                message.htmlBody = decodedText(of: part)
            }
        }
    } else if lowerType.contains("multipart/related") {
        // First part is the body, the rest are inline resources.
        for (index, part) in parts.enumerated() {
            if index == 0 {
                if !part.subparts.isEmpty {
                    extractBodiesAndAttachments(from: part.subparts, contentType: part.contentType, into: &message, sectionPrefix: sectionPath(at: index))
                } else if part.contentType == "text/html" {
                    message.htmlBody = decodedText(of: part)
                } else if part.contentType == "text/plain" {
                    message.textBody = decodedText(of: part)
                }
            } else {
                let attachment = MIMEAttachment(
                    filename: part.filename ?? "inline-\(index)",
                    mimeType: part.contentType,
                    size: estimateDecodedSize(part),
                    contentId: part.contentId,
                    sectionPath: sectionPath(at: index),
                    isInline: true
                )
                message.inlineImages.append(attachment)
            }
        }
    } else {
        // multipart/mixed or unknown multipart
        var bodyFound = false
        for (index, part) in parts.enumerated() {
            if !part.subparts.isEmpty {
                // Nested multipart: recurse and treat it as the body carrier.
                extractBodiesAndAttachments(from: part.subparts, contentType: part.contentType, into: &message, sectionPrefix: sectionPath(at: index))
                bodyFound = true
            } else if !bodyFound && part.disposition != .attachment && part.contentType.hasPrefix("text/") {
                if part.contentType == "text/html" {
                    message.htmlBody = decodedText(of: part)
                } else {
                    message.textBody = decodedText(of: part)
                }
                bodyFound = true
            } else if part.disposition == .attachment || part.filename != nil || !part.contentType.hasPrefix("text/") {
                let attachment = MIMEAttachment(
                    filename: part.filename ?? "attachment-\(index + 1)",
                    mimeType: part.contentType,
                    size: estimateDecodedSize(part),
                    contentId: part.contentId,
                    sectionPath: sectionPath(at: index),
                    isInline: part.disposition == .inline
                )
                if part.disposition == .inline {
                    message.inlineImages.append(attachment)
                } else {
                    message.attachments.append(attachment)
                }
            }
        }
    }
}
// MARK: - Helper Functions
/// Maps a `Content-Transfer-Encoding` header value to a `TransferEncoding`.
///
/// - Parameter value: The raw header value, if the header was present.
/// - Returns: The encoding whose raw value equals the trimmed, lowercased
///   header value, or `.sevenBit` when the header is missing or not recognised.
private static func parseTransferEncoding(_ value: String?) -> TransferEncoding {
    guard let raw = value else { return .sevenBit }
    let token = raw.trimmingCharacters(in: .whitespaces).lowercased()
    if let known = TransferEncoding(rawValue: token) {
        return known
    }
    return .sevenBit
}
/// Classifies a `Content-Disposition` header value.
///
/// - Parameter value: The raw header value, if the header was present.
/// - Returns: `.inline` or `.attachment` when the lowercased value starts
///   with that word; `nil` when the header is missing or starts with
///   anything else.
private static func parseDisposition(_ value: String?) -> ContentDisposition? {
    // A missing header behaves like an empty value: neither prefix matches.
    let normalized = value.map { $0.lowercased() } ?? ""
    if normalized.hasPrefix("inline") {
        return .inline
    }
    if normalized.hasPrefix("attachment") {
        return .attachment
    }
    return nil
}
/// Extracts a named parameter (e.g. `charset`, `filename`) from a
/// structured header value such as `text/plain; charset=utf-8`.
///
/// The parameter name is matched case-insensitively and must begin at the
/// start of the value or directly after a semicolon or whitespace, so that
/// looking up `name` does not pick up the value of `filename`. A quoted
/// value ends at the next double quote (or the end of the string when the
/// closing quote is missing); an unquoted value ends at the first semicolon
/// or whitespace.
///
/// - Parameters:
///   - headerValue: The full header value to search.
///   - name: The parameter name, without the trailing `=`.
/// - Returns: The parameter value, or `nil` when it is absent or empty.
private static func extractParameter(_ headerValue: String, name: String) -> String? {
    let needle = "\(name)="
    var searchRange = headerValue.startIndex..<headerValue.endIndex

    // Search the original string directly. Indices from a lowercased copy
    // are not guaranteed to be valid for the original.
    while let match = headerValue.range(of: needle, options: .caseInsensitive, range: searchRange) {
        let startsParameter: Bool
        if match.lowerBound == headerValue.startIndex {
            startsParameter = true
        } else {
            let previous = headerValue[headerValue.index(before: match.lowerBound)]
            startsParameter = previous == ";" || previous.isWhitespace
        }

        if startsParameter {
            let remainder = headerValue[match.upperBound...]
            let value: Substring
            if remainder.hasPrefix("\"") {
                value = remainder.dropFirst().prefix(while: { $0 != "\"" })
            } else {
                value = remainder.prefix(while: { $0 != ";" && !$0.isWhitespace })
            }
            return value.isEmpty ? nil : String(value)
        }

        // The match was the tail of a longer parameter name; keep looking.
        searchRange = match.upperBound..<headerValue.endIndex
    }
    return nil
}
/// Normalises a `Content-ID` header value by removing surrounding spaces
/// and tabs, one leading `<` and one trailing `>`.
///
/// - Parameter value: The raw header value, if the header was present.
/// - Returns: The bare identifier, or `nil` when the header is missing or
///   nothing remains after stripping.
private static func extractContentId(_ value: String?) -> String? {
    guard let header = value else { return nil }
    var identifier = Substring(header.trimmingCharacters(in: .whitespaces))
    if identifier.first == "<" {
        identifier.removeFirst()
    }
    if identifier.last == ">" {
        identifier.removeLast()
    }
    return identifier.isEmpty ? nil : String(identifier)
}
/// Reports the size of a part's body in bytes.
///
/// - Parameter part: The part to measure.
/// - Returns: The element count of `part.body`.
private static func estimateDecodedSize(_ part: MIMEPart) -> Int {
    // parsePart stores the body after transfer decoding, so no estimate
    // from the encoded length is needed here.
    let decodedBody = part.body
    return decodedBody.count
}
/// Decodes a quoted-printable body.
///
/// The input is split on `\n` and a trailing `\r` is removed from each
/// line. A line ending in `=` is a soft line break: the `=` is dropped and
/// the next line continues directly. Every other line except the last is
/// followed by CRLF in the output, regardless of the input's line endings.
///
/// - Parameter input: The quoted-printable encoded text.
/// - Returns: The decoded bytes.
private static func decodeQuotedPrintable(_ input: String) -> Data {
    let rawLines = input.components(separatedBy: "\n")
    let lastIndex = rawLines.count - 1

    return rawLines.enumerated().reduce(into: Data()) { output, entry in
        var text = Substring(entry.element)
        if text.hasSuffix("\r") {
            text.removeLast()
        }

        let isSoftBreak = text.hasSuffix("=")
        if isSoftBreak {
            text.removeLast()
        }

        output.append(contentsOf: decodeQPLine(String(text)))

        // Hard line breaks are re-emitted; the final line gets none.
        if !isSoftBreak && entry.offset < lastIndex {
            output.append(contentsOf: "\r\n".utf8)
        }
    }
}
/// Decodes the `=XX` escapes of a single quoted-printable line.
///
/// The line is processed as UTF-8 bytes. An `=` followed by exactly two
/// hexadecimal digits (upper or lower case) is replaced by the byte they
/// encode. Any other `=`, including one with fewer than two characters
/// after it or followed by non-hex characters, is copied through literally.
///
/// - Parameter line: One line of quoted-printable text, without its line
///   break and without a trailing soft-break `=`.
/// - Returns: The decoded bytes.
private static func decodeQPLine(_ line: String) -> Data {
    /// Value of a single ASCII hex digit, or nil for any other byte.
    /// Strict digit checking is required here: `UInt8(_:radix:)` also accepts
    /// a leading sign, which would decode "=+F" as 0x0F.
    func hexValue(_ byte: UInt8) -> UInt8? {
        switch byte {
        case UInt8(ascii: "0")...UInt8(ascii: "9"):
            return byte - UInt8(ascii: "0")
        case UInt8(ascii: "A")...UInt8(ascii: "F"):
            return byte - UInt8(ascii: "A") + 10
        case UInt8(ascii: "a")...UInt8(ascii: "f"):
            return byte - UInt8(ascii: "a") + 10
        default:
            return nil
        }
    }

    let bytes = Array(line.utf8)
    var data = Data()
    data.reserveCapacity(bytes.count)

    var position = 0
    while position < bytes.count {
        let byte = bytes[position]
        if byte == UInt8(ascii: "="),
           position + 2 < bytes.count,
           let high = hexValue(bytes[position + 1]),
           let low = hexValue(bytes[position + 2]) {
            data.append(high << 4 | low)
            position += 3
        } else {
            // Ordinary byte, or an "=" that does not start a valid escape.
            data.append(byte)
            position += 1
        }
    }
    return data
}
}

View File

@@ -1 +1,133 @@
// MIMEParser tests
import Testing
import Foundation
@testable import MIMEParser
/// Tests for `MIMEParser`: content decoding, boundary generation, and
/// parsing of single-part, multipart and nested multipart messages.
@Suite("MIMEParser")
struct MIMEParserTests {
    // MARK: - Content Decoding

    /// Base64 input decodes to its original bytes.
    @Test("decode base64 content")
    func decodeBase64() {
        let encoded = "SGVsbG8gV29ybGQ="
        let data = MIMEParser.decodeContent(encoded, encoding: .base64)
        #expect(String(data: data, encoding: .utf8) == "Hello World")
    }

    /// Quoted-printable escapes decode to the UTF-8 bytes they encode.
    @Test("decode quoted-printable content")
    func decodeQuotedPrintable() {
        let encoded = "Gr=C3=BC=C3=9Fe"
        let data = MIMEParser.decodeContent(encoded, encoding: .quotedPrintable)
        #expect(String(data: data, encoding: .utf8) == "Grüße")
    }

    /// 7bit content is returned unchanged.
    @Test("decode 7bit content passes through")
    func decode7bit() {
        let text = "Hello World"
        let data = MIMEParser.decodeContent(text, encoding: .sevenBit)
        #expect(String(data: data, encoding: .utf8) == "Hello World")
    }

    /// Two generated boundaries differ and both carry the fixed prefix.
    @Test("boundary generation produces unique strings with =_ prefix")
    func boundaryGeneration() {
        let b1 = MIMEParser.generateBoundary()
        let b2 = MIMEParser.generateBoundary()
        #expect(b1 != b2)
        #expect(b1.hasPrefix("=_MagnumOpus_"))
        #expect(b2.hasPrefix("=_MagnumOpus_"))
    }

    // MARK: - Single-part Parsing

    /// A text/plain message yields only a text body.
    @Test("parse single-part text/plain message")
    func parseSinglePartText() {
        let raw = "Content-Type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: 7bit\r\n\r\nHello, this is the body."
        let message = MIMEParser.parse(raw)
        #expect(message.textBody == "Hello, this is the body.")
        #expect(message.htmlBody == nil)
        #expect(message.attachments.isEmpty)
    }

    // MARK: - Multipart Parsing

    /// multipart/mixed: the text part becomes the body, the PDF an attachment at section 2.
    @Test("parse multipart/mixed with text and one attachment")
    func parseMultipartMixed() {
        let raw = "Content-Type: multipart/mixed; boundary=\"----boundary123\"\r\n\r\n------boundary123\r\nContent-Type: text/plain; charset=utf-8\r\nContent-Transfer-Encoding: 7bit\r\n\r\nHello from the body.\r\n------boundary123\r\nContent-Type: application/pdf; name=\"report.pdf\"\r\nContent-Disposition: attachment; filename=\"report.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nSGVsbG8=\r\n------boundary123--"
        let message = MIMEParser.parse(raw)
        #expect(message.textBody == "Hello from the body.")
        #expect(message.attachments.count == 1)
        #expect(message.attachments.first?.filename == "report.pdf")
        #expect(message.attachments.first?.mimeType == "application/pdf")
        #expect(message.attachments.first?.sectionPath == "2")
        #expect(message.attachments.first?.isInline == false)
    }

    /// multipart/alternative: both the plain and the HTML variant are extracted.
    @Test("parse multipart/alternative extracts text and html bodies")
    func parseMultipartAlternative() {
        let raw = "Content-Type: multipart/alternative; boundary=\"alt-boundary\"\r\n\r\n--alt-boundary\r\nContent-Type: text/plain; charset=utf-8\r\n\r\nPlain text body\r\n--alt-boundary\r\nContent-Type: text/html; charset=utf-8\r\n\r\n<p>HTML body</p>\r\n--alt-boundary--"
        let message = MIMEParser.parse(raw)
        #expect(message.textBody == "Plain text body")
        #expect(message.htmlBody == "<p>HTML body</p>")
        #expect(message.attachments.isEmpty)
    }

    /// multipart/related: the first part is the HTML body, the image an inline resource.
    @Test("parse multipart/related with inline image")
    func parseMultipartRelated() {
        let raw = "Content-Type: multipart/related; boundary=\"rel-boundary\"\r\n\r\n--rel-boundary\r\nContent-Type: text/html; charset=utf-8\r\n\r\n<p>Image: <img src=\"cid:img001\"></p>\r\n--rel-boundary\r\nContent-Type: image/png\r\nContent-ID: <img001>\r\nContent-Disposition: inline\r\nContent-Transfer-Encoding: base64\r\n\r\niVBORw0KGgo=\r\n--rel-boundary--"
        let message = MIMEParser.parse(raw)
        #expect(message.htmlBody == "<p>Image: <img src=\"cid:img001\"></p>")
        #expect(message.inlineImages.count == 1)
        #expect(message.inlineImages.first?.contentId == "img001")
        #expect(message.inlineImages.first?.isInline == true)
    }

    /// A multipart/alternative nested in multipart/mixed still yields both bodies and the attachment.
    @Test("parse nested multipart/mixed containing multipart/alternative")
    func parseNestedMultipart() {
        let raw = "Content-Type: multipart/mixed; boundary=\"outer\"\r\n\r\n--outer\r\nContent-Type: multipart/alternative; boundary=\"inner\"\r\n\r\n--inner\r\nContent-Type: text/plain\r\n\r\nPlain text\r\n--inner\r\nContent-Type: text/html\r\n\r\n<p>HTML</p>\r\n--inner--\r\n--outer\r\nContent-Type: application/pdf; name=\"doc.pdf\"\r\nContent-Disposition: attachment; filename=\"doc.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--outer--"
        let message = MIMEParser.parse(raw)
        #expect(message.textBody == "Plain text")
        #expect(message.htmlBody == "<p>HTML</p>")
        #expect(message.attachments.count == 1)
        #expect(message.attachments.first?.filename == "doc.pdf")
    }

    /// Top-level attachments are numbered by their 1-based position among all parts.
    @Test("section paths assigned correctly for nested parts")
    func sectionPaths() {
        let raw = "Content-Type: multipart/mixed; boundary=\"outer\"\r\n\r\n--outer\r\nContent-Type: text/plain\r\n\r\nBody text\r\n--outer\r\nContent-Type: application/pdf; name=\"a.pdf\"\r\nContent-Disposition: attachment; filename=\"a.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--outer\r\nContent-Type: image/jpeg; name=\"b.jpg\"\r\nContent-Disposition: attachment; filename=\"b.jpg\"\r\nContent-Transfer-Encoding: base64\r\n\r\nBBBB\r\n--outer--"
        let message = MIMEParser.parse(raw)
        #expect(message.attachments.count == 2)
        #expect(message.attachments[0].sectionPath == "2")
        #expect(message.attachments[1].sectionPath == "3")
    }

    /// Without Content-Disposition, the Content-Type `name` parameter supplies the filename.
    @Test("extract filename from Content-Type name parameter when no Content-Disposition")
    func filenameFromContentType() {
        let raw = "Content-Type: multipart/mixed; boundary=\"bound\"\r\n\r\n--bound\r\nContent-Type: text/plain\r\n\r\nBody\r\n--bound\r\nContent-Type: application/octet-stream; name=\"data.bin\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--bound--"
        let message = MIMEParser.parse(raw)
        #expect(message.attachments.count == 1)
        #expect(message.attachments.first?.filename == "data.bin")
    }

    /// The reported attachment size is the decoded byte count, not the encoded length.
    @Test("estimate decoded size from base64 content")
    func base64SizeEstimate() {
        // 8 base64 chars = 6 decoded bytes
        let raw = "Content-Type: multipart/mixed; boundary=\"bound\"\r\n\r\n--bound\r\nContent-Type: text/plain\r\n\r\nBody\r\n--bound\r\nContent-Type: application/pdf; name=\"f.pdf\"\r\nContent-Disposition: attachment; filename=\"f.pdf\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAAAAAA\r\n--bound--"
        let message = MIMEParser.parse(raw)
        #expect(message.attachments.first?.size == 6)
    }

    /// A multipart content type without a boundary falls back to a plain-text body.
    @Test("handle malformed MIME gracefully — missing boundary")
    func malformedMissingBoundary() {
        let raw = "Content-Type: multipart/mixed\r\n\r\nSome text without proper boundary markers."
        let message = MIMEParser.parse(raw)
        // Should not crash; treat as single-part
        #expect(message.attachments.isEmpty)
        // Pin the fallback itself, not just the absence of attachments.
        #expect(message.textBody == "Some text without proper boundary markers.")
    }

    /// RFC 2047 encoded-word filenames are decoded to readable text.
    @Test("RFC 2047 encoded filename decoded")
    func rfc2047Filename() {
        let raw = "Content-Type: multipart/mixed; boundary=\"bound\"\r\n\r\n--bound\r\nContent-Type: text/plain\r\n\r\nBody\r\n--bound\r\nContent-Type: application/pdf; name=\"=?utf-8?B?QmVyaWNodC5wZGY=?=\"\r\nContent-Disposition: attachment; filename=\"=?utf-8?B?QmVyaWNodC5wZGY=?=\"\r\nContent-Transfer-Encoding: base64\r\n\r\nAAAA\r\n--bound--"
        let message = MIMEParser.parse(raw)
        #expect(message.attachments.first?.filename == "Bericht.pdf")
    }
}