diff --git a/Packages/MagnumOpusCore/Sources/MIMEParser/RFC2047Decoder.swift b/Packages/MagnumOpusCore/Sources/MIMEParser/RFC2047Decoder.swift
new file mode 100644
index 0000000..a3461e2
--- /dev/null
+++ b/Packages/MagnumOpusCore/Sources/MIMEParser/RFC2047Decoder.swift
@@ -0,0 +1,129 @@
+import Foundation
+
+/// Decodes RFC 2047 encoded words (`=?charset?encoding?encoded_text?=`), as found in
+/// MIME header values such as attachment filenames.
+public enum RFC2047Decoder {
+    /// Decode RFC 2047 encoded words in a string.
+    /// Pattern: =?charset?encoding?encoded_text?=
+    ///
+    /// Text outside encoded words is copied through unchanged, except that whitespace
+    /// (including folded line breaks) separating two adjacent encoded words is dropped.
+    /// An encoded word that cannot be decoded (unknown charset, invalid base64, or bytes
+    /// that are not valid in the declared charset) is left in the output verbatim.
+    ///
+    /// - Parameter input: Header text that may contain encoded words.
+    /// - Returns: `input` with every decodable encoded word replaced by its decoded text.
+    public static func decode(_ input: String) -> String {
+        let pattern = #"=\?([^?]+)\?([BbQq])\?([^?]*)\?="#
+        guard let regex = try? NSRegularExpression(pattern: pattern) else {
+            return input
+        }
+
+        let nsInput = input as NSString
+        let matches = regex.matches(in: input, range: NSRange(location: 0, length: nsInput.length))
+
+        guard !matches.isEmpty else { return input }
+
+        var result = ""
+        var lastEnd = 0
+
+        for match in matches {
+            let matchRange = match.range
+            let gap = nsInput.substring(with: NSRange(location: lastEnd, length: matchRange.location - lastEnd))
+            // `lastEnd == 0` only before the first match, so that gap is leading text and is
+            // always kept. Later gaps sit between two encoded words and are dropped when they
+            // hold nothing but whitespace; newlines count so folded headers join cleanly.
+            if lastEnd == 0 || !gap.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
+                result += gap
+            }
+            lastEnd = matchRange.location + matchRange.length
+
+            let charset = nsInput.substring(with: match.range(at: 1))
+            let encoding = nsInput.substring(with: match.range(at: 2)).uppercased()
+            let encodedText = nsInput.substring(with: match.range(at: 3))
+
+            result += decodeWord(charset: charset, encoding: encoding, encodedText: encodedText)
+                ?? nsInput.substring(with: matchRange)
+        }
+
+        // Append any trailing non-encoded text
+        if lastEnd < nsInput.length {
+            result += nsInput.substring(from: lastEnd)
+        }
+
+        return result
+    }
+
+    /// Decodes the payload of one encoded word.
+    /// - Returns: The decoded text, or `nil` when the charset is unknown, the payload is
+    ///   not valid base64, or the bytes are not valid in the charset.
+    private static func decodeWord(charset: String, encoding: String, encodedText: String) -> String? {
+        guard let textEncoding = stringEncoding(forCharset: charset) else { return nil }
+
+        let bytes: Data
+        if encoding == "B" {
+            guard let decodedBytes = decodeBase64(encodedText) else { return nil }
+            bytes = decodedBytes
+        } else {
+            // Q encoding: like quoted-printable but underscores represent spaces
+            bytes = decodeQuotedPrintableBytes(encodedText.replacingOccurrences(of: "_", with: " "))
+        }
+        return String(data: bytes, encoding: textEncoding)
+    }
+
+    /// Maps an IANA charset name to a `String.Encoding`.
+    /// - Returns: `nil` when Core Foundation does not recognise the name.
+    private static func stringEncoding(forCharset charset: String) -> String.Encoding? {
+        // RFC 2231 lets the charset carry a language tag ("utf-8*en"); only the part
+        // before the asterisk names the charset.
+        let name = String(charset.prefix { $0 != "*" })
+        let cfEncoding = CFStringConvertIANACharSetNameToEncoding(name as CFString)
+        // Converting the invalid id is documented as undefined behaviour, so stop here.
+        guard cfEncoding != kCFStringEncodingInvalidId else { return nil }
+        return String.Encoding(rawValue: CFStringConvertEncodingToNSStringEncoding(cfEncoding))
+    }
+
+    /// Decodes base64 text, also accepting input whose trailing `=` padding is missing.
+    private static func decodeBase64(_ text: String) -> Data? {
+        if let data = Data(base64Encoded: text) {
+            return data
+        }
+        // Retry once with the padding restored in case the sender omitted it.
+        let remainder = text.utf8.count % 4
+        guard remainder != 0 else { return nil }
+        return Data(base64Encoded: text + String(repeating: "=", count: 4 - remainder))
+    }
+
+    /// Replaces each `=XX` escape (two hex digits) with the byte it names and copies every
+    /// other byte through. A `=` that is not followed by two hex digits is kept literally.
+    private static func decodeQuotedPrintableBytes(_ input: String) -> Data {
+        let source = Array(input.utf8)
+        let equalsSign = UInt8(ascii: "=")
+        var data = Data()
+        data.reserveCapacity(source.count)
+
+        var i = 0
+        while i < source.count {
+            if source[i] == equalsSign, i + 2 < source.count,
+               let high = hexValue(source[i + 1]), let low = hexValue(source[i + 2]) {
+                data.append((high << 4) | low)
+                i += 3
+            } else {
+                data.append(source[i])
+                i += 1
+            }
+        }
+        return data
+    }
+
+    /// Value of an ASCII hex digit, or `nil` for any other byte. Unlike `UInt8(_:radix:)`
+    /// this rejects sign characters, so `=+F` is not mistaken for an escape.
+    private static func hexValue(_ byte: UInt8) -> UInt8? {
+        switch byte {
+        case UInt8(ascii: "0")...UInt8(ascii: "9"): return byte - UInt8(ascii: "0")
+        case UInt8(ascii: "A")...UInt8(ascii: "F"): return byte - UInt8(ascii: "A") + 10
+        case UInt8(ascii: "a")...UInt8(ascii: "f"): return byte - UInt8(ascii: "a") + 10
+        default: return nil
+        }
+    }
+}
diff --git a/Packages/MagnumOpusCore/Tests/MIMEParserTests/RFC2047DecoderTests.swift b/Packages/MagnumOpusCore/Tests/MIMEParserTests/RFC2047DecoderTests.swift
new file mode 100644
index 0000000..bc6ce4d
--- /dev/null
+++ b/Packages/MagnumOpusCore/Tests/MIMEParserTests/RFC2047DecoderTests.swift
@@ -0,0 +1,93 @@
+import Testing
+import Foundation
+@testable import MIMEParser
+
+@Suite("RFC2047Decoder")
+struct RFC2047DecoderTests {
+
+    @Test("plain ASCII filename passes through unchanged")
+    func plainAscii() {
+        let result = RFC2047Decoder.decode("report.pdf")
+        #expect(result == "report.pdf")
+    }
+
+    @Test("base64 encoded UTF-8 filename decoded correctly")
+    func base64Utf8() {
+        // "Bericht.pdf" in base64
+        let encoded = "=?utf-8?B?QmVyaWNodC5wZGY=?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "Bericht.pdf")
+    }
+
+    @Test("quoted-printable encoded UTF-8 filename decoded correctly")
+    func quotedPrintableUtf8() {
+        // "Grüße.txt" — ü = =C3=BC, ß = =C3=9F
+        let encoded = "=?utf-8?Q?Gr=C3=BC=C3=9Fe.txt?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "Grüße.txt")
+    }
+
+    @Test("multiple encoded words concatenated")
+    func multipleEncodedWords() {
+        let encoded = "=?utf-8?B?SGVsbG8=?= =?utf-8?B?V29ybGQ=?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "HelloWorld")
+    }
+
+    @Test("ISO-8859-1 encoded filename decoded correctly")
+    func iso88591() {
+        // "café" — é = 0xE9 in ISO-8859-1, base64 of "café" in ISO-8859-1 is "Y2Fm6Q=="
+        let encoded = "=?iso-8859-1?B?Y2Fm6Q==?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "café")
+    }
+
+    @Test("underscores in Q-encoding replaced with spaces")
+    func qEncodingUnderscores() {
+        let encoded = "=?utf-8?Q?my_file_name.pdf?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "my file name.pdf")
+    }
+
+    @Test("folded line break between encoded words is dropped")
+    func foldedEncodedWords() {
+        let encoded = "=?utf-8?B?SGVsbG8=?=\r\n =?utf-8?B?V29ybGQ=?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "HelloWorld")
+    }
+
+    @Test("plain text around an encoded word is preserved")
+    func surroundingText() {
+        let encoded = "Re: =?utf-8?Q?caf=C3=A9?= menu"
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "Re: café menu")
+    }
+
+    @Test("unknown charset leaves the encoded word untouched")
+    func unknownCharset() {
+        let encoded = "=?x-no-such-charset?Q?abc?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == encoded)
+    }
+
+    @Test("equals sign without two hex digits is kept literally")
+    func malformedEscape() {
+        let encoded = "=?utf-8?Q?a=+1b?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "a=+1b")
+    }
+
+    @Test("base64 without trailing padding is decoded")
+    func base64MissingPadding() {
+        let encoded = "=?utf-8?B?SGVsbG8?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "Hello")
+    }
+
+    @Test("language tag after the charset is ignored")
+    func charsetLanguageTag() {
+        let encoded = "=?utf-8*en?Q?caf=C3=A9?="
+        let result = RFC2047Decoder.decode(encoded)
+        #expect(result == "café")
+    }
+}