add RFC 2047 encoded word decoder for MIME filenames
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,99 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Decoder for RFC 2047 "encoded words" (`=?charset?encoding?encoded_text?=`),
/// the mechanism MIME uses to carry non-ASCII text in header values.
public enum RFC2047Decoder {
    /// Decode RFC 2047 encoded words in a string.
    ///
    /// Pattern: `=?charset?encoding?encoded_text?=`
    ///
    /// - Text outside encoded words is copied through unchanged.
    /// - A gap consisting only of whitespace or line breaks between two adjacent
    ///   encoded words is dropped, so folded multi-word values are joined.
    /// - An encoded word that cannot be decoded (unknown charset, invalid Base64,
    ///   or bytes that are not valid in the declared charset) is kept verbatim.
    ///
    /// - Parameter input: A header value that may contain encoded words.
    /// - Returns: `input` with every decodable encoded word replaced by its text.
    public static func decode(_ input: String) -> String {
        let pattern = #"=\?([^?]+)\?([BbQq])\?([^?]*)\?="#
        guard let regex = try? NSRegularExpression(pattern: pattern) else {
            return input
        }

        // NSRegularExpression reports UTF-16 ranges, so slice via NSString.
        let nsInput = input as NSString
        let matches = regex.matches(in: input, range: NSRange(location: 0, length: nsInput.length))
        guard !matches.isEmpty else { return input }

        var result = ""
        var lastEnd = 0

        for (index, match) in matches.enumerated() {
            let matchRange = match.range

            // Text between the previous encoded word (or the start) and this one.
            let gap = nsInput.substring(with: NSRange(location: lastEnd, length: matchRange.location - lastEnd))
            // Folded headers separate encoded words with CRLF + space, so line
            // breaks must count as separators as well as plain whitespace.
            let isSeparatorOnly = gap.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
            // Leading text before the first word is always kept; later gaps are
            // kept only when they carry real (non-separator) text.
            if index == 0 || !isSeparatorOnly {
                result += gap
            }

            let decoded = decodeWord(
                charset: nsInput.substring(with: match.range(at: 1)),
                encoding: nsInput.substring(with: match.range(at: 2)),
                encodedText: nsInput.substring(with: match.range(at: 3))
            )
            // Fall back to the raw encoded word when it cannot be decoded.
            result += decoded ?? nsInput.substring(with: matchRange)
            lastEnd = matchRange.location + matchRange.length
        }

        // Append any trailing non-encoded text.
        if lastEnd < nsInput.length {
            result += nsInput.substring(from: lastEnd)
        }

        return result
    }

    /// Decodes the payload of a single encoded word.
    ///
    /// - Parameters:
    ///   - charset: The charset field, optionally followed by `*language`.
    ///   - encoding: `"B"` (Base64) or `"Q"` (quoted-printable variant), any case.
    ///   - encodedText: The encoded payload between the last two `?` separators.
    /// - Returns: The decoded text, or `nil` if the word cannot be decoded.
    private static func decodeWord(charset: String, encoding: String, encodedText: String) -> String? {
        guard let textEncoding = stringEncoding(forCharset: charset) else { return nil }

        let bytes: Data
        if encoding.uppercased() == "B" {
            guard let base64 = Data(base64Encoded: encodedText) else { return nil }
            bytes = base64
        } else {
            // Q encoding: like quoted-printable but underscores represent spaces.
            // The swap happens before escape decoding so "=5F" still yields "_".
            bytes = decodeQuotedPrintableBytes(encodedText.replacingOccurrences(of: "_", with: " "))
        }
        return String(data: bytes, encoding: textEncoding)
    }

    /// Maps an IANA charset name to a `String.Encoding`.
    ///
    /// - Parameter charset: Charset name; an RFC 2231 `*language` suffix is ignored.
    /// - Returns: The matching encoding, or `nil` when the name is not recognised.
    private static func stringEncoding(forCharset charset: String) -> String.Encoding? {
        // RFC 2231 extends encoded words to "charset*language"; only the
        // charset part selects the byte-to-text mapping.
        let name = String(charset.prefix { $0 != "*" })
        let cfEncoding = CFStringConvertIANACharSetNameToEncoding(name as CFString)
        // Converting an invalid CF encoding is documented as undefined, so bail out first.
        guard cfEncoding != kCFStringEncodingInvalidId else { return nil }
        return String.Encoding(rawValue: CFStringConvertEncodingToNSStringEncoding(cfEncoding))
    }

    /// Decodes `=XX` hex escapes into raw bytes.
    ///
    /// Works on UTF-8 bytes. A `=` that is not followed by exactly two
    /// hexadecimal digits (including a `=` too close to the end of the input)
    /// is emitted literally and decoding continues with the next byte.
    ///
    /// - Parameter input: Text containing `=XX` escapes.
    /// - Returns: The decoded bytes.
    private static func decodeQuotedPrintableBytes(_ input: String) -> Data {
        let bytes = Array(input.utf8)
        var data = Data()
        data.reserveCapacity(bytes.count)

        var i = 0
        while i < bytes.count {
            if bytes[i] == UInt8(ascii: "="),
               i + 2 < bytes.count,
               let high = hexValue(bytes[i + 1]),
               let low = hexValue(bytes[i + 2]) {
                data.append((high << 4) | low)
                i += 3
            } else {
                data.append(bytes[i])
                i += 1
            }
        }
        return data
    }

    /// Returns the value (0...15) of an ASCII hexadecimal digit, or `nil` for any other byte.
    ///
    /// Digits are matched explicitly because `UInt8(_:radix:)` would also accept a leading sign.
    private static func hexValue(_ byte: UInt8) -> UInt8? {
        switch byte {
        case UInt8(ascii: "0")...UInt8(ascii: "9"):
            return byte - UInt8(ascii: "0")
        case UInt8(ascii: "A")...UInt8(ascii: "F"):
            return byte - UInt8(ascii: "A") + 10
        case UInt8(ascii: "a")...UInt8(ascii: "f"):
            return byte - UInt8(ascii: "a") + 10
        default:
            return nil
        }
    }
}
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
import Testing
|
||||||
|
import Foundation
|
||||||
|
@testable import MIMEParser
|
||||||
|
|
||||||
|
/// Behavioural tests for `RFC2047Decoder.decode(_:)`: plain pass-through,
/// Base64 ("B") and quoted-printable ("Q") words, multi-word joining, and a
/// non-UTF-8 charset.
@Suite("RFC2047Decoder")
struct RFC2047DecoderTests {

    /// Input without any encoded word comes back untouched.
    @Test("plain ASCII filename passes through unchanged")
    func plainAscii() {
        #expect(RFC2047Decoder.decode("report.pdf") == "report.pdf")
    }

    /// A single Base64 word declared as UTF-8.
    @Test("base64 encoded UTF-8 filename decoded correctly")
    func base64Utf8() {
        // The payload is the Base64 form of "Bericht.pdf".
        let decoded = RFC2047Decoder.decode("=?utf-8?B?QmVyaWNodC5wZGY=?=")
        #expect(decoded == "Bericht.pdf")
    }

    /// A single Q-encoded word whose escapes form multi-byte UTF-8 sequences.
    @Test("quoted-printable encoded UTF-8 filename decoded correctly")
    func quotedPrintableUtf8() {
        // "Grüße.txt": ü is =C3=BC and ß is =C3=9F in UTF-8.
        let decoded = RFC2047Decoder.decode("=?utf-8?Q?Gr=C3=BC=C3=9Fe.txt?=")
        #expect(decoded == "Grüße.txt")
    }

    /// Two encoded words separated by a space are joined without the space.
    @Test("multiple encoded words concatenated")
    func multipleEncodedWords() {
        let decoded = RFC2047Decoder.decode("=?utf-8?B?SGVsbG8=?= =?utf-8?B?V29ybGQ=?=")
        #expect(decoded == "HelloWorld")
    }

    /// A Base64 word whose bytes must be interpreted as ISO-8859-1.
    @Test("ISO-8859-1 encoded filename decoded correctly")
    func iso88591() {
        // "café": é is the single byte 0xE9 in ISO-8859-1, giving Base64 "Y2Fm6Q==".
        let decoded = RFC2047Decoder.decode("=?iso-8859-1?B?Y2Fm6Q==?=")
        #expect(decoded == "café")
    }

    /// In Q encoding an underscore stands for a space.
    @Test("underscores in Q-encoding replaced with spaces")
    func qEncodingUnderscores() {
        let decoded = RFC2047Decoder.decode("=?utf-8?Q?my_file_name.pdf?=")
        #expect(decoded == "my file name.pdf")
    }
}
|
||||||
Reference in New Issue
Block a user