task 6: produce attributedText in EPUBParser, add attributed string tests

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-14 05:36:04 +01:00
parent 31d1f1d20f
commit bfbfb93b5a
2 changed files with 54 additions and 1 deletions

View File

@@ -2,6 +2,12 @@ import Foundation
import ZIPFoundation
import SwiftSoup
#if canImport(UIKit)
import UIKit
#elseif canImport(AppKit)
import AppKit
#endif
public enum EPUBParserError: Error, CustomStringConvertible {
case cannotOpenArchive(URL)
case missingContainerXML
@@ -43,6 +49,7 @@ public struct EPUBParser {
var chapterTitle: String
var chapterText: String
var formattingRanges: [AttributedStringBuilder.FormattingRange] = []
do {
let htmlData = try extractData(from: archive, path: fullPath)
let html = String(data: htmlData, encoding: .utf8) ?? ""
@@ -50,12 +57,14 @@ public struct EPUBParser {
chapterTitle = try doc.select("h1, h2, h3, title").first()?.text() ?? "Chapter \(index + 1)"
let body = try doc.body()?.text() ?? ""
chapterText = normalizeWhitespace(body)
formattingRanges = extractFormattingRanges(from: doc, normalizedText: chapterText)
} catch {
chapterTitle = "Chapter \(index + 1) (parse error)"
chapterText = ""
}
chapters.append(Chapter(index: index, title: chapterTitle, text: chapterText))
let attributedText = AttributedStringBuilder.build(text: chapterText, ranges: formattingRanges)
chapters.append(Chapter(index: index, title: chapterTitle, text: chapterText, attributedText: attributedText))
}
return Book(
@@ -93,6 +102,32 @@ public struct EPUBParser {
return data
}
/// Derives formatting ranges for a chapter by locating the text of styled HTML
/// elements inside the chapter's whitespace-normalized plain text.
///
/// Matching is done by text search, so the result is a heuristic: an element is
/// mapped to an occurrence of its text in `normalizedText`, not to its true
/// position in the document. To keep repeated elements from collapsing onto one
/// spot, the n-th element with a given text (within one selector) is mapped to
/// the n-th occurrence of that text.
///
/// - Parameters:
///   - doc: Parsed chapter document that is queried by CSS selector.
///   - normalizedText: Chapter text as produced by `normalizeWhitespace`; the
///     returned ranges index into this string.
/// - Returns: The collected ranges, each an `NSRange` over `normalizedText` paired
///   with a style. Empty when `normalizedText` is empty.
private static func extractFormattingRanges(from doc: Document, normalizedText: String) -> [AttributedStringBuilder.FormattingRange] {
    var ranges: [AttributedStringBuilder.FormattingRange] = []
    guard !normalizedText.isEmpty else { return ranges }

    /// Appends one range per element matched by `selector` whose text can be found.
    func findAndAdd(selector: String, style: AttributedStringBuilder.FormattingRange.Style) {
        // A failing selector query is treated as "no matches" (best effort).
        guard let elements = try? doc.select(selector) else { return }

        // For each distinct element text, the position just past its last match.
        // Searching from there makes a repeated text advance to its next
        // occurrence instead of re-matching the first one every time.
        var resumeIndex: [String: String.Index] = [:]

        for element in elements {
            guard let elementText = try? element.text() else { continue }
            // Normalize the same way as the chapter text so both sides compare equal.
            let normalizedElement = normalizeWhitespace(elementText)
            guard !normalizedElement.isEmpty else { continue }

            let searchStart = resumeIndex[normalizedElement] ?? normalizedText.startIndex
            guard let range = normalizedText.range(
                of: normalizedElement,
                options: [],
                range: searchStart..<normalizedText.endIndex
            ) else {
                // No further occurrence: skip rather than emit a duplicate range.
                continue
            }
            resumeIndex[normalizedElement] = range.upperBound

            let nsRange = NSRange(range, in: normalizedText)
            ranges.append(AttributedStringBuilder.FormattingRange(range: nsRange, style: style))
        }
    }

    findAndAdd(selector: "b, strong", style: .bold)
    findAndAdd(selector: "i, em", style: .italic)
    findAndAdd(selector: "h1", style: .heading(level: 1))
    findAndAdd(selector: "h2", style: .heading(level: 2))
    findAndAdd(selector: "h3", style: .heading(level: 3))
    return ranges
}
private static func normalizeWhitespace(_ text: String) -> String {
text.components(separatedBy: .whitespacesAndNewlines)
.filter { !$0.isEmpty }

View File

@@ -34,4 +34,22 @@ struct EPUBParserTests {
#expect(chapter.index == i)
}
}
/// Every chapter's attributed text must carry exactly the same characters as its
/// plain `text`.
@Test func attributedTextStringMatchesPlainText() throws {
    let parsedBook = try EPUBParser.parse(url: fixtureURL)
    parsedBook.chapters.forEach { chapter in
        #expect(
            chapter.attributedText.string == chapter.text,
            "attributedText.string must be identical to text for chapter '\(chapter.title)'"
        )
    }
}
/// Checks that the first chapter's attributed text carries at least one `.font`
/// attribute somewhere in its full range.
@Test func attributedTextHasFontAttributes() throws {
    let book = try EPUBParser.parse(url: fixtureURL)
    // `#require` records a test failure when no chapters were parsed, instead of
    // trapping on an out-of-bounds subscript.
    let chapter = try #require(book.chapters.first, "fixture should contain at least one chapter")

    var hasFont = false
    let fullRange = NSRange(location: 0, length: chapter.attributedText.length)
    chapter.attributedText.enumerateAttribute(.font, in: fullRange) { value, _, stop in
        if value != nil {
            hasFont = true
            // One hit is enough; no need to walk the remaining runs.
            stop.pointee = true
        }
    }
    #expect(hasFont, "attributedText should have font attributes")
}
}