task 6: produce attributedText in EPUBParser, add attributed string tests
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,12 @@ import Foundation
|
||||
import ZIPFoundation
|
||||
import SwiftSoup
|
||||
|
||||
#if canImport(UIKit)
|
||||
import UIKit
|
||||
#elseif canImport(AppKit)
|
||||
import AppKit
|
||||
#endif
|
||||
|
||||
public enum EPUBParserError: Error, CustomStringConvertible {
|
||||
case cannotOpenArchive(URL)
|
||||
case missingContainerXML
|
||||
@@ -43,6 +49,7 @@ public struct EPUBParser {
|
||||
|
||||
var chapterTitle: String
|
||||
var chapterText: String
|
||||
var formattingRanges: [AttributedStringBuilder.FormattingRange] = []
|
||||
do {
|
||||
let htmlData = try extractData(from: archive, path: fullPath)
|
||||
let html = String(data: htmlData, encoding: .utf8) ?? ""
|
||||
@@ -50,12 +57,14 @@ public struct EPUBParser {
|
||||
chapterTitle = try doc.select("h1, h2, h3, title").first()?.text() ?? "Chapter \(index + 1)"
|
||||
let body = try doc.body()?.text() ?? ""
|
||||
chapterText = normalizeWhitespace(body)
|
||||
formattingRanges = extractFormattingRanges(from: doc, normalizedText: chapterText)
|
||||
} catch {
|
||||
chapterTitle = "Chapter \(index + 1) (parse error)"
|
||||
chapterText = ""
|
||||
}
|
||||
|
||||
chapters.append(Chapter(index: index, title: chapterTitle, text: chapterText))
|
||||
let attributedText = AttributedStringBuilder.build(text: chapterText, ranges: formattingRanges)
|
||||
chapters.append(Chapter(index: index, title: chapterTitle, text: chapterText, attributedText: attributedText))
|
||||
}
|
||||
|
||||
return Book(
|
||||
@@ -93,6 +102,32 @@ public struct EPUBParser {
|
||||
return data
|
||||
}
|
||||
|
||||
/// Locates the text of styled HTML elements inside the chapter's normalized plain text
/// and returns one formatting range per element that could be located.
///
/// Matching is textual: each element's whitespace-normalized text is searched for in
/// `normalizedText`. Elements whose text is empty or cannot be found are skipped.
///
/// - Parameters:
///   - doc: The parsed chapter document to query for `b`/`strong`, `i`/`em`, and `h1`–`h3` elements.
///   - normalizedText: The whitespace-normalized chapter text that the ranges index into.
/// - Returns: Formatting ranges (as `NSRange`s into `normalizedText`), grouped by selector
///   in the order bold, italic, heading 1, heading 2, heading 3. Empty when `normalizedText` is empty.
private static func extractFormattingRanges(from doc: Document, normalizedText: String) -> [AttributedStringBuilder.FormattingRange] {
    var ranges: [AttributedStringBuilder.FormattingRange] = []
    guard !normalizedText.isEmpty else { return ranges }

    func findAndAdd(selector: String, style: AttributedStringBuilder.FormattingRange.Style) {
        guard let elements = try? doc.select(selector) else { return }

        // Searching from the start of the text for every element would map all elements
        // that share the same text (e.g. two `<i>the</i>`) onto the first occurrence.
        // Instead, search forward from the previous match.
        // NOTE(review): this relies on `select` yielding elements in document order — confirm for SwiftSoup.
        var searchStart = normalizedText.startIndex
        var previousMatch: Range<String.Index>?

        for element in elements {
            guard let elementText = try? element.text() else { continue }
            let needle = normalizeWhitespace(elementText)
            guard !needle.isEmpty else { continue }

            let textEnd = normalizedText.endIndex
            var forwardMatch = normalizedText.range(of: needle, range: searchStart..<textEnd)

            // The cursor stays at the previous match's start so that a nested element can still
            // be found inside its parent's range. When that yields the very same range again,
            // this is a repeated sibling: prefer the next occurrence, keeping the same range
            // only if there is no later one.
            if let found = forwardMatch, found == previousMatch {
                forwardMatch = normalizedText.range(of: needle, range: found.upperBound..<textEnd) ?? found
            }

            if let match = forwardMatch {
                ranges.append(AttributedStringBuilder.FormattingRange(range: NSRange(match, in: normalizedText), style: style))
                searchStart = match.lowerBound
                previousMatch = match
            } else if let anywhere = normalizedText.range(of: needle) {
                // Nothing ahead of the cursor: fall back to the first occurrence in the whole
                // text and leave the cursor untouched so later elements are unaffected.
                ranges.append(AttributedStringBuilder.FormattingRange(range: NSRange(anywhere, in: normalizedText), style: style))
            }
        }
    }

    findAndAdd(selector: "b, strong", style: .bold)
    findAndAdd(selector: "i, em", style: .italic)
    findAndAdd(selector: "h1", style: .heading(level: 1))
    findAndAdd(selector: "h2", style: .heading(level: 2))
    findAndAdd(selector: "h3", style: .heading(level: 3))

    return ranges
}
|
||||
|
||||
private static func normalizeWhitespace(_ text: String) -> String {
|
||||
text.components(separatedBy: .whitespacesAndNewlines)
|
||||
.filter { !$0.isEmpty }
|
||||
|
||||
@@ -34,4 +34,22 @@ struct EPUBParserTests {
|
||||
#expect(chapter.index == i)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks, for every chapter of the fixture book, that the attributed text
/// carries exactly the same characters as the plain `text`.
@Test func attributedTextStringMatchesPlainText() throws {
    let parsedBook = try EPUBParser.parse(url: fixtureURL)
    for entry in parsedBook.chapters {
        let renderedString = entry.attributedText.string
        let plainString = entry.text
        #expect(renderedString == plainString,
                "attributedText.string must be identical to text for chapter '\(entry.title)'")
    }
}
|
||||
|
||||
/// Verifies that the first chapter's attributed text carries at least one `.font` attribute.
@Test func attributedTextHasFontAttributes() throws {
    let book = try EPUBParser.parse(url: fixtureURL)
    // Record a test failure instead of trapping on an out-of-bounds subscript
    // when the fixture yields no chapters.
    let chapter = try #require(book.chapters.first, "fixture should contain at least one chapter")

    let attributed = chapter.attributedText
    let fullRange = NSRange(location: 0, length: attributed.length)
    var hasFont = false
    attributed.enumerateAttribute(.font, in: fullRange) { value, _, stop in
        guard value != nil else { return }
        hasFont = true
        // One hit is enough; no need to walk the remaining attribute runs.
        stop.pointee = true
    }
    #expect(hasFont, "attributedText should have font attributes")
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user