// vorleser/VorleserMac/Services/EPUBService.swift

import Foundation
import ZIPFoundation
import SwiftSoup

/// A single chapter extracted from an EPUB.
///
/// Each value receives a fresh random `id` when it is created, so two chapters
/// with the same title and text still compare as different values.
struct EPUBChapter: Identifiable, Hashable {
    /// Unique identity generated at construction time.
    let id = UUID()
    /// Display title of the chapter.
    let title: String
    /// Plain-text content of the chapter.
    let rawText: String

    /// Creates a chapter with a newly generated identifier.
    ///
    /// - Parameters:
    ///   - title: Display title of the chapter.
    ///   - rawText: Plain-text content of the chapter.
    init(title: String, rawText: String) {
        self.title = title
        self.rawText = rawText
    }
}

/// Extracts the readable chapters of an EPUB file as plain text.
///
/// The archive is unpacked into a throw-away temporary directory, the OPF
/// package document is located through `META-INF/container.xml`, and every
/// spine document is converted from (X)HTML into whitespace-normalised text.
final class EPUBService {
    /// Structural problems that prevent an EPUB from being read.
    enum EPUBError: Error, LocalizedError {
        case missingContainer
        case missingRootfile
        case missingOPF
        case invalidOPF
        case missingSpine

        var errorDescription: String? {
            switch self {
            case .missingContainer:
                return "Missing META-INF/container.xml"
            case .missingRootfile:
                return "container.xml did not include a rootfile"
            case .missingOPF:
                return "OPF file missing"
            case .invalidOPF:
                return "OPF parsing failed"
            case .missingSpine:
                return "OPF spine is empty"
            }
        }
    }

    /// Reads the EPUB at `epubURL` and returns its chapters in spine order.
    ///
    /// Spine items that have no manifest entry, whose file cannot be found
    /// inside the archive, or that contain no text are skipped.
    ///
    /// - Parameter epubURL: Location of the `.epub` archive on disk.
    /// - Returns: One `EPUBChapter` per non-empty spine document.
    /// - Throws: `EPUBError` for structural problems, plus any error raised
    ///   while unzipping the archive, reading files, or parsing HTML.
    func extractChapters(from epubURL: URL) throws -> [EPUBChapter] {
        let fileManager = FileManager.default
        let tmpDir = try createTempDirectory()
        // All chapter text is copied into memory before returning, so the
        // unpacked files can be removed on every exit path (success or throw).
        defer { try? fileManager.removeItem(at: tmpDir) }

        try unzip(epubURL, to: tmpDir)

        let containerURL = tmpDir.appendingPathComponent("META-INF/container.xml")
        guard fileManager.fileExists(atPath: containerURL.path) else {
            throw EPUBError.missingContainer
        }

        let containerData = try Data(contentsOf: containerURL)
        guard let rootfilePath = try parseContainer(data: containerData) else {
            throw EPUBError.missingRootfile
        }

        let opfURL = tmpDir.appendingPathComponent(rootfilePath)
        // The rootfile path comes from the archive; never follow it outside
        // the extraction directory.
        guard isContained(opfURL, in: tmpDir), fileManager.fileExists(atPath: opfURL.path) else {
            throw EPUBError.missingOPF
        }

        let opfData = try Data(contentsOf: opfURL)
        let opfResult = try parseOPF(data: opfData)
        guard !opfResult.spine.isEmpty else {
            throw EPUBError.missingSpine
        }

        // Manifest hrefs are relative to the directory holding the OPF file.
        let baseURL = opfURL.deletingLastPathComponent()
        var chapters: [EPUBChapter] = []

        for idref in opfResult.spine {
            guard let href = opfResult.manifest[idref],
                  let contentURL = resolveContentURL(for: href, relativeTo: baseURL, within: tmpDir)
            else { continue }

            let html = try String(contentsOf: contentURL, encoding: .utf8)
            // Parse once and reuse the document for both body text and title.
            let document = try SwiftSoup.parse(html)
            let text = try document.text()
            let cleaned = cleanText(text)
            if cleaned.isEmpty { continue }

            let title = try extractTitle(from: document)
                ?? contentURL.deletingPathExtension().lastPathComponent
            chapters.append(EPUBChapter(title: title, rawText: cleaned))
        }

        return chapters
    }

    /// Unpacks every entry of the archive at `url` below `destination`.
    ///
    /// - Throws: `CocoaError(.fileReadInvalidFileName)` when an entry path
    ///   would resolve outside `destination` ("Zip Slip"), or any error raised
    ///   by ZIPFoundation / `FileManager`.
    private func unzip(_ url: URL, to destination: URL) throws {
        let fileManager = FileManager.default
        let archive = try Archive(url: url, accessMode: .read)
        for entry in archive {
            let entryURL = destination.appendingPathComponent(entry.path)
            // Entry names are untrusted: a path such as "../../x" must not be
            // allowed to write outside the extraction directory.
            guard isContained(entryURL, in: destination) else {
                throw CocoaError(.fileReadInvalidFileName)
            }
            let parent = entryURL.deletingLastPathComponent()
            if !fileManager.fileExists(atPath: parent.path) {
                try fileManager.createDirectory(at: parent, withIntermediateDirectories: true)
            }
            _ = try archive.extract(entry, to: entryURL)
        }
    }

    /// Creates a uniquely named directory inside the system temp directory.
    private func createTempDirectory() throws -> URL {
        let base = URL(fileURLWithPath: NSTemporaryDirectory())
        let dir = base.appendingPathComponent("epub_\(UUID().uuidString)", isDirectory: true)
        try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
        return dir
    }

    /// Returns `true` when `url`, after removing `.` and `..` components,
    /// is `directory` itself or lies below it.
    private func isContained(_ url: URL, in directory: URL) -> Bool {
        let root = directory.standardized.path
        let candidate = url.standardized.path
        let prefix = root.hasSuffix("/") ? root : root + "/"
        return candidate == root || candidate.hasPrefix(prefix)
    }

    /// Resolves a manifest `href` to an existing file inside `root`.
    ///
    /// The href is tried verbatim first; if that file does not exist, the
    /// percent-decoded form is tried (hrefs such as `chapter%201.xhtml` refer
    /// to a file named `chapter 1.xhtml` in the archive).
    ///
    /// - Returns: The first candidate that exists within `root`, or `nil`.
    private func resolveContentURL(for href: String, relativeTo baseURL: URL, within root: URL) -> URL? {
        var candidates = [href]
        if let decoded = href.removingPercentEncoding, decoded != href {
            candidates.append(decoded)
        }
        for candidate in candidates {
            let url = baseURL.appendingPathComponent(candidate)
            if isContained(url, in: root), FileManager.default.fileExists(atPath: url.path) {
                return url
            }
        }
        return nil
    }

    /// Returns the `full-path` of the rootfile declared in `container.xml`,
    /// or `nil` when none was found.
    private func parseContainer(data: Data) throws -> String? {
        let parser = XMLParser(data: data)
        let delegate = ContainerParser()
        parser.delegate = delegate
        parser.parse()
        return delegate.rootfilePath
    }

    /// Parses the OPF package document into title, manifest and spine.
    ///
    /// - Throws: `EPUBError.invalidOPF` when the XML parser reports a failure
    ///   and no spine entries were recovered. A document that fails late but
    ///   still produced a spine is accepted on a best-effort basis.
    private func parseOPF(data: Data) throws -> OPFParseResult {
        let parser = XMLParser(data: data)
        let delegate = OPFParser()
        parser.delegate = delegate
        let succeeded = parser.parse()
        if !succeeded && delegate.spine.isEmpty {
            throw EPUBError.invalidOPF
        }
        return OPFParseResult(
            title: delegate.title?.trimmingCharacters(in: .whitespacesAndNewlines),
            manifest: delegate.manifest,
            spine: delegate.spine
        )
    }

    /// Picks a chapter title from an already parsed HTML document.
    ///
    /// The first `<h1>` is preferred, then the first `<title>`; an element
    /// whose text is blank is ignored so the caller can fall back further.
    private func extractTitle(from document: Document) throws -> String? {
        for selector in ["h1", "title"] {
            guard let element = try document.select(selector).first() else { continue }
            let text = try element.text().trimmingCharacters(in: .whitespacesAndNewlines)
            if !text.isEmpty {
                return text
            }
        }
        return nil
    }

    /// Collapses every run of whitespace into a single space and trims the ends.
    private func cleanText(_ text: String) -> String {
        let collapsed = text.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
        return collapsed.trimmingCharacters(in: .whitespacesAndNewlines)
    }
}

/// Values collected from an OPF package document by `OPFParser`.
private struct OPFParseResult {
    /// Text captured from the title element, if any.
    let title: String?
    /// Maps each manifest `item`'s `id` attribute to its `href` attribute.
    let manifest: [String: String]
    /// `idref` attributes of the `itemref` elements, in document order.
    let spine: [String]
}

/// `XMLParserDelegate` that extracts the package document path from
/// `META-INF/container.xml`.
private final class ContainerParser: NSObject, XMLParserDelegate {
    /// `full-path` of the first `<rootfile>` element, or `nil` if none was seen.
    var rootfilePath: String?

    func parser(_ parser: XMLParser, didStartElement elementName: String, namespaceURI: String?, qualifiedName qName: String?, attributes attributeDict: [String : String] = [:]) {
        // A container may list several renditions; the first rootfile is the
        // default one, so later entries must not overwrite it.
        guard rootfilePath == nil else { return }

        // Match the element exactly (optionally namespace-prefixed) so the
        // enclosing <rootfiles> wrapper is not mistaken for a rootfile.
        let name = elementName.lowercased()
        guard name == "rootfile" || name.hasSuffix(":rootfile") else { return }

        if let path = attributeDict["full-path"], !path.isEmpty {
            rootfilePath = path
        }
    }
}

/// `XMLParserDelegate` that gathers the manifest, spine and title from an
/// OPF package document.
private final class OPFParser: NSObject, XMLParserDelegate {
    /// Manifest `item` elements, keyed by `id` with `href` as the value.
    var manifest: [String: String] = [:]
    /// `idref` of every `itemref` element, in the order encountered.
    var spine: [String] = []
    /// First non-blank text found in an element whose name ends in "title".
    var title: String?

    /// `true` while character data belongs to a title element being read.
    private var capturingTitle = false
    /// Character data accumulated for the title element currently open.
    private var pendingTitle = ""

    func parser(_ parser: XMLParser, didStartElement elementName: String, namespaceURI: String?, qualifiedName qName: String?, attributes attributeDict: [String : String] = [:]) {
        switch elementName.lowercased() {
        case "item":
            // Items missing either attribute are ignored.
            guard let identifier = attributeDict["id"], let target = attributeDict["href"] else { return }
            manifest[identifier] = target
        case "itemref":
            guard let reference = attributeDict["idref"] else { return }
            spine.append(reference)
        case let tag where tag.hasSuffix("title") && title == nil:
            // Start a fresh capture; only the first non-blank title is kept.
            capturingTitle = true
            pendingTitle = ""
        default:
            break
        }
    }

    func parser(_ parser: XMLParser, foundCharacters string: String) {
        guard capturingTitle else { return }
        pendingTitle += string
    }

    func parser(_ parser: XMLParser, didEndElement elementName: String, namespaceURI: String?, qualifiedName qName: String?) {
        guard capturingTitle, elementName.lowercased().hasSuffix("title") else { return }
        capturingTitle = false

        let candidate = pendingTitle.trimmingCharacters(in: .whitespacesAndNewlines)
        if !candidate.isEmpty {
            title = candidate
        }
    }
}