From 53b91ee4ed69f3a401259e27f87a6522b536d16f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20F=C3=B6rtsch?= Date: Fri, 13 Mar 2026 21:48:39 +0100 Subject: [PATCH] scaffold VorleserKit package with shared types, sentence segmenter, tests Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 2 + VorleserKit/Package.resolved | 24 +++++++++ VorleserKit/Package.swift | 37 +++++++++++++ VorleserKit/Sources/BookParser/Book.swift | 52 +++++++++++++++++++ VorleserKit/Sources/BookParser/Chapter.swift | 11 ++++ .../Sources/VorleserKit/Sentence.swift | 9 ++++ .../VorleserKit/SentenceSegmenter.swift | 18 +++++++ .../Sources/VorleserKit/VorleserKit.swift | 2 + .../Tests/BookParserTests/Fixtures/.gitkeep | 0 .../SentenceSegmenterTests.swift | 39 ++++++++++++++ 10 files changed, 194 insertions(+) create mode 100644 VorleserKit/Package.resolved create mode 100644 VorleserKit/Package.swift create mode 100644 VorleserKit/Sources/BookParser/Book.swift create mode 100644 VorleserKit/Sources/BookParser/Chapter.swift create mode 100644 VorleserKit/Sources/VorleserKit/Sentence.swift create mode 100644 VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift create mode 100644 VorleserKit/Sources/VorleserKit/VorleserKit.swift create mode 100644 VorleserKit/Tests/BookParserTests/Fixtures/.gitkeep create mode 100644 VorleserKit/Tests/VorleserKitTests/SentenceSegmenterTests.swift diff --git a/.gitignore b/.gitignore index 9d0f510..35cf6ff 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ autoaudiobook/samples/ # Build outputs build/ +.build/ +**/.build/ DerivedData/ *.dSYM/ *.log diff --git a/VorleserKit/Package.resolved b/VorleserKit/Package.resolved new file mode 100644 index 0000000..db443d9 --- /dev/null +++ b/VorleserKit/Package.resolved @@ -0,0 +1,24 @@ +{ + "originHash" : "1bf1d418d8d58ea936176af8e96313605ea72a6fbf437f877b8e5d9a5b0d822c", + "pins" : [ + { + "identity" : "swiftsoup", + "kind" : "remoteSourceControl", + "location" : "https://github.com/scinfu/SwiftSoup.git", + "state" : { + "revision" : "dba183c96b2da4e4b80bb31b1e2e59cb9542b8fc", + "version" : "2.13.0" + } + }, + { + "identity" : "zipfoundation", + "kind" : "remoteSourceControl", + "location" : "https://github.com/weichsel/ZIPFoundation.git", + "state" : { + "revision" : "22787ffb59de99e5dc1fbfe80b19c97a904ad48d", + "version" : "0.9.20" + } + } + ], + "version" : 3 +} diff --git a/VorleserKit/Package.swift b/VorleserKit/Package.swift new file mode 100644 index 0000000..2c9b798 --- /dev/null +++ b/VorleserKit/Package.swift @@ -0,0 +1,37 @@ +// swift-tools-version: 6.2 +import PackageDescription + +let package = Package( + name: "VorleserKit", + platforms: [ + .iOS(.v18), + .macOS(.v15), + ], + products: [ + .library(name: "VorleserKit", targets: ["VorleserKit"]), + .library(name: "BookParser", targets: ["BookParser"]), + ], + dependencies: [ + .package(url: "https://github.com/weichsel/ZIPFoundation.git", from: "0.9.0"), + .package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.7.0"), + ], + targets: [ + .target( + name: "VorleserKit", + dependencies: [] + ), + .target( + name: "BookParser", + dependencies: ["VorleserKit", "ZIPFoundation", "SwiftSoup"] + ), + .testTarget( + name: "BookParserTests", + dependencies: ["BookParser"], + resources: [.copy("Fixtures")] + ), + .testTarget( + name: "VorleserKitTests", + dependencies: ["VorleserKit"] + ), + ] +) diff --git a/VorleserKit/Sources/BookParser/Book.swift b/VorleserKit/Sources/BookParser/Book.swift new file mode 100644 index 0000000..55a748b --- /dev/null +++ b/VorleserKit/Sources/BookParser/Book.swift @@ -0,0 +1,52 @@ +import Foundation +import VorleserKit + +public struct Book: Sendable { + public let id: UUID + public let title: String + public let author: String? + public let chapters: [Chapter] + + public init(id: UUID = UUID(), title: String, author: String?, chapters: [Chapter]) { + self.id = id + self.title = title + self.author = author + self.chapters = chapters + } + + /// All sentences across all chapters, with global character offsets. + public var sentences: [Sentence] { + var result: [Sentence] = [] + var offset: CharacterOffset = 0 + for chapter in chapters { + let chapterSentences = SentenceSegmenter.segment(chapter.text, globalOffset: offset) + result.append(contentsOf: chapterSentences) + offset += chapter.text.count + } + return result + } + + /// Returns the sentence index containing the given global character offset. + public func sentenceIndex(containing offset: CharacterOffset) -> Int? { + let allSentences = sentences + return allSentences.firstIndex { $0.range.contains(offset) } + } + + /// Maps a global character offset to (chapter index, local offset within chapter). + public func chapterAndLocalOffset(for globalOffset: CharacterOffset) -> (chapterIndex: Int, localOffset: Int)? { + var offset = 0 + for chapter in chapters { + let chapterEnd = offset + chapter.text.count + if globalOffset < chapterEnd { + return (chapter.index, globalOffset - offset) + } + offset = chapterEnd + } + return nil + } + + /// Total character count across all chapters. + public var totalCharacters: Int { + chapters.reduce(0) { $0 + $1.text.count } + } +} diff --git a/VorleserKit/Sources/BookParser/Chapter.swift b/VorleserKit/Sources/BookParser/Chapter.swift new file mode 100644 index 0000000..0da471b --- /dev/null +++ b/VorleserKit/Sources/BookParser/Chapter.swift @@ -0,0 +1,11 @@ +public struct Chapter: Sendable { + public let index: Int + public let title: String + public let text: String + + public init(index: Int, title: String, text: String) { + self.index = index + self.title = title + self.text = text + } +} diff --git a/VorleserKit/Sources/VorleserKit/Sentence.swift b/VorleserKit/Sources/VorleserKit/Sentence.swift new file mode 100644 index 0000000..a0df59e --- /dev/null +++ b/VorleserKit/Sources/VorleserKit/Sentence.swift @@ -0,0 +1,9 @@ +public struct Sentence: Sendable { + public let text: String + public let range: Range + + public init(text: String, range: Range) { + self.text = text + self.range = range + } +} diff --git a/VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift b/VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift new file mode 100644 index 0000000..0b6e6c0 --- /dev/null +++ b/VorleserKit/Sources/VorleserKit/SentenceSegmenter.swift @@ -0,0 +1,18 @@ +import NaturalLanguage + +public struct SentenceSegmenter: Sendable { + public static func segment(_ text: String, globalOffset: CharacterOffset = 0) -> [Sentence] { + let tokenizer = NLTokenizer(unit: .sentence) + tokenizer.string = text + var sentences: [Sentence] = [] + tokenizer.enumerateTokens(in: text.startIndex..= 100) + } + + @Test func handlesEmptyText() { + let sentences = SentenceSegmenter.segment("") + #expect(sentences.isEmpty) + } + + @Test func handlesSingleSentence() { + let text = "Just one sentence." + let sentences = SentenceSegmenter.segment(text) + #expect(sentences.count == 1) + #expect(sentences[0].text == "Just one sentence.") + } +}