// // QueryParser.swift // SwiftSoup // // Created by Nabil Chatbi on 23/10/16. // Copyright © 2016 Nabil Chatbi.. All rights reserved. // import Foundation /** * Parses a CSS selector into an Evaluator tree. */ public class QueryParser { private static let combinators: [String] = [",", ">", "+", "~", " "] private static let AttributeEvals: [String] = ["=", "!=", "^=", "$=", "*=", "~="] private var tq: TokenQueue private var query: String private var evals: Array = Array() /** * Create a new QueryParser. * @param query CSS query */ private init(_ query: String) { self.query = query self.tq = TokenQueue(query) } /** * Parse a CSS query into an Evaluator. * @param query CSS query * @return Evaluator */ public static func parse(_ query: String)throws->Evaluator { let p = QueryParser(query) return try p.parse() } /** * Parse the query * @return Evaluator */ public func parse()throws->Evaluator { tq.consumeWhitespace() if (tq.matchesAny(QueryParser.combinators)) { // if starts with a combinator, use root as elements evals.append( StructuralEvaluator.Root()) try combinator(tq.consume()) } else { try findElements() } while (!tq.isEmpty()) { // hierarchy and extras let seenWhite: Bool = tq.consumeWhitespace() if (tq.matchesAny(QueryParser.combinators)) { try combinator(tq.consume()) } else if (seenWhite) { try combinator(" " as Character) } else { // E.class, E#id, E[attr] etc. AND try findElements() // take next el, #. etc off queue } } if (evals.count == 1) { return evals[0] } return CombiningEvaluator.And(evals) } private func combinator(_ combinator: Character)throws { tq.consumeWhitespace() let subQuery: String = consumeSubQuery() // support multi > childs var rootEval: Evaluator? // the new topmost evaluator var currentEval: Evaluator? // the evaluator the new eval will be combined to. could be root, or rightmost or. let newEval: Evaluator = try QueryParser.parse(subQuery) // the evaluator to add into target evaluator var replaceRightMost: Bool = false if (evals.count == 1) { currentEval = evals[0] rootEval = currentEval // make sure OR (,) has precedence: if (((rootEval as? CombiningEvaluator.Or) != nil) && combinator != ",") { currentEval = (currentEval as! CombiningEvaluator.Or).rightMostEvaluator() replaceRightMost = true } } else { currentEval = CombiningEvaluator.And(evals) rootEval = currentEval } evals.removeAll() // for most combinators: change the current eval into an AND of the current eval and the new eval if (combinator == ">") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediateParent(currentEval!))} else if (combinator == " ") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.Parent(currentEval!))} else if (combinator == "+") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.ImmediatePreviousSibling(currentEval!))} else if (combinator == "~") {currentEval = CombiningEvaluator.And(newEval, StructuralEvaluator.PreviousSibling(currentEval!))} else if (combinator == ",") { // group or. let or: CombiningEvaluator.Or if ((currentEval as? CombiningEvaluator.Or) != nil) { or = currentEval as! CombiningEvaluator.Or or.add(newEval) } else { or = CombiningEvaluator.Or() or.add(currentEval!) or.add(newEval) } currentEval = or } else { throw Exception.Error(type: ExceptionType.SelectorParseException, Message: "Unknown combinator: \(String(combinator))") } if (replaceRightMost) { (rootEval as! CombiningEvaluator.Or).replaceRightMostEvaluator(currentEval!) } else { rootEval = currentEval } evals.append(rootEval!) } private func consumeSubQuery() -> String { let sq: StringBuilder = StringBuilder() while (!tq.isEmpty()) { if (tq.matches("(")) { sq.append("(").append(tq.chompBalanced("(", ")")).append(")") } else if (tq.matches("[")) { sq.append("[").append(tq.chompBalanced("[", "]")).append("]") } else if (tq.matchesAny(QueryParser.combinators)) { break } else { sq.append(tq.consume()) } } return sq.toString() } private func findElements()throws { if (tq.matchChomp("#")) { try byId() } else if (tq.matchChomp(".")) { try byClass()} else if (tq.matchesWord() || tq.matches("*|")) {try byTag()} else if (tq.matches("[")) {try byAttribute()} else if (tq.matchChomp("*")) { allElements()} else if (tq.matchChomp(":lt(")) {try indexLessThan()} else if (tq.matchChomp(":gt(")) {try indexGreaterThan()} else if (tq.matchChomp(":eq(")) {try indexEquals()} else if (tq.matches(":has(")) {try has()} else if (tq.matches(":contains(")) {try contains(false)} else if (tq.matches(":containsOwn(")) {try contains(true)} else if (tq.matches(":matches(")) {try matches(false)} else if (tq.matches(":matchesOwn(")) {try matches(true)} else if (tq.matches(":not(")) {try not()} else if (tq.matchChomp(":nth-child(")) {try cssNthChild(false, false)} else if (tq.matchChomp(":nth-last-child(")) {try cssNthChild(true, false)} else if (tq.matchChomp(":nth-of-type(")) {try cssNthChild(false, true)} else if (tq.matchChomp(":nth-last-of-type(")) {try cssNthChild(true, true)} else if (tq.matchChomp(":first-child")) {evals.append(Evaluator.IsFirstChild())} else if (tq.matchChomp(":last-child")) {evals.append(Evaluator.IsLastChild())} else if (tq.matchChomp(":first-of-type")) {evals.append(Evaluator.IsFirstOfType())} else if (tq.matchChomp(":last-of-type")) {evals.append(Evaluator.IsLastOfType())} else if (tq.matchChomp(":only-child")) {evals.append(Evaluator.IsOnlyChild())} else if (tq.matchChomp(":only-of-type")) {evals.append(Evaluator.IsOnlyOfType())} else if (tq.matchChomp(":empty")) {evals.append(Evaluator.IsEmpty())} else if (tq.matchChomp(":root")) {evals.append(Evaluator.IsRoot())} else // unhandled { throw Exception.Error(type: ExceptionType.SelectorParseException, Message: "Could not parse query \(query): unexpected token at \(tq.remainder())") } } private func byId()throws { let id: String = tq.consumeCssIdentifier() try Validate.notEmpty(string: id) evals.append(Evaluator.Id(id)) } private func byClass()throws { let className: String = tq.consumeCssIdentifier() try Validate.notEmpty(string: className) evals.append(Evaluator.Class(className.trim())) } private func byTag()throws { var tagName = tq.consumeElementSelector() try Validate.notEmpty(string: tagName) // namespaces: wildcard match equals(tagName) or ending in ":"+tagName if (tagName.startsWith("*|")) { evals.append( CombiningEvaluator.Or( Evaluator.Tag(tagName.trim().lowercased()), Evaluator.TagEndsWith(tagName.replacingOccurrences(of: "*|", with: ":").trim().lowercased()))) } else { // namespaces: if element name is "abc:def", selector must be "abc|def", so flip: if (tagName.contains("|")) { tagName = tagName.replacingOccurrences(of: "|", with: ":") } evals.append(Evaluator.Tag(tagName.trim())) } } private func byAttribute()throws { let cq: TokenQueue = TokenQueue(tq.chompBalanced("[", "]")) // content queue let key: String = cq.consumeToAny(QueryParser.AttributeEvals) // eq, not, start, end, contain, match, (no val) try Validate.notEmpty(string: key) cq.consumeWhitespace() if (cq.isEmpty()) { if (key.startsWith("^")) { evals.append(try Evaluator.AttributeStarting(key.substring(1))) } else { evals.append(Evaluator.Attribute(key)) } } else { if (cq.matchChomp("=")) { evals.append(try Evaluator.AttributeWithValue(key, cq.remainder())) } else if (cq.matchChomp("!=")) { evals.append(try Evaluator.AttributeWithValueNot(key, cq.remainder())) } else if (cq.matchChomp("^=")) { evals.append(try Evaluator.AttributeWithValueStarting(key, cq.remainder())) } else if (cq.matchChomp("$=")) { evals.append(try Evaluator.AttributeWithValueEnding(key, cq.remainder())) } else if (cq.matchChomp("*=")) { evals.append(try Evaluator.AttributeWithValueContaining(key, cq.remainder())) } else if (cq.matchChomp("~=")) { evals.append( Evaluator.AttributeWithValueMatching(key, Pattern.compile(cq.remainder()))) } else { throw Exception.Error(type: ExceptionType.SelectorParseException, Message: "Could not parse attribute query '\(query)': unexpected token at '\(cq.remainder())'") } } } private func allElements() { evals.append(Evaluator.AllElements()) } // pseudo selectors :lt, :gt, :eq private func indexLessThan()throws { evals.append(Evaluator.IndexLessThan(try consumeIndex())) } private func indexGreaterThan()throws { evals.append(Evaluator.IndexGreaterThan(try consumeIndex())) } private func indexEquals()throws { evals.append(Evaluator.IndexEquals(try consumeIndex())) } //pseudo selectors :first-child, :last-child, :nth-child, ... private static let NTH_AB: Pattern = Pattern.compile("((\\+|-)?(\\d+)?)n(\\s*(\\+|-)?\\s*\\d+)?", Pattern.CASE_INSENSITIVE) private static let NTH_B: Pattern = Pattern.compile("(\\+|-)?(\\d+)") private func cssNthChild(_ backwards: Bool, _ ofType: Bool)throws { let argS: String = tq.chompTo(")").trim().lowercased() let mAB: Matcher = QueryParser.NTH_AB.matcher(in: argS) let mB: Matcher = QueryParser.NTH_B.matcher(in: argS) var a: Int var b: Int if ("odd"==argS) { a = 2 b = 1 } else if ("even"==argS) { a = 2 b = 0 } else if (mAB.matches.count > 0) { mAB.find() a = mAB.group(3) != nil ? Int(mAB.group(1)!.replaceFirst(of: "^\\+", with: ""))! : 1 b = mAB.group(4) != nil ? Int(mAB.group(4)!.replaceFirst(of: "^\\+", with: ""))! : 0 } else if (mB.matches.count > 0) { a = 0 mB.find() b = Int(mB.group()!.replaceFirst(of: "^\\+", with: ""))! } else { throw Exception.Error(type: ExceptionType.SelectorParseException, Message: "Could not parse nth-index '\(argS)': unexpected format") } if (ofType) { if (backwards) { evals.append(Evaluator.IsNthLastOfType(a, b)) } else { evals.append(Evaluator.IsNthOfType(a, b)) } } else { if (backwards) { evals.append(Evaluator.IsNthLastChild(a, b)) } else { evals.append(Evaluator.IsNthChild(a, b)) } } } private func consumeIndex()throws->Int { let indexS: String = tq.chompTo(")").trim() try Validate.isTrue(val: StringUtil.isNumeric(indexS), msg: "Index must be numeric") return Int(indexS)! } // pseudo selector :has(el) private func has()throws { try tq.consume(":has") let subQuery: String = tq.chompBalanced("(", ")") try Validate.notEmpty(string: subQuery, msg: ":has(el) subselect must not be empty") evals.append(StructuralEvaluator.Has(try QueryParser.parse(subQuery))) } // pseudo selector :contains(text), containsOwn(text) private func contains(_ own: Bool)throws { try tq.consume(own ? ":containsOwn" : ":contains") let searchText: String = TokenQueue.unescape(tq.chompBalanced("(", ")")) try Validate.notEmpty(string: searchText, msg: ":contains(text) query must not be empty") if (own) { evals.append(Evaluator.ContainsOwnText(searchText)) } else { evals.append(Evaluator.ContainsText(searchText)) } } // :matches(regex), matchesOwn(regex) private func matches(_ own: Bool)throws { try tq.consume(own ? ":matchesOwn" : ":matches") let regex: String = tq.chompBalanced("(", ")") // don't unescape, as regex bits will be escaped try Validate.notEmpty(string: regex, msg: ":matches(regex) query must not be empty") if (own) { evals.append(Evaluator.MatchesOwn(Pattern.compile(regex))) } else { evals.append(Evaluator.Matches(Pattern.compile(regex))) } } // :not(selector) private func not()throws { try tq.consume(":not") let subQuery: String = tq.chompBalanced("(", ")") try Validate.notEmpty(string: subQuery, msg: ":not(selector) subselect must not be empty") evals.append(StructuralEvaluator.Not(try QueryParser.parse(subQuery))) } }