#[derive(Clone, Debug, PartialEq, Eq)] pub struct Element { name: String, attributes: Vec<(String, String)>, children: Vec, } pub struct BoxedParser<'a, Output> { parser: Box + 'a>, } pub type ParseResult<'a, Output> = Result<(&'a str, Output), &'a str>; pub trait Parser<'a, Output> { fn parse(&self, input: &'a str) -> ParseResult<'a, Output>; fn map(self, map_fn: F) -> BoxedParser<'a, NewOutput> where Self: Sized + 'a, Output: 'a, NewOutput: 'a, F: Fn(Output) -> NewOutput + 'a, { BoxedParser::new(map(self, map_fn)) } fn pred(self, pred_fn: F) -> BoxedParser<'a, Output> where Self: Sized + 'a, Output: 'a, F: Fn(&Output) -> bool + 'a, { BoxedParser::new(pred(self, pred_fn)) } fn and_then(self, f: F) -> BoxedParser<'a, NewOutput> where Self: Sized + 'a, Output: 'a, NewOutput: 'a, NextParser: Parser<'a, NewOutput> + 'a, F: Fn(Output) -> NextParser + 'a, { BoxedParser::new(and_then(self, f)) } } impl<'a, F, Output> Parser<'a, Output> for F where F: Fn(&'a str) -> ParseResult, { fn parse(&self, input: &'a str) -> ParseResult<'a, Output> { self(input) } } impl<'a, Output> BoxedParser<'a, Output> { fn new

(parser: P) -> Self where P: Parser<'a, Output> + 'a, { BoxedParser { parser: Box::new(parser), } } } impl<'a, Output> Parser<'a, Output> for BoxedParser<'a, Output> { fn parse(&self, input: &'a str) -> ParseResult<'a, Output> { self.parser.parse(input) } } fn match_literal<'a>(expected: &'static str) -> impl Parser<'a, ()> { move |input: &'a str| match input.get(0..expected.len()) { Some(next) if next == expected => Ok((&input[expected.len()..], ())), _ => Err(input), } } fn identifier(input: &str) -> ParseResult { let mut matched = String::new(); let mut chars = input.chars(); match chars.next() { Some(next) if next.is_alphabetic() => matched.push(next), _ => return Err(input), } while let Some(next) = chars.next() { if next.is_alphanumeric() || next == '-' { matched.push(next); } else { break; } } let next_index = matched.len(); Ok((&input[next_index..], matched)) } fn pair<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, (R1, R2)> where P1: Parser<'a, R1>, P2: Parser<'a, R2>, { move |input| { parser1.parse(input).and_then(|(next_input, result1)| { parser2 .parse(next_input) .map(|(last_input, result2)| (last_input, (result1, result2))) }) } } fn map<'a, P, F, A, B>(parser: P, map_fn: F) -> impl Parser<'a, B> where P: Parser<'a, A>, F: Fn(A) -> B, { move |input| { parser .parse(input) .map(|(next_input, result)| (next_input, map_fn(result))) } } fn left<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, R1> where P1: Parser<'a, R1>, P2: Parser<'a, R2>, { map(pair(parser1, parser2), |(left, _right)| left) } fn right<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, R2> where P1: Parser<'a, R1>, P2: Parser<'a, R2>, { map(pair(parser1, parser2), |(_left, right)| right) } fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec> where P: Parser<'a, A>, { move |mut input| { let mut result = Vec::new(); if let Ok((next_input, first_item)) = parser.parse(input) { input = next_input; result.push(first_item); } else { return Err(input); } while let Ok((next_input, next_item)) = parser.parse(input) { input = next_input; result.push(next_item); } Ok((input, result)) } } fn zero_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec> where P: Parser<'a, A>, { move |mut input| { let mut result = Vec::new(); while let Ok((next_input, next_item)) = parser.parse(input) { input = next_input; result.push(next_item); } Ok((input, result)) } } fn any_char(input: &str) -> ParseResult { match input.chars().next() { Some(next) => Ok((&input[next.len_utf8()..], next)), _ => Err(input), } } fn pred<'a, P, A, F>(parser: P, predicate: F) -> impl Parser<'a, A> where P: Parser<'a, A>, F: Fn(&A) -> bool, { move |input| { if let Ok((next_input, value)) = parser.parse(input) { if predicate(&value) { return Ok((next_input, value)); } } Err(input) } } fn whitespace_char<'a>() -> impl Parser<'a, char> { pred(any_char, |c| c.is_whitespace()) } fn space1<'a>() -> impl Parser<'a, Vec> { one_or_more(whitespace_char()) } fn space0<'a>() -> impl Parser<'a, Vec> { zero_or_more(whitespace_char()) } fn quoted_string<'a>() -> impl Parser<'a, String> { right( match_literal("\""), left( zero_or_more(any_char.pred(|c| *c != '"')), match_literal("\""), ), ) .map(|chars| chars.into_iter().collect()) } fn attribute_pair<'a>() -> impl Parser<'a, (String, String)> { pair(identifier, right(match_literal("="), quoted_string())) } fn attributes<'a>() -> impl Parser<'a, Vec<(String, String)>> { zero_or_more(right(space1(), attribute_pair())) } fn element_start<'a>() -> impl Parser<'a, (String, Vec<(String, String)>)> { right(match_literal("<"), pair(identifier, attributes())) } fn single_element<'a>() -> impl Parser<'a, Element> { left(element_start(), match_literal("/>")).map(|(name, attributes)| Element { name, attributes, children: vec![], }) } fn open_element<'a>() -> impl Parser<'a, Element> { left(element_start(), match_literal(">")).map(|(name, attributes)| Element { name, attributes, children: vec![], }) } fn either<'a, P1, P2, A>(parser1: P1, parser2: P2) -> impl Parser<'a, A> where P1: Parser<'a, A>, P2: Parser<'a, A>, { move |input| match parser1.parse(input) { ok @ Ok(_) => ok, Err(_) => parser2.parse(input), } } pub fn element<'a>() -> impl Parser<'a, Element> { whitespace_wrap(either(single_element(), parent_element())) } fn close_element<'a>(expected_name: String) -> impl Parser<'a, String> { right(match_literal(""))) .pred(move |name| name == &expected_name) } fn parent_element<'a>() -> impl Parser<'a, Element> { open_element().and_then(|el| { left(zero_or_more(element()), close_element(el.name.clone())).map(move |children| { let mut el = el.clone(); el.children = children; el }) }) } fn and_then<'a, P, F, A, B, NextP>(parser: P, f: F) -> impl Parser<'a, B> where P: Parser<'a, A>, NextP: Parser<'a, B>, F: Fn(A) -> NextP, { move |input| match parser.parse(input) { Ok((next_input, result)) => f(result).parse(next_input), Err(err) => Err(err), } } fn whitespace_wrap<'a, P, A>(parser: P) -> impl Parser<'a, A> where P: Parser<'a, A>, { right(space0(), left(parser, space0())) } #[cfg(test)] mod tests { use super::*; #[test] fn literal_parser() { let parse_joe = match_literal("Hello Joe!"); assert_eq!(Ok(("", ())), parse_joe.parse("Hello Joe!")); assert_eq!( Ok((" Hello Robert!", ())), parse_joe.parse("Hello Joe! Hello Robert!") ); assert_eq!(Err("Hello Mike!"), parse_joe.parse("Hello Mike!")); } #[test] fn identifier_parser() { assert_eq!( Ok(("", "i-am-an-identifier".to_string())), identifier("i-am-an-identifier") ); assert_eq!( Ok((" entirely an identifier", "not".to_string())), identifier("not entirely an identifier") ); assert_eq!( Err("!not at all an identifier"), identifier("!not at all an identifier") ); } #[test] fn pair_combinator() { let tag_opener = pair(match_literal("<"), identifier); assert_eq!( Ok(("/>", ((), "my-first-element".to_string()))), tag_opener.parse("") ); assert_eq!(Err("oops"), tag_opener.parse("oops")); assert_eq!(Err("!oops"), tag_opener.parse("!oops")); } #[test] fn right_combinator() { let tag_opener = right(match_literal("<"), identifier); assert_eq!( Ok(("/>", "my-first-element".to_string())), tag_opener.parse("") ); assert_eq!(Err("oops"), tag_opener.parse("oops")); assert_eq!(Err("!oops"), tag_opener.parse("") ) } #[test] fn xml_parser() { let doc = r#" "#; let parsed_doc = Element { name: "top".to_string(), attributes: vec![("label".to_string(), "Top".to_string())], children: vec![ Element { name: "semi-bottom".to_string(), attributes: vec![("label".to_string(), "Bottom".to_string())], children: vec![], }, Element { name: "middle".to_string(), attributes: vec![], children: vec![Element { name: "bottom".to_string(), attributes: vec![("label".to_string(), "Another bottom".to_string())], children: vec![] }], }, ], }; assert_eq!(Ok(("", parsed_doc)), element().parse(doc)); } #[test] fn mismatched_closing_tag() { let doc = r#" "#; assert_eq!(Err(""), element().parse(doc)); } }