commit 97433c2e7fb9052476bd8d76ad33dc36796e3c6b Author: Timothy Warren Date: Mon Apr 22 13:56:17 2019 -0400 Finished tutorial code diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2f88dba --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +**/*.rs.bk +Cargo.lock \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..9ec57bc --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "xml-pc" +version = "0.1.0" +authors = ["Timothy Warren "] +edition = "2018" + +[dependencies] diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..9d90b1c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,459 @@ +#[derive(Clone, Debug, PartialEq, Eq)] +struct Element { + name: String, + attributes: Vec<(String, String)>, + children: Vec, +} + +struct BoxedParser<'a, Output> { + parser: Box + 'a>, +} + +type ParseResult<'a, Output> = Result<(&'a str, Output), &'a str>; + +trait Parser<'a, Output> { + fn parse(&self, input: &'a str) -> ParseResult<'a, Output>; + + fn map(self, map_fn: F) -> BoxedParser<'a, NewOutput> + where + Self: Sized + 'a, + Output: 'a, + NewOutput: 'a, + F: Fn(Output) -> NewOutput + 'a, + { + BoxedParser::new(map(self, map_fn)) + } + + fn pred(self, pred_fn: F) -> BoxedParser<'a, Output> + where + Self: Sized + 'a, + Output: 'a, + F: Fn(&Output) -> bool + 'a, + { + BoxedParser::new(pred(self, pred_fn)) + } + + fn and_then(self, f: F) -> BoxedParser<'a, NewOutput> + where + Self: Sized + 'a, + Output: 'a, + NewOutput: 'a, + NextParser: Parser<'a, NewOutput> + 'a, + F: Fn(Output) -> NextParser + 'a, + { + BoxedParser::new(and_then(self, f)) + } +} + +impl<'a, F, Output> Parser<'a, Output> for F +where + F: Fn(&'a str) -> ParseResult, +{ + fn parse(&self, input: &'a str) -> ParseResult<'a, Output> { + self(input) + } +} + +impl<'a, Output> BoxedParser<'a, Output> { + fn new

(parser: P) -> Self + where + P: Parser<'a, Output> + 'a, + { + BoxedParser { + parser: Box::new(parser), + } + } +} + +impl<'a, Output> Parser<'a, Output> for BoxedParser<'a, Output> { + fn parse(&self, input: &'a str) -> ParseResult<'a, Output> { + self.parser.parse(input) + } +} + +fn match_literal<'a>(expected: &'static str) -> impl Parser<'a, ()> { + move |input: &'a str| match input.get(0..expected.len()) { + Some(next) if next == expected => Ok((&input[expected.len()..], ())), + _ => Err(input), + } +} + +fn identifier(input: &str) -> ParseResult { + let mut matched = String::new(); + let mut chars = input.chars(); + + match chars.next() { + Some(next) if next.is_alphabetic() => matched.push(next), + _ => return Err(input), + } + + while let Some(next) = chars.next() { + if next.is_alphanumeric() || next == '-' { + matched.push(next); + } else { + break; + } + } + + let next_index = matched.len(); + Ok((&input[next_index..], matched)) +} + +fn pair<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, (R1, R2)> +where + P1: Parser<'a, R1>, + P2: Parser<'a, R2>, +{ + move |input| { + parser1.parse(input).and_then(|(next_input, result1)| { + parser2 + .parse(next_input) + .map(|(last_input, result2)| (last_input, (result1, result2))) + }) + } +} + +fn map<'a, P, F, A, B>(parser: P, map_fn: F) -> impl Parser<'a, B> +where + P: Parser<'a, A>, + F: Fn(A) -> B, +{ + move |input| { + parser + .parse(input) + .map(|(next_input, result)| (next_input, map_fn(result))) + } +} + +fn left<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, R1> +where + P1: Parser<'a, R1>, + P2: Parser<'a, R2>, +{ + map(pair(parser1, parser2), |(left, _right)| left) +} + +fn right<'a, P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Parser<'a, R2> +where + P1: Parser<'a, R1>, + P2: Parser<'a, R2>, +{ + map(pair(parser1, parser2), |(_left, right)| right) +} + +fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec> +where + P: Parser<'a, A>, +{ + move |mut input| { + let mut result = Vec::new(); + + if let Ok((next_input, first_item)) = parser.parse(input) { + input = next_input; + result.push(first_item); + } else { + return Err(input); + } + + while let Ok((next_input, next_item)) = parser.parse(input) { + input = next_input; + result.push(next_item); + } + + Ok((input, result)) + } +} + +fn zero_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec> +where + P: Parser<'a, A>, +{ + move |mut input| { + let mut result = Vec::new(); + + while let Ok((next_input, next_item)) = parser.parse(input) { + input = next_input; + result.push(next_item); + } + + Ok((input, result)) + } +} + +fn any_char(input: &str) -> ParseResult { + match input.chars().next() { + Some(next) => Ok((&input[next.len_utf8()..], next)), + _ => Err(input), + } +} + +fn pred<'a, P, A, F>(parser: P, predicate: F) -> impl Parser<'a, A> +where + P: Parser<'a, A>, + F: Fn(&A) -> bool, +{ + move |input| { + if let Ok((next_input, value)) = parser.parse(input) { + if predicate(&value) { + return Ok((next_input, value)); + } + } + Err(input) + } +} + +fn whitespace_char<'a>() -> impl Parser<'a, char> { + pred(any_char, |c| c.is_whitespace()) +} + +fn space1<'a>() -> impl Parser<'a, Vec> { + one_or_more(whitespace_char()) +} + +fn space0<'a>() -> impl Parser<'a, Vec> { + zero_or_more(whitespace_char()) +} + +fn quoted_string<'a>() -> impl Parser<'a, String> { + right( + match_literal("\""), + left( + zero_or_more(any_char.pred(|c| *c != '"')), + match_literal("\""), + ), + ) + .map(|chars| chars.into_iter().collect()) +} + +fn attribute_pair<'a>() -> impl Parser<'a, (String, String)> { + pair(identifier, right(match_literal("="), quoted_string())) +} + +fn attributes<'a>() -> impl Parser<'a, Vec<(String, String)>> { + zero_or_more(right(space1(), attribute_pair())) +} + +fn element_start<'a>() -> impl Parser<'a, (String, Vec<(String, String)>)> { + right(match_literal("<"), pair(identifier, attributes())) +} + +fn single_element<'a>() -> impl Parser<'a, Element> { + left(element_start(), match_literal("/>")).map(|(name, attributes)| Element { + name, + attributes, + children: vec![], + }) +} + +fn open_element<'a>() -> impl Parser<'a, Element> { + left(element_start(), match_literal(">")).map(|(name, attributes)| Element { + name, + attributes, + children: vec![], + }) +} + +fn either<'a, P1, P2, A>(parser1: P1, parser2: P2) -> impl Parser<'a, A> +where + P1: Parser<'a, A>, + P2: Parser<'a, A>, +{ + move |input| match parser1.parse(input) { + ok @ Ok(_) => ok, + Err(_) => parser2.parse(input), + } +} + +fn element<'a>() -> impl Parser<'a, Element> { + whitespace_wrap(either(single_element(), parent_element())) +} + +fn close_element<'a>(expected_name: String) -> impl Parser<'a, String> { + right(match_literal(""))) + .pred(move |name| name == &expected_name) +} + +fn parent_element<'a>() -> impl Parser<'a, Element> { + open_element().and_then(|el| { + left(zero_or_more(element()), close_element(el.name.clone())).map(move |children| { + let mut el = el.clone(); + el.children = children; + el + }) + }) +} + +fn and_then<'a, P, F, A, B, NextP>(parser: P, f: F) -> impl Parser<'a, B> +where + P: Parser<'a, A>, + NextP: Parser<'a, B>, + F: Fn(A) -> NextP, +{ + move |input| match parser.parse(input) { + Ok((next_input, result)) => f(result).parse(next_input), + Err(err) => Err(err), + } +} + +fn whitespace_wrap<'a, P, A>(parser: P) -> impl Parser<'a, A> +where + P: Parser<'a, A>, +{ + right(space0(), left(parser, space0())) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn literal_parser() { + let parse_joe = match_literal("Hello Joe!"); + assert_eq!(Ok(("", ())), parse_joe.parse("Hello Joe!")); + assert_eq!( + Ok((" Hello Robert!", ())), + parse_joe.parse("Hello Joe! Hello Robert!") + ); + assert_eq!(Err("Hello Mike!"), parse_joe.parse("Hello Mike!")); + } + + #[test] + fn identifier_parser() { + assert_eq!( + Ok(("", "i-am-an-identifier".to_string())), + identifier("i-am-an-identifier") + ); + assert_eq!( + Ok((" entirely an identifier", "not".to_string())), + identifier("not entirely an identifier") + ); + assert_eq!( + Err("!not at all an identifier"), + identifier("!not at all an identifier") + ); + } + + #[test] + fn pair_combinator() { + let tag_opener = pair(match_literal("<"), identifier); + assert_eq!( + Ok(("/>", ((), "my-first-element".to_string()))), + tag_opener.parse("") + ); + assert_eq!(Err("oops"), tag_opener.parse("oops")); + assert_eq!(Err("!oops"), tag_opener.parse("!oops")); + } + + #[test] + fn right_combinator() { + let tag_opener = right(match_literal("<"), identifier); + assert_eq!( + Ok(("/>", "my-first-element".to_string())), + tag_opener.parse("") + ); + assert_eq!(Err("oops"), tag_opener.parse("oops")); + assert_eq!(Err("!oops"), tag_opener.parse("") + ) + } + + #[test] + fn xml_parser() { + let doc = r#" + + + + + + "#; + let parsed_doc = Element { + name: "top".to_string(), + attributes: vec![("label".to_string(), "Top".to_string())], + children: vec![ + Element { + name: "semi-bottom".to_string(), + attributes: vec![("label".to_string(), "Bottom".to_string())], + children: vec![], + }, + Element { + name: "middle".to_string(), + attributes: vec![], + children: vec![Element { + name: "bottom".to_string(), + attributes: vec![("label".to_string(), "Another bottom".to_string())], + children: vec![] + }], + }, + ], + }; + assert_eq!(Ok(("", parsed_doc)), element().parse(doc)); + } + + #[test] + fn mismatched_closing_tag() { + let doc = r#" + + + "#; + + assert_eq!(Err(""), element().parse(doc)); + } +}