//! # Naive JSON Parser
//!
//! Based on [JSON Parser with JavaScript](https://lihautan.com/json-parser-with-javascript/)
#![forbid(unsafe_code)]

use std::collections::HashMap;
use std::iter::FromIterator;

/// The type of JSON value
#[derive(Debug, PartialEq)]
pub enum JSONValue {
    /// Object Literal
    Object(HashMap<String, JSONValue>),

    /// Array Literal
    Array(Vec<JSONValue>),

    /// String Literal
    String(String),

    /// Number Literal
    Number(f64),

    /// True Literal
    True,

    /// False Literal
    False,

    /// Null Literal
    Null,
}

/// The type of error returned by the parser
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// The input looks like JSON, but seems to end
    UnexpectedEndOfInput(String),

    /// Looks like JSON, but seems to have characters after it should
    ExpectedEndOfInput(String),

    /// Wasn't this supposed to be an object literal?
    ExpectedObjectKey(String),

    /// Hey, wasn't there supposed to be...?
    ExpectedToken(String),

    /// What's this character?
    UnexpectedToken(String),

    /// Shouldn't this be a numeral?
    ExpectedDigit(String),

    /// There's a backslash...were you going somewhere with that?
    ExpectedEscapeChar(String),

    /// Should be a unicode escape character...missing a few digits?
    ExpectedUnicodeEscape(String),
}

/// This struct holds a little state for parsing
#[derive(Debug, PartialEq)]
pub struct JSON {
    /// The input JSON String as a character array
    chars: Vec<char>,

    /// The internal parsing index
    i: usize,
}

impl JSON {
    /// Private constructor
    fn new(json: &str) -> Self {
        JSON {
            chars: json.chars().collect(),
            i: 0,
        }
    }

    /// Look at the character under the parsing index without consuming it.
    ///
    /// Returns `None` once the index has reached the end of the input, which lets
    /// every caller handle truncated input without indexing out of bounds.
    fn peek(&self) -> Option<char> {
        self.chars.get(self.i).copied()
    }

    /// Build the error reported when the input stops in the middle of `context`.
    fn unexpected_end(context: &str) -> ParseError {
        ParseError::UnexpectedEndOfInput(format!(
            "Unexpected end of input while parsing {}.",
            context
        ))
    }

    /// Parse a `JSONValue` from the current JSON string
    ///
    /// Leading whitespace is skipped, then each parse method is tried in turn until
    /// one of them recognizes the input. The attempts are made lazily: as soon as one
    /// method yields a value, no further method touches the input.
    ///
    /// # Errors
    ///
    /// * Any error produced by the parse method that recognized the value
    /// * `ParseError::UnexpectedEndOfInput` if there is no input left
    /// * `ParseError::UnexpectedToken` if the next character cannot start a value
    fn parse_value(&mut self) -> Result<JSONValue, ParseError> {
        self.skip_whitespace();

        if let Some(value) = self.parse_string()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_number()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_object()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_array()? {
            return Ok(value);
        }
        if let Some(value) = self.parse_keyword("true", JSONValue::True)? {
            return Ok(value);
        }
        if let Some(value) = self.parse_keyword("false", JSONValue::False)? {
            return Ok(value);
        }
        if let Some(value) = self.parse_keyword("null", JSONValue::Null)? {
            return Ok(value);
        }

        match self.peek() {
            Some(ch) => Err(ParseError::UnexpectedToken(format!(
                r#"Unexpected character "{}"."#,
                ch
            ))),
            None => Err(ParseError::UnexpectedEndOfInput(String::from(
                "Doesn't seem to be valid JSON",
            ))),
        }
    }

    /// See if there's a `JSONValue::Object` next in the JSON
    ///
    /// Returns `Ok(None)` without consuming anything when the next character is not `{`.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedObjectKey` if a member does not start with a string key
    /// * `ParseError::ExpectedToken` if a `,` or `:` separator is missing
    /// * `ParseError::UnexpectedEndOfInput` if the input ends before the closing `}`
    /// * Any error from parsing a key or a member value
    fn parse_object(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('{') {
            return Ok(None);
        }

        self.i += 1;
        self.skip_whitespace();

        let mut result: HashMap<String, JSONValue> = HashMap::new();
        let mut initial = true;

        // if it is not '}',
        // we take the path of string -> whitespace -> ':' -> value -> ...
        loop {
            match self.peek() {
                None => return Err(Self::unexpected_end("an object")),
                Some('}') => break,
                Some(_) => {}
            }

            if !initial {
                self.eat(',')?;
                self.skip_whitespace();
            }

            let key = match self.parse_string()? {
                Some(JSONValue::String(key)) => key,
                _ => {
                    return Err(match self.peek() {
                        None => Self::unexpected_end("an object key"),
                        Some(ch) => ParseError::ExpectedObjectKey(format!(
                            r#"Expected a string key, found "{}"."#,
                            ch
                        )),
                    });
                }
            };

            self.skip_whitespace();
            self.eat(':')?;

            let value = self.parse_value()?;
            result.insert(key, value);

            // Allow whitespace between the value and the following ',' or '}'
            self.skip_whitespace();
            initial = false;
        }

        // Move to the next character: '}'
        self.i += 1;

        Ok(Some(JSONValue::Object(result)))
    }

    /// See if there's a `JSONValue::Array` next in the JSON
    ///
    /// Returns `Ok(None)` without consuming anything when the next character is not `[`.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedToken` if elements are not separated by `,`
    /// * `ParseError::UnexpectedEndOfInput` if the input ends before the closing `]`
    /// * Any error from parsing an element
    fn parse_array(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('[') {
            return Ok(None);
        }

        self.i += 1;
        self.skip_whitespace();

        let mut result: Vec<JSONValue> = vec![];
        let mut initial = true;

        loop {
            match self.peek() {
                None => return Err(Self::unexpected_end("an array")),
                Some(']') => break,
                Some(_) => {}
            }

            if !initial {
                self.eat(',')?;
            }

            let value = self.parse_value()?;
            result.push(value);

            // Allow whitespace between the value and the following ',' or ']'
            self.skip_whitespace();
            initial = false;
        }

        // move to next character: ']'
        self.i += 1;

        Ok(Some(JSONValue::Array(result)))
    }

    /// See if there's a `JSONValue::String` next in the JSON
    ///
    /// Returns `Ok(None)` without consuming anything when the next character is not `"`.
    ///
    /// # Errors
    ///
    /// * `ParseError::UnexpectedEndOfInput` if the closing `"` is missing
    /// * `ParseError::UnexpectedToken` for a raw control character (below U+0020)
    /// * `ParseError::ExpectedEscapeChar` / `ParseError::ExpectedUnicodeEscape` for a
    ///   malformed escape sequence
    fn parse_string(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('"') {
            return Ok(None);
        }

        self.i += 1;
        let mut result = String::new();

        loop {
            let ch = match self.peek() {
                Some(ch) => ch,
                None => return Err(Self::unexpected_end("a string")),
            };
            self.i += 1;

            match ch {
                '"' => return Ok(Some(JSONValue::String(result))),
                '\\' => result.push(self.parse_escape()?),
                // JSON requires control characters to be written as escapes
                ch if (ch as u32) < 0x20 => {
                    return Err(ParseError::UnexpectedToken(format!(
                        "Unescaped control character U+{:04X} in string.",
                        ch as u32
                    )));
                }
                ch => result.push(ch),
            }
        }
    }

    /// Decode the escape sequence that follows a backslash inside a string.
    ///
    /// The backslash itself must already have been consumed.
    fn parse_escape(&mut self) -> Result<char, ParseError> {
        let ch = match self.peek() {
            Some(ch) => ch,
            None => {
                return Err(ParseError::ExpectedEscapeChar(String::from(
                    "Expected an escape character after the backslash.",
                )));
            }
        };
        self.i += 1;

        let decoded = match ch {
            '"' => '"',
            '\\' => '\\',
            '/' => '/',
            'b' => '\u{0008}',
            'f' => '\u{000C}',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'u' => return self.parse_unicode_escape(),
            other => {
                return Err(ParseError::ExpectedEscapeChar(format!(
                    r#"Invalid escape character "{}"."#,
                    other
                )));
            }
        };

        Ok(decoded)
    }

    /// Decode the hex digits of a `\uXXXX` escape (the `\u` is already consumed).
    ///
    /// A high surrogate must be followed by a second `\uXXXX` escape holding the low
    /// surrogate; the pair is combined into a single character. A surrogate on its own
    /// cannot be stored in a Rust `String`, so it is reported as an error.
    fn parse_unicode_escape(&mut self) -> Result<char, ParseError> {
        let first = self.parse_hex4()?;

        let code = if (0xD800..=0xDBFF).contains(&first) {
            let has_second_escape =
                self.peek() == Some('\\') && self.chars.get(self.i + 1) == Some(&'u');
            if !has_second_escape {
                return Err(ParseError::ExpectedUnicodeEscape(String::from(
                    "Expected a low surrogate escape after the high surrogate.",
                )));
            }
            self.i += 2;

            let second = self.parse_hex4()?;
            if !(0xDC00..=0xDFFF).contains(&second) {
                return Err(ParseError::ExpectedUnicodeEscape(format!(
                    "Expected a low surrogate, found \\u{:04X}.",
                    second
                )));
            }

            0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00)
        } else {
            first
        };

        char::from_u32(code).ok_or_else(|| {
            ParseError::ExpectedUnicodeEscape(format!(
                "\\u{:04X} is not a valid unicode character.",
                code
            ))
        })
    }

    /// Read exactly four hexadecimal digits and return their numeric value.
    fn parse_hex4(&mut self) -> Result<u32, ParseError> {
        let end = self.i + 4;
        let digits = match self.chars.get(self.i..end) {
            Some(digits) => digits,
            None => {
                return Err(ParseError::ExpectedUnicodeEscape(String::from(
                    "Expected four hexadecimal digits.",
                )));
            }
        };

        let mut code = 0u32;
        for &digit in digits {
            match digit.to_digit(16) {
                Some(value) => code = code * 16 + value,
                None => {
                    return Err(ParseError::ExpectedUnicodeEscape(format!(
                        r#"Expected a hexadecimal digit, found "{}"."#,
                        digit
                    )));
                }
            }
        }

        self.i = end;
        Ok(code)
    }

    /// See if there's a `JSONValue::Number` next in the JSON
    ///
    /// Returns `Ok(None)` without consuming anything when the next character is neither
    /// `-` nor an ASCII digit. The accepted shape is an optional `-`, an integer part
    /// (`0`, or a run of digits), an optional fraction and an optional exponent.
    ///
    /// # Errors
    ///
    /// `ParseError::ExpectedDigit` if a sign, decimal point or exponent marker is not
    /// followed by at least one digit.
    fn parse_number(&mut self) -> Result<Option<JSONValue>, ParseError> {
        let start = self.i;

        match self.peek() {
            Some('-') => self.i += 1,
            Some(ch) if ch.is_ascii_digit() => {}
            _ => return Ok(None),
        }

        // Integer part: a lone '0', or one or more digits
        match self.peek() {
            Some('0') => self.i += 1,
            _ => self.eat_digits()?,
        }

        // Optional fraction
        if self.peek() == Some('.') {
            self.i += 1;
            self.eat_digits()?;
        }

        // Optional exponent
        if matches!(self.peek(), Some('e') | Some('E')) {
            self.i += 1;
            if matches!(self.peek(), Some('+') | Some('-')) {
                self.i += 1;
            }
            self.eat_digits()?;
        }

        let text = String::from_iter(&self.chars[start..self.i]);
        match text.parse::<f64>() {
            Ok(number) => Ok(Some(JSONValue::Number(number))),
            Err(_) => Err(ParseError::ExpectedDigit(format!(
                r#""{}" is not a valid number."#,
                text
            ))),
        }
    }

    /// Consume a run of one or more ASCII digits.
    ///
    /// Returns `ParseError::ExpectedDigit` if the next character is not a digit.
    fn eat_digits(&mut self) -> Result<(), ParseError> {
        let start = self.i;

        while matches!(self.peek(), Some(ch) if ch.is_ascii_digit()) {
            self.i += 1;
        }

        if self.i > start {
            return Ok(());
        }

        Err(ParseError::ExpectedDigit(match self.peek() {
            Some(ch) => format!(r#"Expected a digit, found "{}"."#, ch),
            None => String::from("Expected a digit, found the end of the input."),
        }))
    }

    /// See if there's a `JSONValue::True`, `JSONValue::False`, or a `JSONValue::Null` next in the JSON
    ///
    /// If the upcoming characters spell `search`, they are consumed and `value` is
    /// returned; otherwise nothing is consumed and the result is `Ok(None)`.
    fn parse_keyword(
        &mut self,
        search: &str,
        value: JSONValue,
    ) -> Result<Option<JSONValue>, ParseError> {
        let len = search.chars().count();

        // `get` yields `None` when fewer than `len` characters remain
        let matched = self
            .chars
            .get(self.i..self.i + len)
            .map_or(false, |slice| slice.iter().copied().eq(search.chars()));

        if matched {
            self.i += len;
            return Ok(Some(value));
        }

        Ok(None)
    }

    /// Increment the internal index until the next character is not a whitespace character
    ///
    /// Only the four whitespace characters JSON allows are skipped: space, tab,
    /// line feed and carriage return. Stops quietly at the end of the input.
    fn skip_whitespace(&mut self) {
        while matches!(
            self.peek(),
            Some(' ') | Some('\t') | Some('\n') | Some('\r')
        ) {
            self.i += 1;
        }
    }

    /// 'Eat' the specified character
    ///
    /// * If the next `char` matches the one passed, the internal index is incremented
    /// * If the next `char` does not match the one passed, a `ParseError::ExpectedToken`
    ///   error is returned
    /// * If there is no next `char`, a `ParseError::UnexpectedEndOfInput` error is returned
    fn eat(&mut self, ch: char) -> Result<(), ParseError> {
        match self.peek() {
            Some(next) if next == ch => {
                self.i += 1;
                Ok(())
            }
            Some(_) => {
                let msg = format!(r#"Expected "{}"."#, ch);
                Err(ParseError::ExpectedToken(msg))
            }
            None => Err(ParseError::UnexpectedEndOfInput(format!(
                r#"Expected "{}", found the end of the input."#,
                ch
            ))),
        }
    }

    /// Convert a `&str` containing JSON into a `Result<JSONValue, ParseError>`
    ///
    /// The whole input must be a single JSON value, optionally surrounded by whitespace.
    ///
    /// # Errors
    ///
    /// * Any `ParseError` raised while parsing the value
    /// * `ParseError::ExpectedEndOfInput` if anything other than whitespace follows it
    pub fn parse(json: &str) -> Result<JSONValue, ParseError> {
        let mut parser = JSON::new(json);
        let value = parser.parse_value()?;

        parser.skip_whitespace();
        match parser.peek() {
            None => Ok(value),
            Some(ch) => Err(ParseError::ExpectedEndOfInput(format!(
                r#"Unexpected character "{}" after the end of the JSON value."#,
                ch
            ))),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_keyword() {
        let mut parser = JSON::new(r#""foobarbaz""#);
        let res = JSON::parse_keyword(&mut parser, "true", JSONValue::True);
        assert_eq!(res, Ok(None));

        let mut parser = JSON::new("true");
        let res = JSON::parse_keyword(&mut parser, "true", JSONValue::True);
        assert_eq!(res, Ok(Some(JSONValue::True)));
    }

    #[test]
    fn skip_whitespace() {
        let mut parser = JSON::new(" \t\r\nx");
        parser.skip_whitespace();
        assert_eq!('x', parser.chars[parser.i]);
    }

    #[test]
    fn parse_empty_array() {
        let mut parser = JSON::new("[]");
        let res = JSON::parse_value(&mut parser);
        assert_eq!(res, Ok(JSONValue::Array(vec![])));
    }

    #[test]
    fn can_parse_array_of_keywords() {
        let result = JSON::parse("[true,false,null]");
        assert_eq!(
            result,
            Ok(JSONValue::Array(vec![
                JSONValue::True,
                JSONValue::False,
                JSONValue::Null
            ]))
        );
    }
}