//! # Naive JSON Parser
//!
//! Based on [JSON Parser with JavaScript](https://lihautan.com/json-parser-with-javascript/)
//!
//! The parser is a small hand-written recursive-descent parser. It never
//! panics on malformed input: every failure is reported as a [`ParseError`].
#![forbid(unsafe_code)]

use std::collections::HashMap;
use std::fmt;
use std::iter::FromIterator;
use std::{char, u16};

/// The type of JSON value
#[derive(Debug, Clone, PartialEq)]
pub enum JSONValue {
    /// Object Literal
    Object(HashMap<String, JSONValue>),

    /// Array Literal
    Array(Vec<JSONValue>),

    /// String Literal
    String(String),

    /// Number Literal
    Number(f64),

    /// True Literal
    True,

    /// False Literal
    False,

    /// Null Literal
    Null,
}

/// The type of error returned by the parser
///
/// Every variant carries a human-readable description of what went wrong.
#[derive(Debug, Clone, PartialEq)]
pub enum ParseError {
    /// The input looks like JSON, but seems to end
    UnexpectedEndOfInput(String),

    /// Looks like JSON, but seems to have characters after it should
    ExpectedEndOfInput(String),

    /// Wasn't this supposed to be an object literal?
    ExpectedObjectKey(String),

    /// Hey, wasn't there supposed to be...?
    ExpectedToken(String),

    /// What's this character?
    UnexpectedToken(String),

    /// Shouldn't this be a numeral?
    ExpectedDigit(String),

    /// There's a backslash...were you going somewhere with that?
    ExpectedEscapeChar(String),

    /// Should be a unicode escape character...missing a few digits?
    ExpectedUnicodeEscape(String),
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let (kind, detail) = match self {
            ParseError::UnexpectedEndOfInput(d) => ("unexpected end of input", d),
            ParseError::ExpectedEndOfInput(d) => ("expected end of input", d),
            ParseError::ExpectedObjectKey(d) => ("expected object key", d),
            ParseError::ExpectedToken(d) => ("expected token", d),
            ParseError::UnexpectedToken(d) => ("unexpected token", d),
            ParseError::ExpectedDigit(d) => ("expected digit", d),
            ParseError::ExpectedEscapeChar(d) => ("expected escape character", d),
            ParseError::ExpectedUnicodeEscape(d) => ("expected unicode escape", d),
        };
        write!(f, "{}: {}", kind, detail)
    }
}

impl std::error::Error for ParseError {}

/// This struct holds a little state for parsing
#[derive(Debug, PartialEq)]
pub struct JSON {
    /// The input JSON String as a character array
    chars: Vec<char>,

    /// The internal parsing index. Ranges over `0..=chars.len()`; a value of
    /// `chars.len()` means the whole input has been consumed.
    i: usize,
}

impl JSON {
    /// Private constructor
    fn new(json: &str) -> Self {
        JSON {
            chars: json.chars().collect(),
            i: 0,
        }
    }

    /// Return the character at `index`, or `None` when `index` is past the input
    fn char_at(&self, index: usize) -> Option<char> {
        self.chars.get(index).copied()
    }

    /// Return the current character without consuming it (`None` at end of input)
    fn peek(&self) -> Option<char> {
        self.char_at(self.i)
    }

    /// Parse a `JSONValue` from the current JSON string
    ///
    /// Leading whitespace is skipped. Each value parser is tried in turn; the
    /// first one that recognises the current character wins.
    ///
    /// # Errors
    ///
    /// * `ParseError::UnexpectedEndOfInput` if the input ends where a value should start
    /// * `ParseError::UnexpectedToken` if the current character cannot start a value
    /// * Any error produced by the individual value parsers
    fn parse_value(&mut self) -> Result<JSONValue, ParseError> {
        self.skip_whitespace();

        if let Some(string) = self.parse_string()? {
            return Ok(string);
        }
        if let Some(number) = self.parse_number()? {
            return Ok(number);
        }
        if let Some(object) = self.parse_object()? {
            return Ok(object);
        }
        if let Some(array) = self.parse_array()? {
            return Ok(array);
        }
        if let Some(t) = self.parse_keyword("true", JSONValue::True)? {
            return Ok(t);
        }
        if let Some(f) = self.parse_keyword("false", JSONValue::False)? {
            return Ok(f);
        }
        if let Some(n) = self.parse_keyword("null", JSONValue::Null)? {
            return Ok(n);
        }

        match self.peek() {
            Some(ch) => Err(ParseError::UnexpectedToken(format!(
                "Unexpected character '{}' at index {}",
                ch, self.i
            ))),
            None => Err(ParseError::UnexpectedEndOfInput(String::from(
                "Doesn't seem to be valid JSON",
            ))),
        }
    }

    /// See if there's a `JSONValue::Object` next in the JSON
    ///
    /// Returns `Ok(None)` when the current character is not `{`.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedObjectKey` if a member does not start with a string key
    /// * `ParseError::ExpectedToken` if a `,` or `:` separator is missing
    /// * `ParseError::UnexpectedEndOfInput` if the input ends before the closing `}`
    fn parse_object(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('{') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result: HashMap<String, JSONValue> = HashMap::new();
        let mut initial = true;

        // if it is not '}',
        // we take the path of string -> whitespace -> ':' -> value -> ...
        loop {
            match self.peek() {
                Some('}') => break,
                None => {
                    return Err(ParseError::UnexpectedEndOfInput(String::from(
                        "Object literal is missing its closing '}'",
                    )))
                }
                Some(_) => {}
            }

            if !initial {
                self.eat(',')?;
                self.skip_whitespace();
            }

            let key = match self.parse_string()? {
                Some(JSONValue::String(s)) => s,
                _ => {
                    return Err(ParseError::ExpectedObjectKey(format!(
                        "Expected a string key at index {}",
                        self.i
                    )))
                }
            };

            self.skip_whitespace();
            self.eat(':')?;

            // `parse_value` skips the whitespace after the colon itself
            let value = self.parse_value()?;
            result.insert(key, value);

            initial = false;
            self.skip_whitespace();
        }

        // Move to the next character: '}'
        self.increment(1);

        Ok(Some(JSONValue::Object(result)))
    }

    /// See if there's a `JSONValue::Array` next in the JSON
    ///
    /// Returns `Ok(None)` when the current character is not `[`.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedToken` if elements are not separated by `,`
    /// * `ParseError::UnexpectedEndOfInput` if the input ends before the closing `]`
    fn parse_array(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('[') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result: Vec<JSONValue> = vec![];
        let mut initial = true;

        loop {
            match self.peek() {
                Some(']') => break,
                None => {
                    return Err(ParseError::UnexpectedEndOfInput(String::from(
                        "Array literal is missing its closing ']'",
                    )))
                }
                Some(_) => {}
            }

            if !initial {
                self.eat(',')?;
            }

            let value = self.parse_value()?;
            result.push(value);

            initial = false;
            // Allow whitespace between a value and the following ',' or ']'
            self.skip_whitespace();
        }

        // move to next character: ']'
        self.increment(1);

        Ok(Some(JSONValue::Array(result)))
    }

    /// See if there's a `JSONValue::String` next in the JSON
    ///
    /// Returns `Ok(None)` when the current character is not `"`.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedEscapeChar` for an unknown or truncated escape sequence
    /// * `ParseError::ExpectedUnicodeEscape` if `\u` is not followed by four hex digits
    /// * `ParseError::ExpectedToken` if the closing `"` is missing
    fn parse_string(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('"') {
            return Ok(None);
        }

        self.increment(1);

        let mut result = String::new();

        loop {
            let ch = match self.peek() {
                // Closing quote or end of input: let `eat` decide which it was
                Some('"') | None => break,
                Some(ch) => ch,
            };
            self.increment(1);

            if ch == '\\' {
                let decoded = self.parse_escape()?;
                result.push(decoded);
            } else {
                result.push(ch);
            }
        }

        self.eat('"')?;

        Ok(Some(JSONValue::String(result)))
    }

    /// Decode one escape sequence; the leading backslash is already consumed
    fn parse_escape(&mut self) -> Result<char, ParseError> {
        let ch = match self.peek() {
            Some(ch) => ch,
            None => {
                return Err(ParseError::ExpectedEscapeChar(String::from(
                    "Input ended right after a backslash",
                )))
            }
        };
        self.increment(1);

        let decoded = match ch {
            // These escapes stand for the character itself
            '"' | '\\' | '/' => ch,
            'b' => '\u{8}',
            'f' => '\u{c}',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'u' => return self.parse_unicode_escape(),
            other => {
                return Err(ParseError::ExpectedEscapeChar(format!(
                    "'\\{}' is not a valid escape sequence",
                    other
                )))
            }
        };

        Ok(decoded)
    }

    /// Decode a JSON unicode (utf16) escape; the leading `\u` is already consumed
    ///
    /// A high surrogate immediately followed by a `\uXXXX` low surrogate is
    /// combined into a single character. A lone surrogate decodes to the
    /// unicode replacement character.
    fn parse_unicode_escape(&mut self) -> Result<char, ParseError> {
        let first = self.parse_hex4()?;
        let mut units = vec![first];

        let is_high_surrogate = (0xD800..=0xDBFF).contains(&first);
        if is_high_surrogate
            && self.peek() == Some('\\')
            && self.char_at(self.i + 1) == Some('u')
        {
            let resume_at = self.i;
            self.increment(2);
            let second = self.parse_hex4()?;
            if (0xDC00..=0xDFFF).contains(&second) {
                units.push(second);
            } else {
                // Not a low surrogate: rewind so the main string loop decodes
                // the second escape on its own.
                self.i = resume_at;
            }
        }

        let decoded = char::decode_utf16(units)
            .next()
            .and_then(|unit| unit.ok())
            .unwrap_or(char::REPLACEMENT_CHARACTER);

        Ok(decoded)
    }

    /// Consume exactly four hex digits and return them as a UTF-16 code unit
    fn parse_hex4(&mut self) -> Result<u16, ParseError> {
        let digits = match self.chars.get(self.i..self.i + 4) {
            Some(digits) if digits.iter().all(|c| c.is_ascii_hexdigit()) => digits,
            _ => {
                return Err(ParseError::ExpectedUnicodeEscape(format!(
                    "Expected four hex digits after '\\u' at index {}",
                    self.i
                )))
            }
        };

        let hex = String::from_iter(digits);
        let code = u16::from_str_radix(&hex, 16)
            .map_err(|e| ParseError::ExpectedUnicodeEscape(format!("'{}', {:#?}", hex, e)))?;

        self.increment(4);

        Ok(code)
    }

    /// See if there's a `JSONValue::Number` next in the JSON
    ///
    /// Returns `Ok(None)` when the current character is neither a digit nor a
    /// minus sign. Otherwise the text is scanned as
    /// `-? digits ('.' digits)? (('e' | 'E') ('+' | '-')? digits)?`
    /// and handed to the standard `f64` parser.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedDigit` if a sign, decimal point, or exponent
    ///   marker is not followed by at least one digit
    fn parse_number(&mut self) -> Result<Option<JSONValue>, ParseError> {
        let start = self.i;

        // If it doesn't start with 0-9 or a minus sign, it's probably not a number
        match self.peek() {
            Some(ch) if ch == '-' || ch.is_ascii_digit() => {}
            _ => return Ok(None),
        }

        let mut n = start;

        // Minus sign
        if self.char_at(n) == Some('-') {
            n += 1;
        }

        // Integer Part
        n = self.scan_digits(n)?;

        // Decimal Part
        if self.char_at(n) == Some('.') {
            n = self.scan_digits(n + 1)?;
        }

        // Scientific notation part
        if matches!(self.char_at(n), Some('e') | Some('E')) {
            n += 1;

            if matches!(self.char_at(n), Some('-') | Some('+')) {
                n += 1;
            }

            // Exponent digits
            n = self.scan_digits(n)?;
        }

        let text = String::from_iter(&self.chars[start..n]);

        match text.parse::<f64>() {
            Ok(number) => {
                self.i = n;
                Ok(Some(JSONValue::Number(number)))
            }
            Err(e) => Err(ParseError::ExpectedDigit(format!("'{}', {:#?}", text, e))),
        }
    }

    /// Scan a run of one or more ASCII digits starting at `from`
    ///
    /// Returns the index just past the run without moving the parser.
    fn scan_digits(&self, from: usize) -> Result<usize, ParseError> {
        let count = self
            .chars
            .iter()
            .skip(from)
            .take_while(|c| c.is_ascii_digit())
            .count();

        if count == 0 {
            let found = match self.char_at(from) {
                Some(ch) => format!("'{}'", ch),
                None => String::from("end of input"),
            };
            return Err(ParseError::ExpectedDigit(format!(
                "Expected a digit at index {}, found {}",
                from, found
            )));
        }

        Ok(from + count)
    }

    /// See if there's a `JSONValue::True`, `JSONValue::False`, or a `JSONValue::Null` next in the JSON
    ///
    /// If the input at the current index starts with `search`, the index is
    /// moved past it and `value` is returned; otherwise `Ok(None)` is returned
    /// and the index is left alone.
    fn parse_keyword(
        &mut self,
        search: &str,
        value: JSONValue,
    ) -> Result<Option<JSONValue>, ParseError> {
        // Count characters, not bytes: `chars` is indexed by character
        let len = search.chars().count();

        let is_match = match self.chars.get(self.i..self.i + len) {
            Some(window) => window.iter().copied().eq(search.chars()),
            None => false,
        };

        if is_match {
            self.increment(len);
            return Ok(Some(value));
        }

        Ok(None)
    }

    /// Increment the internal index until the next character is not a whitespace character
    fn skip_whitespace(&mut self) {
        while self.peek().map_or(false, |ch| ch.is_ascii_whitespace()) {
            self.increment(1);
        }
    }

    /// 'Eat' the specified character
    ///
    /// * If the next `char` matches the one passed, the internal index is incremented
    /// * If the next `char` does not match the one passed (or the input has
    ///   ended), a `ParseError::ExpectedToken` error is returned
    fn eat(&mut self, ch: char) -> Result<(), ParseError> {
        if self.peek() != Some(ch) {
            let msg = format!("Expected {}.", ch);
            return Err(ParseError::ExpectedToken(msg));
        }

        self.skip();

        Ok(())
    }

    /// Skip a character
    fn skip(&mut self) {
        self.increment(1);
    }

    /// Do a checked increment of the internal pointer index
    ///
    /// The index never moves past `chars.len()`, which marks the end of input.
    fn increment(&mut self, amount: usize) {
        self.i = self.i.saturating_add(amount).min(self.chars.len());
    }

    /// Convert a `&str` containing JSON into a `Result<JSONValue, ParseError>`
    ///
    /// The whole input must be a single JSON value, optionally surrounded by
    /// whitespace.
    ///
    /// # Errors
    ///
    /// * `ParseError::ExpectedEndOfInput` if anything but whitespace follows the value
    /// * Any error produced while parsing the value itself
    pub fn parse(json: &str) -> Result<JSONValue, ParseError> {
        let mut parser = JSON::new(json);
        let value = parser.parse_value()?;

        parser.skip_whitespace();

        match parser.peek() {
            None => Ok(value),
            Some(ch) => Err(ParseError::ExpectedEndOfInput(format!(
                "Unexpected character '{}' at index {} after the JSON value",
                ch, parser.i
            ))),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_keyword() {
        let mut parser = JSON::new(r#""foobarbaz""#);
        let res = JSON::parse_keyword(&mut parser, "true", JSONValue::True);
        assert_eq!(res, Ok(None));

        let mut parser = JSON::new("true");
        let res = JSON::parse_keyword(&mut parser, "true", JSONValue::True);
        assert_eq!(res, Ok(Some(JSONValue::True)));
    }

    #[test]
    fn skip_whitespace() {
        let mut parser = JSON::new(" \t\r\nx");
        parser.skip_whitespace();
        assert_eq!('x', parser.chars[parser.i]);
    }

    #[test]
    fn parse_string() {
        let mut parser = JSON::new(r#""\t""#);
        let res = JSON::parse_string(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::String(String::from("\t")))));

        let mut parser = JSON::new(r#""\u203d""#);
        let res = JSON::parse_string(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::String(String::from("‽")))));
    }

    #[test]
    fn parse_string_escapes() {
        let res = JSON::parse(r#""\" \\ \/""#);
        assert_eq!(res, Ok(JSONValue::String(String::from("\" \\ /"))));

        // Surrogate pairs are combined; lone surrogates are replaced
        let res = JSON::parse(r#""\ud83d\ude00""#);
        assert_eq!(res, Ok(JSONValue::String(String::from("\u{1F600}"))));
        let res = JSON::parse(r#""\ud83d""#);
        assert_eq!(res, Ok(JSONValue::String(String::from("\u{FFFD}"))));
    }

    #[test]
    fn rejects_bad_escapes() {
        assert!(matches!(
            JSON::parse(r#""\x""#),
            Err(ParseError::ExpectedEscapeChar(_))
        ));
        assert!(matches!(
            JSON::parse(r#""\u12""#),
            Err(ParseError::ExpectedUnicodeEscape(_))
        ));
        assert!(matches!(
            JSON::parse(r#""ab\u1"#),
            Err(ParseError::ExpectedUnicodeEscape(_))
        ));
    }

    #[test]
    fn parse_empty_array() {
        let mut parser = JSON::new("[]");
        let res = JSON::parse_value(&mut parser);
        assert_eq!(res, Ok(JSONValue::Array(vec![])));
    }

    #[test]
    fn parse_number() {
        // This function works like I think, right?
        assert_ne!(','.is_ascii_digit(), true);

        let mut parser = JSON::new(r#""foo""#);
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(None));

        let mut parser = JSON::new("3.14159");
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::Number(3.14159f64))));

        let mut parser = JSON::new("3e4");
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::Number(3e4f64))));

        let mut parser = JSON::new("1.234,");
        let res = JSON::parse_number(&mut parser);
        assert_eq!(res, Ok(Some(JSONValue::Number(1.234f64))));
    }

    #[test]
    fn parse_number_edge_cases() {
        assert_eq!(JSON::parse("1"), Ok(JSONValue::Number(1f64)));
        assert_eq!(JSON::parse("-2.5E+2"), Ok(JSONValue::Number(-250f64)));

        for bad in &["-", "1e", "1e+", "1.", "-x"] {
            let res = JSON::parse(bad);
            assert!(
                matches!(res, Err(ParseError::ExpectedDigit(_))),
                "{:?} gave {:#?}",
                bad,
                res
            );
        }
    }

    #[test]
    fn can_parse_array_of_keywords() {
        let result = JSON::parse("[true,false,null]");
        assert_eq!(
            result,
            Ok(JSONValue::Array(vec![
                JSONValue::True,
                JSONValue::False,
                JSONValue::Null
            ]))
        );
    }

    #[test]
    fn parse_object() {
        let mut parser = JSON::new(r#"{"foo": "bar"}"#);
        let result = JSON::parse_object(&mut parser);

        let mut hash_map: HashMap<String, JSONValue> = HashMap::new();
        hash_map.insert(String::from("foo"), JSONValue::String(String::from("bar")));

        assert_eq!(result, Ok(Some(JSONValue::Object(hash_map))));
    }

    #[test]
    fn parse_json_types() {
        // Boolean / Null
        let res = JSON::parse("true");
        assert_eq!(res, Ok(JSONValue::True));
        let res = JSON::parse("false");
        assert_eq!(res, Ok(JSONValue::False));
        let res = JSON::parse("null");
        assert_eq!(res, Ok(JSONValue::Null));

        // Number
        let res = JSON::parse("9.38083151965");
        assert_eq!(
            res,
            Ok(JSONValue::Number(9.38083151965)),
            "Failed to parse number"
        );

        // String
        let res = JSON::parse(r#""/^$/""#);
        assert_eq!(
            res,
            Ok(JSONValue::String(String::from("/^$/"))),
            "Failed to parse string"
        );

        // Number array
        let res = JSON::parse("[1, 2, 3]");
        assert_eq!(
            res,
            Ok(JSONValue::Array(vec![
                JSONValue::Number(1f64),
                JSONValue::Number(2f64),
                JSONValue::Number(3f64)
            ]))
        );

        // Object array
        let result = JSON::parse("[{}]");
        assert_eq!(
            result,
            Ok(JSONValue::Array(vec![JSONValue::Object(HashMap::new())]))
        );
    }

    #[test]
    fn parse_nested_object() {
        let res = JSON::parse(r#"{"a": {"b": []}}"#);
        assert!(res.is_ok(), "{:#?}", res);
    }

    #[test]
    fn can_parse_arbitrary_json() {
        let result = JSON::parse(
            r#"[{
                "a": 9.38083151965,
                "b": 4e3
            }]"#,
        );
        assert!(
            result.is_ok(),
            "Failed on just number values: {:#?}",
            result
        );

        let result = JSON::parse(
            r#"[{
                "a": 9.38083151965,
                "b": 4e3,
                "c": [1, 2, 3],
                "d": "foo",
                "e": {
                    "f": {
                        "g": {
                            "h": null
                        }
                    }
                },
                "i": ["\"", "\\", "/", "\b", "\f", "\n", "\r", "\t", "\u0001", "\uface"]
            }]"#,
        );
        assert!(result.is_ok(), "{:#?}", result);
    }

    #[test]
    fn whitespace_inside_containers() {
        let res = JSON::parse(" [ 1 , 2 ] ");
        assert_eq!(
            res,
            Ok(JSONValue::Array(vec![
                JSONValue::Number(1f64),
                JSONValue::Number(2f64)
            ]))
        );

        let res = JSON::parse(r#" { "a" : true } "#);
        let mut expected: HashMap<String, JSONValue> = HashMap::new();
        expected.insert(String::from("a"), JSONValue::True);
        assert_eq!(res, Ok(JSONValue::Object(expected)));
    }

    #[test]
    fn malformed_input_is_an_error_not_a_panic() {
        assert!(matches!(
            JSON::parse(""),
            Err(ParseError::UnexpectedEndOfInput(_))
        ));
        assert!(matches!(
            JSON::parse("[true"),
            Err(ParseError::UnexpectedEndOfInput(_))
        ));
        assert!(matches!(
            JSON::parse(r#"{"a": 1"#),
            Err(ParseError::UnexpectedEndOfInput(_))
        ));
        assert!(matches!(
            JSON::parse("{1: 2}"),
            Err(ParseError::ExpectedObjectKey(_))
        ));
        assert!(matches!(
            JSON::parse(r#"{"a" 1}"#),
            Err(ParseError::ExpectedToken(_))
        ));
        assert!(matches!(
            JSON::parse("?"),
            Err(ParseError::UnexpectedToken(_))
        ));
        assert!(matches!(
            JSON::parse("true false"),
            Err(ParseError::ExpectedEndOfInput(_))
        ));
    }
}