//! # Naive JSON Parser
//!
//! Based on [JSON Parser with JavaScript](https://lihautan.com/json-parser-with-javascript/)
#![forbid(unsafe_code)]

use std::collections::HashMap;
use std::iter::FromIterator;
use std::{char, u16};

/// The result of parsing a JSON document
pub type JSONResult = Result<JSONValue, ParseError>;
/// The Rust representation of a JSON array
pub type JSONArray = Vec<JSONValue>;
/// The Rust representation of a JSON object
pub type JSONMap = HashMap<String, JSONValue>;

/// The type of JSON value
#[derive(Clone, Debug, PartialEq)]
pub enum JSONValue {
    /// Object Literal
    Object(HashMap<String, JSONValue>),

    /// Array Literal
    Array(Vec<JSONValue>),

    /// String Literal
    String(String),

    /// Number Literal
    Number(f64),

    /// True Literal
    True,

    /// False Literal
    False,

    /// Null Literal
    Null,
}

impl JSONValue {
    /// Convert the wrapped JSONValue to its simpler rust value
    ///
    /// The conversion is done by the matching `From<JSONValue>` implementation,
    /// so it panics when the enum variant does not hold the requested type.
    ///
    /// Example:
    /// ```
    /// use naive_json_parser::JSONValue;
    ///
    /// let str = "Four score and seven years ago...";
    /// let wrapped = JSONValue::from(str);
    ///
    /// // s is now the `String` that was in the `JSONValue` enum
    /// let s: String = wrapped.unwrap();
    ///
    /// # assert_eq!(str, &s);
    /// ```
    pub fn unwrap<T: From<JSONValue>>(self) -> T {
        T::from(self)
    }
}

impl From<JSONValue> for JSONMap {
    /// Extracts the `HashMap` in the `JSONValue` enum, if it exists.
    /// Otherwise, panics.
    fn from(val: JSONValue) -> JSONMap {
        match val {
            JSONValue::Object(o) => o,
            _ => panic!("Invalid type conversion"),
        }
    }
}

impl From<JSONValue> for JSONArray {
    /// Extracts the `Vec` in the `JSONValue` enum, if it exists.
    /// Otherwise, panics.
    fn from(val: JSONValue) -> JSONArray {
        match val {
            JSONValue::Array(a) => a,
            _ => panic!("Invalid type conversion"),
        }
    }
}

impl From<JSONValue> for f64 {
    /// Extracts the `f64` in the `JSONValue` enum, if it exists.
    /// Otherwise, panics.
    fn from(val: JSONValue) -> f64 {
        match val {
            JSONValue::Number(n) => n,
            _ => panic!("Invalid type conversion"),
        }
    }
}

impl From<JSONValue> for String {
    /// Extracts the `String` in the `JSONValue` enum, if it exists.
    /// Otherwise, panics.
    fn from(val: JSONValue) -> String {
        match val {
            JSONValue::String(s) => s,
            _ => panic!("Invalid type conversion"),
        }
    }
}

impl From<JSONValue> for bool {
    /// Extracts the `bool` value from the `JSONValue` enum, if it exists.
    /// Otherwise, panics.
    fn from(val: JSONValue) -> bool {
        match val {
            JSONValue::True => true,
            JSONValue::False => false,
            _ => panic!("Invalid type conversion"),
        }
    }
}

impl From<JSONValue> for () {
    /// This will just swallow the enum value and return a unit tuple
    fn from(_: JSONValue) {}
}

impl From<JSONMap> for JSONValue {
    /// Wraps the `HashMap` in the `JSONValue` enum
    fn from(val: JSONMap) -> JSONValue {
        Self::Object(val)
    }
}

impl From<JSONArray> for JSONValue {
    /// Wraps the `Vec` in the `JSONValue` enum
    fn from(val: JSONArray) -> JSONValue {
        Self::Array(val)
    }
}

impl From<bool> for JSONValue {
    /// Sets the `JSONValue` enum to the `True` or `False` value
    fn from(val: bool) -> Self {
        if val {
            Self::True
        } else {
            Self::False
        }
    }
}

impl From<f64> for JSONValue {
    /// Wraps the `f64` in the `JSONValue` enum
    fn from(n: f64) -> Self {
        Self::Number(n)
    }
}

impl From<()> for JSONValue {
    /// Sets the `JSONValue` enum to the `Null` value
    fn from(_s: ()) -> Self {
        Self::Null
    }
}

impl From<String> for JSONValue {
    /// Wraps the `String` in the `JSONValue` enum
    fn from(s: String) -> Self {
        Self::String(s)
    }
}

impl From<&str> for JSONValue {
    /// Creates a `String` and wraps it in the `JSONValue` enum
    fn from(s: &str) -> Self {
        Self::String(String::from(s))
    }
}

/// The type of error returned by the parser
///
/// Every variant carries a human-readable description of what went wrong.
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// The input looks like JSON, but seems to end
    UnexpectedEndOfInput(String),

    /// Looks like JSON, but seems to have characters after it should
    ExpectedEndOfInput(String),

    /// Wasn't this supposed to be an object literal?
    ExpectedObjectKey(String),

    /// Hey, wasn't there supposed to be...?
    ExpectedToken(String),

    /// What's this character?
    UnexpectedToken(String),

    /// Shouldn't this be a numeral?
    ExpectedDigit(String),

    /// There's a backslash...were you going somewhere with that?
    ExpectedEscapeChar(String),

    /// Should be a unicode escape character...missing a few digits?
    ExpectedUnicodeEscape(String),
}

impl std::fmt::Display for ParseError {
    /// Formats the error as `<kind>: <detail message>`
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let (kind, detail) = match self {
            ParseError::UnexpectedEndOfInput(msg) => ("unexpected end of input", msg),
            ParseError::ExpectedEndOfInput(msg) => ("expected end of input", msg),
            ParseError::ExpectedObjectKey(msg) => ("expected object key", msg),
            ParseError::ExpectedToken(msg) => ("expected token", msg),
            ParseError::UnexpectedToken(msg) => ("unexpected token", msg),
            ParseError::ExpectedDigit(msg) => ("expected digit", msg),
            ParseError::ExpectedEscapeChar(msg) => ("expected escape character", msg),
            ParseError::ExpectedUnicodeEscape(msg) => ("expected unicode escape", msg),
        };

        write!(f, "{}: {}", kind, detail)
    }
}

impl std::error::Error for ParseError {}

/// This struct holds a little state for parsing
#[derive(Debug, PartialEq)]
pub struct JSON {
    /// The input JSON String as a character array
    chars: Vec<char>,

    /// The internal parsing index.
    ///
    /// Always in `0..=chars.len()`; a value of `chars.len()` means the whole
    /// input has been consumed.
    i: usize,
}

/// Cut down the if boilerplate
///
/// Thanks to `uwaterloodudette` on reddit
macro_rules! try_parse {
    ($( $e:expr ),* ) => {
        $(
            if let Some(v) = $e? {
                return Ok(v);
            }
        )*
    };
}

impl JSON {
    /// Private constructor
    fn new(json: &str) -> Self {
        JSON {
            chars: json.chars().collect(),
            i: 0,
        }
    }

    /// The character at index `n`, or `None` when `n` is past the end of the input
    fn char_at(&self, n: usize) -> Option<char> {
        self.chars.get(n).copied()
    }

    /// The character at the current parsing index, or `None` at end of input
    fn peek(&self) -> Option<char> {
        self.char_at(self.i)
    }

    /// Index of the first character at or after `from` that is not an ASCII digit
    fn skip_digits_from(&self, from: usize) -> usize {
        let digits = self.chars.get(from..).map_or(0, |rest| {
            rest.iter().take_while(|ch| ch.is_ascii_digit()).count()
        });

        from + digits
    }

    /// Parse a `JSONValue` from the current JSON string
    ///
    /// Returns `ParseError::UnexpectedEndOfInput` when there is nothing left to
    /// parse, and `ParseError::UnexpectedToken` when the next character cannot
    /// start any JSON value.
    fn parse_value(&mut self) -> JSONResult {
        self.skip_whitespace();

        // Go through the parser methods, until you find
        // one that doesn't return a `None`
        try_parse!(
            self.parse_string(),
            self.parse_number(),
            self.parse_object(),
            self.parse_array(),
            self.parse_keyword("true", JSONValue::True),
            self.parse_keyword("false", JSONValue::False),
            self.parse_keyword("null", JSONValue::Null)
        );

        // Every parser failed, so the syntax is probably incorrect
        match self.peek() {
            None => Err(ParseError::UnexpectedEndOfInput(String::from(
                "Doesn't seem to be valid JSON",
            ))),
            Some(ch) => Err(ParseError::UnexpectedToken(format!(
                "Unexpected character '{}'.",
                ch
            ))),
        }
    }

    /// See if there's a `JSONValue::Object` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is not `{`.
    fn parse_object(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('{') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result: JSONMap = HashMap::new();
        let mut initial = true;

        // if it is not '}',
        // we take the path of string -> whitespace -> ':' -> value -> ...
        while self.peek() != Some('}') {
            if !initial {
                self.eat(',')?;
                self.skip_whitespace();
            }

            let key = match self.parse_string()? {
                Some(JSONValue::String(s)) => s,
                _ if self.peek().is_none() => {
                    return Err(ParseError::UnexpectedEndOfInput(String::from(
                        "Expected an object key or '}'.",
                    )));
                }
                _ => {
                    return Err(ParseError::ExpectedObjectKey(String::from(
                        "Expected a string as the object key.",
                    )));
                }
            };

            self.skip_whitespace();
            self.eat(':')?;

            let value = self.parse_value()?;
            result.insert(key, value);

            initial = false;
            self.skip_whitespace();
        }

        // Move to the next character: '}'
        self.increment(1);

        Ok(Some(JSONValue::from(result)))
    }

    /// See if there's a `JSONValue::Array` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is not `[`.
    fn parse_array(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('[') {
            return Ok(None);
        }

        self.increment(1);
        self.skip_whitespace();

        let mut result: Vec<JSONValue> = vec![];
        let mut initial = true;

        while self.peek() != Some(']') {
            if !initial {
                self.eat(',')?;
            }

            // `parse_value` skips leading whitespace itself
            let value = self.parse_value()?;
            result.push(value);

            initial = false;

            // Allow whitespace between a value and the following ',' or ']'
            self.skip_whitespace();
        }

        // move to next character: ']'
        self.increment(1);

        Ok(Some(JSONValue::from(result)))
    }

    /// See if there's a `JSONValue::String` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is not `"`. An unterminated
    /// string yields `ParseError::ExpectedToken`; invalid escape sequences
    /// yield `ParseError::ExpectedEscapeChar` or
    /// `ParseError::ExpectedUnicodeEscape`.
    fn parse_string(&mut self) -> Result<Option<JSONValue>, ParseError> {
        if self.peek() != Some('"') {
            return Ok(None);
        }

        self.increment(1);

        let mut result = String::new();

        while let Some(ch) = self.peek() {
            match ch {
                '"' => break,
                '\\' => {
                    self.increment(1);
                    result.push(self.parse_escape()?);
                }
                _ => {
                    result.push(ch);
                    self.increment(1);
                }
            }
        }

        // Either consumes the closing quote, or reports that it is missing
        self.eat('"')?;

        Ok(Some(JSONValue::from(result)))
    }

    /// Parse the part of an escape sequence that follows the backslash
    fn parse_escape(&mut self) -> Result<char, ParseError> {
        let ch = match self.peek() {
            Some(ch) => ch,
            None => {
                return Err(ParseError::ExpectedEscapeChar(String::from(
                    "Expected an escape character after '\\'.",
                )));
            }
        };

        self.increment(1);

        match ch {
            '"' | '\\' | '/' => Ok(ch),
            'b' => Ok('\u{8}'),
            'f' => Ok('\x0C'),
            'n' => Ok('\n'),
            'r' => Ok('\r'),
            't' => Ok('\t'),
            'u' => self.parse_unicode_escape(),
            _ => Err(ParseError::ExpectedEscapeChar(format!(
                "'\\{}' is not a valid escape sequence.",
                ch
            ))),
        }
    }

    /// Parse the four hex digits of a `\uXXXX` escape (the `\u` is already consumed)
    ///
    /// A high surrogate directly followed by a `\uXXXX` low surrogate is
    /// combined into the code point the pair encodes. A surrogate without its
    /// partner becomes the replacement character (U+FFFD).
    fn parse_unicode_escape(&mut self) -> Result<char, ParseError> {
        let first = u32::from(self.parse_hex4()?);

        if (0xD800..=0xDBFF).contains(&first) {
            let checkpoint = self.i;

            if self.peek() == Some('\\') && self.char_at(self.i + 1) == Some('u') {
                self.increment(2);

                if let Ok(second) = self.parse_hex4() {
                    let second = u32::from(second);

                    if (0xDC00..=0xDFFF).contains(&second) {
                        let code = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
                        return Ok(char::from_u32(code).unwrap_or(char::REPLACEMENT_CHARACTER));
                    }
                }

                // Not a low surrogate: rewind, so the following escape is
                // parsed (and validated) on its own by the caller
                self.i = checkpoint;
            }

            return Ok(char::REPLACEMENT_CHARACTER);
        }

        // `from_u32` only fails here for an unpaired low surrogate
        Ok(char::from_u32(first).unwrap_or(char::REPLACEMENT_CHARACTER))
    }

    /// Read exactly four hexadecimal digits at the current index as a UTF-16 code unit
    fn parse_hex4(&mut self) -> Result<u16, ParseError> {
        let digits = match self.chars.get(self.i..self.i + 4) {
            Some(slice) if slice.iter().all(|ch| ch.is_ascii_hexdigit()) => {
                String::from_iter(slice)
            }
            _ => {
                return Err(ParseError::ExpectedUnicodeEscape(String::from(
                    "Expected four hexadecimal digits after '\\u'.",
                )));
            }
        };

        let code = u16::from_str_radix(&digits, 16)
            .map_err(|e| ParseError::ExpectedUnicodeEscape(format!("'{}', {:#?}", digits, e)))?;

        self.increment(4);

        Ok(code)
    }

    /// See if there's a `JSONValue::Number` next in the JSON
    ///
    /// Returns `Ok(None)` when the next character is neither a digit nor a
    /// minus sign. The number's characters are collected and handed to
    /// `str::parse::<f64>`; if that fails, `ParseError::ExpectedDigit` is
    /// returned.
    fn parse_number(&mut self) -> Result<Option<JSONValue>, ParseError> {
        let start = self.i;

        // If it doesn't start with 0-9 or a minus sign, it's probably not a number
        match self.peek() {
            Some(ch) if ch.is_ascii_digit() || ch == '-' => {}
            _ => return Ok(None),
        }

        // `end` is the index one past the last character of the number
        let mut end = start;

        // Minus sign
        if self.char_at(end) == Some('-') {
            end += 1;
        }

        // Integer Part
        end = self.skip_digits_from(end);

        // Decimal Part
        if self.char_at(end) == Some('.') {
            end = self.skip_digits_from(end + 1);
        }

        // Scientific notation part
        match self.char_at(end) {
            Some('e') | Some('E') => {
                end += 1;

                match self.char_at(end) {
                    Some('-') | Some('+') => end += 1,
                    _ => {}
                }

                // Exponent digits
                end = self.skip_digits_from(end);
            }
            _ => {}
        }

        let str = String::from_iter(&self.chars[start..end]);

        match str.parse::<f64>() {
            Ok(number) => {
                self.increment(end - start);
                Ok(Some(JSONValue::from(number)))
            }
            Err(e) => Err(ParseError::ExpectedDigit(format!("'{}', {:#?}", str, e))),
        }
    }

    /// See if there's a `JSONValue::True`, `JSONValue::False`, or a `JSONValue::Null` next in the JSON
    ///
    /// Returns `Ok(Some(value))` and moves past the keyword when the input at
    /// the current index starts with `search`; otherwise returns `Ok(None)`
    /// and leaves the index untouched.
    fn parse_keyword(
        &mut self,
        search: &str,
        value: JSONValue,
    ) -> Result<Option<JSONValue>, ParseError> {
        let length = search.chars().count();

        // `get` returns `None` when fewer than `length` characters remain
        let found = match self.chars.get(self.i..self.i + length) {
            Some(slice) => slice.iter().copied().eq(search.chars()),
            None => false,
        };

        if found {
            self.increment(length);
            return Ok(Some(value));
        }

        Ok(None)
    }

    /// Increment the internal index until the next character is not a whitespace character
    ///
    /// Stops at the end of the input.
    fn skip_whitespace(&mut self) {
        while self.peek().map_or(false, |ch| ch.is_ascii_whitespace()) {
            self.increment(1);
        }
    }

    /// 'Eat' the specified character
    ///
    /// * If the next `char` matches the one passed, the internal index is incremented
    /// * If the next `char` does not match the one passed (or the input has ended),
    ///   a `ParseError::ExpectedToken` error is returned
    fn eat(&mut self, ch: char) -> Result<(), ParseError> {
        if self.peek() != Some(ch) {
            let msg = format!("Expected {}.", ch);
            return Err(ParseError::ExpectedToken(msg));
        }

        self.increment(1);

        Ok(())
    }

    /// Do a checked increment of the internal pointer index
    ///
    /// The index never moves past `chars.len()`, which marks the end of input.
    fn increment(&mut self, amount: usize) {
        self.i = self.i.saturating_add(amount).min(self.chars.len());
    }

    /// Convert a `&str` containing JSON into a `Result<JSONValue, ParseError>`
    ///
    /// Whitespace around the value is ignored. Anything else after the value
    /// is reported as `ParseError::ExpectedEndOfInput`.
    pub fn parse(json: &str) -> JSONResult {
        let mut parser = JSON::new(json);
        let value = parser.parse_value()?;

        parser.skip_whitespace();

        match parser.peek() {
            None => Ok(value),
            Some(ch) => Err(ParseError::ExpectedEndOfInput(format!(
                "Unexpected character '{}' after the end of the JSON value.",
                ch
            ))),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::JSONValue::{Array, False, Null, Number, Object, True};
    use super::*;

    #[test]
    fn value_conversion() {
        let map: JSONMap = HashMap::new();
        let num = 9.380831539;
        let str = "applesauce";
        let arr: JSONArray = vec![
            JSONValue::from(map.clone()),
            JSONValue::from(num),
            JSONValue::from(str),
        ];

        assert_eq!(map.clone(), JSONMap::from(JSONValue::from(map.clone())));
        assert_eq!(num, f64::from(JSONValue::from(num)));
        assert_eq!(String::from(str), String::from(JSONValue::from(str)));
        assert_eq!(arr.clone(), JSONArray::from(JSONValue::from(arr.clone())));
        assert_eq!(true, bool::from(JSONValue::from(true)));
        assert_eq!(false, bool::from(JSONValue::from(false)));
        assert_eq!((), <()>::from(JSONValue::from(())));
    }

    #[test]
    fn wrap_and_unwrap() {
        let map: JSONMap = HashMap::new();
        let num = 9.380831539;
        let str = "applesauce";
        let arr: JSONArray = vec![
            JSONValue::from(map.clone()),
            JSONValue::from(num),
            JSONValue::from(str),
        ];

        let s: String = JSONValue::from(str).unwrap();
        let a: JSONArray = JSONValue::from(arr.clone()).unwrap();

        assert_eq!(map.clone(), JSONValue::from(map.clone()).unwrap());
        assert_eq!(num, JSONValue::from(num).unwrap());
        assert_eq!(str, &s);
        assert_eq!(arr.clone(), a);
        assert_eq!(true, JSONValue::from(true).unwrap());
        assert_eq!(false, JSONValue::from(false).unwrap());
        assert_eq!((), JSONValue::from(()).unwrap());
    }

    #[test]
    fn parse_keyword() {
        let res = JSON::new(r#""foobarbaz""#).parse_keyword("true", JSONValue::True);
        assert_eq!(res, Ok(None));

        let res = JSON::new("true").parse_keyword("true", JSONValue::True);
        assert_eq!(res, Ok(Some(True)));
    }

    #[test]
    fn skip_whitespace() {
        let mut parser = JSON::new(" \t\r\nx");
        parser.skip_whitespace();
        assert_eq!('x', parser.chars[parser.i]);
    }

    #[test]
    fn parse_string() {
        let res = JSON::new(r#""\t""#).parse_string();
        assert_eq!(res, Ok(Some(JSONValue::from("\t"))));

        let res = JSON::new(r#""\u203d""#).parse_string();
        assert_eq!(res, Ok(Some(JSONValue::from("‽"))));
    }

    #[test]
    fn parse_empty_array() {
        let res = JSON::new("[]").parse_value();
        assert_eq!(res, Ok(Array(vec![])));
    }

    #[test]
    fn parse_number() {
        // This function works like I think, right?
        assert_eq!(','.is_ascii_digit(), false);

        let res = JSON::new(r#""foo""#).parse_number();
        assert_eq!(res, Ok(None));

        let res = JSON::new("3.14159").parse_number();
        assert_eq!(res, Ok(Some(Number(3.14159f64))));

        let res = JSON::new("3e4").parse_number();
        assert_eq!(res, Ok(Some(Number(3e4f64))));

        let res = JSON::new("1.234,").parse_number();
        assert_eq!(res, Ok(Some(Number(1.234f64))));
    }

    #[test]
    fn can_parse_array_of_keywords() {
        let result = JSON::parse("[true,false,null]");

        assert_eq!(result, Ok(Array(vec![True, False, Null])));
    }

    #[test]
    fn parse_object() {
        let result = JSON::new(r#"{"foo": "bar"}"#).parse_object();

        let mut hash_map: JSONMap = HashMap::new();
        hash_map.insert(String::from("foo"), JSONValue::from("bar"));

        assert_eq!(result, Ok(Some(JSONValue::Object(hash_map))));
    }

    #[test]
    fn parse_json_types() {
        // Boolean / Null
        let res = JSON::parse("true");
        assert_eq!(res, Ok(True));
        let res = JSON::parse("false");
        assert_eq!(res, Ok(False));
        let res = JSON::parse("null");
        assert_eq!(res, Ok(Null));

        // Number
        let res = JSON::parse("9.38083151965");
        assert_eq!(res, Ok(Number(9.38083151965)), "Failed to parse number");

        // String
        let res = JSON::parse(r#""/^$/""#);
        assert_eq!(
            res,
            Ok(JSONValue::from("/^$/")),
            "Failed to parse string"
        );

        // Number array
        let res = JSON::parse("[1, 2, 3]");
        assert_eq!(
            res,
            Ok(Array(vec![Number(1f64), Number(2f64), Number(3f64)]))
        );

        // Object array
        let result = JSON::parse("[{}]");
        assert_eq!(result, Ok(JSONValue::Array(vec![Object(HashMap::new())])));
    }

    #[test]
    fn parse_nested_object() {
        let res = JSON::parse(r#"{"a": {"b": []}}"#);
        let mut outermap: JSONMap = HashMap::new();
        let mut innermap: JSONMap = HashMap::new();

        innermap.insert(String::from("b"), Array(vec![]));
        outermap.insert(String::from("a"), Object(innermap));

        let expected = Ok(Object(outermap));

        assert_eq!(res, expected);
    }

    #[test]
    fn parse_object_with_number_values() {
        let result = JSON::parse(r#"[{ "a": 9.38083151965, "b": 4e3 }]"#);

        let mut map: JSONMap = HashMap::new();
        map.insert(String::from("a"), Number(9.38083151965f64));
        map.insert(String::from("b"), Number(4e3f64));

        let expected = Ok(Array(vec![Object(map)]));

        assert_eq!(
            result, expected,
            "Failed on just number values: {:#?}",
            result
        );
    }

    #[test]
    fn parse_weird_character_array() {
        let result =
            JSON::parse(r#"["\"", "\\", "/", "\b", "\f", "\n", "\r", "\t", "\u0001", "\uface"]"#);
        let expected = Ok(Array(vec![
            JSONValue::from("\""),
            JSONValue::from("\\"),
            JSONValue::from("/"),
            JSONValue::from("\u{8}"),
            JSONValue::from("\x0C"),
            JSONValue::from("\n"),
            JSONValue::from("\r"),
            JSONValue::from("\t"),
            JSONValue::from("\u{1}"),
            JSONValue::from("\u{face}"),
        ]));

        assert_eq!(result, expected);
    }

    #[test]
    fn parse_full_json_example() {
        let result = JSON::parse(
            r#"[{
                "a": 9.38083151965,
                "b": 4e3,
                "c": [1, 2, 3],
                "d": "foo",
                "e": {
                    "f": {
                        "g": {
                            "h": null
                        }
                    }
                },
                "i": ["\"", "\\", "/", "\b", "\f", "\n", "\r", "\t", "\u0001", "\uface"]
            }]"#,
        );

        let mut map: JSONMap = HashMap::new();
        let mut emap: JSONMap = HashMap::new();
        let mut fmap: JSONMap = HashMap::new();
        let mut gmap: JSONMap = HashMap::new();

        gmap.insert(String::from("h"), Null);
        fmap.insert(String::from("g"), Object(gmap));
        emap.insert(String::from("f"), Object(fmap));

        map.insert(String::from("a"), Number(9.38083151965f64));
        map.insert(String::from("b"), Number(4e3f64));
        map.insert(
            String::from("c"),
            Array(vec![Number(1f64), Number(2f64), Number(3f64)]),
        );
        map.insert(String::from("d"), JSONValue::from("foo"));
        map.insert(String::from("e"), Object(emap));

        map.insert(
            String::from("i"),
            Array(vec![
                JSONValue::from("\""),
                JSONValue::from("\\"),
                JSONValue::from("/"),
                JSONValue::from("\u{8}"),
                JSONValue::from("\x0C"),
                JSONValue::from("\n"),
                JSONValue::from("\r"),
                JSONValue::from("\t"),
                JSONValue::from("\u{1}"),
                JSONValue::from("\u{face}"),
            ]),
        );

        assert!(result.is_ok(), "{:#?}", result);

        let outer_array: Vec<JSONValue> = result.unwrap().unwrap();
        let result_map: JSONMap = outer_array[0].clone().unwrap();

        for (k, v) in &map {
            assert_eq!(
                result_map.get(k).unwrap(),
                v,
                "HashMap Entry Differs: {:#?}, {:#?}",
                result_map.get(k).unwrap(),
                v
            );
        }
    }

    #[test]
    fn parse_value_at_end_of_input() {
        assert_eq!(JSON::parse("1"), Ok(Number(1f64)));
        assert_eq!(JSON::parse("[1 ]"), Ok(Array(vec![Number(1f64)])));
        assert_eq!(JSON::parse(" true \n"), Ok(True));
    }

    #[test]
    fn truncated_input_is_an_error() {
        for json in ["", " ", "[", "{", "[true", "1e", r#""\u12"#, r#""abc"#].iter() {
            assert!(JSON::parse(json).is_err(), "Should not parse: {:?}", json);
        }
    }

    #[test]
    fn invalid_input_uses_specific_errors() {
        assert!(matches!(
            JSON::parse("true false"),
            Err(ParseError::ExpectedEndOfInput(_))
        ));
        assert!(matches!(
            JSON::parse("{1: 2}"),
            Err(ParseError::ExpectedObjectKey(_))
        ));
        assert!(matches!(
            JSON::parse(r#""\x""#),
            Err(ParseError::ExpectedEscapeChar(_))
        ));
        assert!(matches!(
            JSON::parse(r#""\uzzzz""#),
            Err(ParseError::ExpectedUnicodeEscape(_))
        ));
    }

    #[test]
    fn parse_surrogate_pair() {
        let res = JSON::parse(r#""\ud83d\ude00""#);
        assert_eq!(res, Ok(JSONValue::from("\u{1F600}")));

        let res = JSON::parse(r#""\ud83d""#);
        assert_eq!(res, Ok(JSONValue::from("\u{FFFD}")));
    }
}