From ecbf31409167f2c1d569c9a97dde5b24921ce110 Mon Sep 17 00:00:00 2001 From: Christopher Regali Date: Fri, 3 May 2024 21:43:35 +0200 Subject: [PATCH 1/3] Add first shot at ignoring the diff of keys. No sorting yet. --- Cargo.toml | 3 +- src/main.rs | 4 +- src/process.rs | 131 ++++++++++++++++++++++++++++--------------------- 3 files changed, 80 insertions(+), 58 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2454333..669c5d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,4 +28,5 @@ vg_errortools = "0.1" serde_json = { version = "1.0", features = ["preserve_order"] } maplit = "1.0" clap = {version = "4.4", features = ["derive"]} -diffs = "0.5" \ No newline at end of file +diffs = "0.5" +regex = "1.10" \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 36a1efb..e4b39aa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ use clap::Parser; use clap::Subcommand; -use json_diff::enums::Error; use json_diff::{ds::mismatch::Mismatch, process::compare_jsons}; +use json_diff::enums::Error; #[derive(Subcommand, Clone)] /// Input selection @@ -40,7 +40,7 @@ fn main() -> Result<(), Error> { } }; - let mismatch = compare_jsons(&json_1, &json_2, args.sort_arrays)?; + let mismatch = compare_jsons(&json_1, &json_2, args.sort_arrays, &[])?; let comparison_result = check_diffs(mismatch)?; if !comparison_result { diff --git a/src/process.rs b/src/process.rs index 1e9141c..1456576 100644 --- a/src/process.rs +++ b/src/process.rs @@ -1,19 +1,25 @@ -use diffs::{myers, Diff, Replace}; use std::borrow::Cow; use std::collections::HashMap; use std::collections::HashSet; -use crate::enums::Error; +use diffs::{Diff, myers, Replace}; +use regex::Regex; use serde_json::Map; use serde_json::Value; use crate::ds::key_node::KeyNode; use crate::ds::mismatch::Mismatch; +use crate::enums::Error; -pub fn compare_jsons(a: &str, b: &str, sort_arrays: bool) -> Result { +pub fn compare_jsons( + a: &str, + b: &str, + sort_arrays: bool, + ignore_keys: &[Regex], +) -> Result { let value1 = serde_json::from_str(a)?; let value2 = serde_json::from_str(b)?; - Ok(match_json(&value1, &value2, sort_arrays)) + Ok(match_json(&value1, &value2, sort_arrays, ignore_keys)) } fn values_to_node(vec: Vec<(usize, &Value)>) -> KeyNode { if vec.is_empty() { @@ -61,28 +67,37 @@ impl<'a> Diff for ListDiffHandler<'a> { } } -pub fn match_json(value1: &Value, value2: &Value, sort_arrays: bool) -> Mismatch { +pub fn match_json( + value1: &Value, + value2: &Value, + sort_arrays: bool, + ignore_keys: &[Regex], +) -> Mismatch { match (value1, value2) { (Value::Object(a), Value::Object(b)) => { - let diff = intersect_maps(a, b); + let diff = intersect_maps(a, b, ignore_keys); let mut left_only_keys = get_map_of_keys(diff.left_only); let mut right_only_keys = get_map_of_keys(diff.right_only); let intersection_keys = diff.intersection; let mut unequal_keys = KeyNode::Nil; - if let Some(intersection_keys) = intersection_keys { - for key in intersection_keys { - let Mismatch { - left_only_keys: l, - right_only_keys: r, - keys_in_both: u, - } = match_json(a.get(&key).unwrap(), b.get(&key).unwrap(), sort_arrays); - left_only_keys = insert_child_key_map(left_only_keys, l, &key); - right_only_keys = insert_child_key_map(right_only_keys, r, &key); - unequal_keys = insert_child_key_map(unequal_keys, u, &key); - } + for key in intersection_keys { + let Mismatch { + left_only_keys: l, + right_only_keys: r, + keys_in_both: u, + } = match_json( + a.get(&key).unwrap(), + b.get(&key).unwrap(), + sort_arrays, + ignore_keys, + ); + left_only_keys = insert_child_key_map(left_only_keys, l, &key); + right_only_keys = insert_child_key_map(right_only_keys, r, &key); + unequal_keys = insert_child_key_map(unequal_keys, u, &key); } + Mismatch::new(left_only_keys, right_only_keys, unequal_keys) } (Value::Array(a), Value::Array(b)) => { @@ -131,7 +146,7 @@ pub fn match_json(value1: &Value, value2: &Value, sort_arrays: bool) -> Mismatch let inner_a = a.get(o + i).unwrap_or(&Value::Null); let inner_b = b.get(n + i).unwrap_or(&Value::Null); - let cdiff = match_json(inner_a, inner_b, sort_arrays); + let cdiff = match_json(inner_a, inner_b, sort_arrays, ignore_keys); let position = o + i; let Mismatch { left_only_keys: l, @@ -230,8 +245,8 @@ fn compare_values(a: &Value, b: &Value) -> std::cmp::Ordering { } } -fn get_map_of_keys(set: Option>) -> KeyNode { - if let Some(set) = set { +fn get_map_of_keys(set: HashSet) -> KeyNode { + if !set.is_empty() { KeyNode::Node( set.iter() .map(|key| (String::from(key), KeyNode::Nil)) @@ -259,16 +274,16 @@ fn insert_child_key_map(parent: KeyNode, child: KeyNode, key: &String) -> KeyNod } struct MapDifference { - left_only: Option>, - right_only: Option>, - intersection: Option>, + left_only: HashSet, + right_only: HashSet, + intersection: HashSet, } impl MapDifference { pub fn new( - left_only: Option>, - right_only: Option>, - intersection: Option>, + left_only: HashSet, + right_only: HashSet, + intersection: HashSet, ) -> Self { Self { right_only, @@ -278,43 +293,49 @@ impl MapDifference { } } -fn intersect_maps(a: &Map, b: &Map) -> MapDifference { +fn intersect_maps( + a: &Map, + b: &Map, + ignore_keys: &[Regex], +) -> MapDifference { let mut intersection = HashSet::new(); let mut left = HashSet::new(); + let mut right = HashSet::new(); - for a_key in a.keys() { + for a_key in a + .keys() + .filter(|k| ignore_keys.iter().all(|r| !r.is_match(k.as_str()))) + { if b.contains_key(a_key) { intersection.insert(String::from(a_key)); } else { left.insert(String::from(a_key)); } } - for b_key in b.keys() { + for b_key in b + .keys() + .filter(|k| ignore_keys.iter().all(|r| !r.is_match(k.as_str()))) + { if !a.contains_key(b_key) { right.insert(String::from(b_key)); } } - let left = if left.is_empty() { None } else { Some(left) }; - let right = if right.is_empty() { None } else { Some(right) }; - let intersection = if intersection.is_empty() { - None - } else { - Some(intersection) - }; + MapDifference::new(left, right, intersection) } #[cfg(test)] mod tests { - use super::*; use maplit::hashmap; use serde_json::json; + use super::*; + #[test] fn test_arrays_sorted_simple() { let data1 = r#"["a","b","c"]"#; let data2 = r#"["b","c","a"]"#; - let diff = compare_jsons(data1, data2, true).unwrap(); + let diff = compare_jsons(data1, data2, true, &[]).unwrap(); assert!(diff.is_empty()); } @@ -322,7 +343,7 @@ mod tests { fn test_arrays_sorted_objects() { let data1 = r#"[{"c": {"d": "e"} },"b","c"]"#; let data2 = r#"["b","c",{"c": {"d": "e"} }]"#; - let diff = compare_jsons(data1, data2, true).unwrap(); + let diff = compare_jsons(data1, data2, true, &[]).unwrap(); assert!(diff.is_empty()); } @@ -330,7 +351,7 @@ mod tests { fn test_arrays_deep_sorted_objects() { let data1 = r#"[{"c": ["d","e"] },"b","c"]"#; let data2 = r#"["b","c",{"c": ["e", "d"] }]"#; - let diff = compare_jsons(data1, data2, true).unwrap(); + let diff = compare_jsons(data1, data2, true, &[]).unwrap(); assert!(diff.is_empty()); } @@ -338,7 +359,7 @@ mod tests { fn test_arrays_deep_sorted_objects_with_arrays() { let data1 = r#"[{"a": [{"b": ["3", "1"]}] }, {"a": [{"b": ["2", "3"]}] }]"#; let data2 = r#"[{"a": [{"b": ["2", "3"]}] }, {"a": [{"b": ["1", "3"]}] }]"#; - let diff = compare_jsons(data1, data2, true).unwrap(); + let diff = compare_jsons(data1, data2, true, &[]).unwrap(); assert!(diff.is_empty()); } @@ -346,7 +367,7 @@ mod tests { fn test_arrays_deep_sorted_objects_with_outer_diff() { let data1 = r#"[{"c": ["d","e"] },"b"]"#; let data2 = r#"["b","c",{"c": ["e", "d"] }]"#; - let diff = compare_jsons(data1, data2, true).unwrap(); + let diff = compare_jsons(data1, data2, true, &[]).unwrap(); assert!(!diff.is_empty()); let insertions = diff.right_only_keys.absolute_keys_to_vec(None); assert_eq!(insertions.len(), 1); @@ -357,7 +378,7 @@ mod tests { fn test_arrays_deep_sorted_objects_with_inner_diff() { let data1 = r#"["a",{"c": ["d","e", "f"] },"b"]"#; let data2 = r#"["b",{"c": ["e","d"] },"a"]"#; - let diff = compare_jsons(data1, data2, true).unwrap(); + let diff = compare_jsons(data1, data2, true, &[]).unwrap(); assert!(!diff.is_empty()); let deletions = diff.left_only_keys.absolute_keys_to_vec(None); @@ -372,7 +393,7 @@ mod tests { fn test_arrays_deep_sorted_objects_with_inner_diff_mutation() { let data1 = r#"["a",{"c": ["d", "f"] },"b"]"#; let data2 = r#"["b",{"c": ["e","d"] },"a"]"#; - let diff = compare_jsons(data1, data2, true).unwrap(); + let diff = compare_jsons(data1, data2, true, &[]).unwrap(); assert!(!diff.is_empty()); let diffs = diff.keys_in_both.absolute_keys_to_vec(None); @@ -387,7 +408,7 @@ mod tests { fn test_arrays_simple_diff() { let data1 = r#"["a","b","c"]"#; let data2 = r#"["a","b","d"]"#; - let diff = compare_jsons(data1, data2, false).unwrap(); + let diff = compare_jsons(data1, data2, false, &[]).unwrap(); assert_eq!(diff.left_only_keys, KeyNode::Nil); assert_eq!(diff.right_only_keys, KeyNode::Nil); let diff = diff.keys_in_both.absolute_keys_to_vec(None); @@ -399,7 +420,7 @@ mod tests { fn test_arrays_more_complex_diff() { let data1 = r#"["a","b","c"]"#; let data2 = r#"["a","a","b","d"]"#; - let diff = compare_jsons(data1, data2, false).unwrap(); + let diff = compare_jsons(data1, data2, false, &[]).unwrap(); let changes_diff = diff.keys_in_both.absolute_keys_to_vec(None); assert_eq!(diff.left_only_keys, KeyNode::Nil); @@ -418,7 +439,7 @@ mod tests { fn test_arrays_extra_left() { let data1 = r#"["a","b","c"]"#; let data2 = r#"["a","b"]"#; - let diff = compare_jsons(data1, data2, false).unwrap(); + let diff = compare_jsons(data1, data2, false, &[]).unwrap(); let diffs = diff.left_only_keys.absolute_keys_to_vec(None); assert_eq!(diffs.len(), 1); @@ -431,7 +452,7 @@ mod tests { fn test_arrays_extra_right() { let data1 = r#"["a","b"]"#; let data2 = r#"["a","b","c"]"#; - let diff = compare_jsons(data1, data2, false).unwrap(); + let diff = compare_jsons(data1, data2, false, &[]).unwrap(); let diffs = diff.right_only_keys.absolute_keys_to_vec(None); assert_eq!(diffs.len(), 1); @@ -444,7 +465,7 @@ mod tests { fn long_insertion_modification() { let data1 = r#"["a","b","a"]"#; let data2 = r#"["a","c","c","c","a"]"#; - let diff = compare_jsons(data1, data2, false).unwrap(); + let diff = compare_jsons(data1, data2, false, &[]).unwrap(); let diffs = diff.keys_in_both.absolute_keys_to_vec(None); assert_eq!(diffs.len(), 3); @@ -460,7 +481,7 @@ mod tests { fn test_arrays_object_extra() { let data1 = r#"["a","b"]"#; let data2 = r#"["a","b", {"c": {"d": "e"} }]"#; - let diff = compare_jsons(data1, data2, false).unwrap(); + let diff = compare_jsons(data1, data2, false, &[]).unwrap(); let diffs = diff.right_only_keys.absolute_keys_to_vec(None); assert_eq!(diffs.len(), 1); @@ -543,7 +564,7 @@ mod tests { }); let expected = Mismatch::new(expected_left, expected_right, expected_uneq); - let mismatch = compare_jsons(data1, data2, false).unwrap(); + let mismatch = compare_jsons(data1, data2, false, &[]).unwrap(); assert_eq!(mismatch, expected, "Diff was incorrect."); } @@ -579,7 +600,7 @@ mod tests { }"#; assert_eq!( - compare_jsons(data1, data2, false).unwrap(), + compare_jsons(data1, data2, false, &[]).unwrap(), Mismatch::new(KeyNode::Nil, KeyNode::Nil, KeyNode::Nil) ); } @@ -590,7 +611,7 @@ mod tests { let data2 = r#"{}"#; assert_eq!( - compare_jsons(data1, data2, false).unwrap(), + compare_jsons(data1, data2, false, &[]).unwrap(), Mismatch::new(KeyNode::Nil, KeyNode::Nil, KeyNode::Nil) ); } @@ -599,7 +620,7 @@ mod tests { fn parse_err_source_one() { let invalid_json1 = r#"{invalid: json}"#; let valid_json2 = r#"{"a":"b"}"#; - match compare_jsons(invalid_json1, valid_json2, false) { + match compare_jsons(invalid_json1, valid_json2, false, &[]) { Ok(_) => panic!("This shouldn't be an Ok"), Err(err) => { matches!(err, Error::JSON(_)); @@ -611,7 +632,7 @@ mod tests { fn parse_err_source_two() { let valid_json1 = r#"{"a":"b"}"#; let invalid_json2 = r#"{invalid: json}"#; - match compare_jsons(valid_json1, invalid_json2, false) { + match compare_jsons(valid_json1, invalid_json2, false, &[]) { Ok(_) => panic!("This shouldn't be an Ok"), Err(err) => { matches!(err, Error::JSON(_)); From cd42fd7432b3d9076c46828d3ccd5a7838ecbde2 Mon Sep 17 00:00:00 2001 From: Christopher Regali Date: Fri, 3 May 2024 22:26:42 +0200 Subject: [PATCH 2/3] Adding first unit test for sorting with ignores --- src/process.rs | 77 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/src/process.rs b/src/process.rs index 1456576..76f3319 100644 --- a/src/process.rs +++ b/src/process.rs @@ -101,8 +101,8 @@ pub fn match_json( Mismatch::new(left_only_keys, right_only_keys, unequal_keys) } (Value::Array(a), Value::Array(b)) => { - let a = preprocess_array(sort_arrays, a); - let b = preprocess_array(sort_arrays, b); + let a = preprocess_array(sort_arrays, a, ignore_keys); + let b = preprocess_array(sort_arrays, b, ignore_keys); let mut replaced = Vec::new(); let mut deleted = Vec::new(); @@ -177,17 +177,21 @@ pub fn match_json( } } -fn preprocess_array(sort_arrays: bool, a: &Vec) -> Cow> { - if sort_arrays { +fn preprocess_array<'a>( + sort_arrays: bool, + a: &'a Vec, + ignore_keys: &[Regex], +) -> Cow<'a, Vec> { + if sort_arrays || !ignore_keys.is_empty() { let mut owned = a.to_owned(); - owned.sort_by(compare_values); + owned.sort_by(|a, b| compare_values(a, b, ignore_keys)); Cow::Owned(owned) } else { Cow::Borrowed(a) } } -fn compare_values(a: &Value, b: &Value) -> std::cmp::Ordering { +fn compare_values(a: &Value, b: &Value, ignore_keys: &[Regex]) -> std::cmp::Ordering { match (a, b) { (Value::Null, Value::Null) => std::cmp::Ordering::Equal, (Value::Null, _) => std::cmp::Ordering::Less, @@ -205,10 +209,10 @@ fn compare_values(a: &Value, b: &Value) -> std::cmp::Ordering { } (Value::String(a), Value::String(b)) => a.cmp(b), (Value::Array(a), Value::Array(b)) => { - let a = preprocess_array(true, a); - let b = preprocess_array(true, b); + let a = preprocess_array(true, a, ignore_keys); + let b = preprocess_array(true, b, ignore_keys); for (a, b) in a.iter().zip(b.iter()) { - let cmp = compare_values(a, b); + let cmp = compare_values(a, b, ignore_keys); if cmp != std::cmp::Ordering::Equal { return cmp; } @@ -220,14 +224,22 @@ fn compare_values(a: &Value, b: &Value) -> std::cmp::Ordering { let mut keys_b: Vec<_> = b.keys().collect(); keys_a.sort(); keys_b.sort(); - for (key_a, key_b) in keys_a.iter().zip(keys_b.iter()) { + for (key_a, key_b) in keys_a + .iter() + .filter(|a| ignore_keys.iter().all(|r| !r.is_match(a))) + .zip( + keys_b + .iter() + .filter(|a| ignore_keys.iter().all(|r| !r.is_match(a))), + ) + { let cmp = key_a.cmp(key_b); if cmp != std::cmp::Ordering::Equal { return cmp; } let value_a = &a[*key_a]; let value_b = &b[*key_b]; - let cmp = compare_values(value_a, value_b); + let cmp = compare_values(value_a, value_b, ignore_keys); if cmp != std::cmp::Ordering::Equal { return cmp; } @@ -331,6 +343,49 @@ mod tests { use super::*; + #[test] + fn sorting_ignores_ignored_keys() { + let data1: Value = + serde_json::from_str(r#"[{"a": 1, "b":2 }, { "a": 2, "b" : 1 }]"#).unwrap(); + let ignore = [Regex::new("a").unwrap()]; + let sorted_ignores = preprocess_array(true, data1.as_array().unwrap(), &ignore); + let sorted_no_ignores = preprocess_array(true, data1.as_array().unwrap(), &[]); + + assert_eq!( + sorted_ignores + .first() + .unwrap() + .as_object() + .unwrap() + .get("b") + .unwrap() + .as_i64() + .unwrap(), + 1 + ); + assert_eq!( + sorted_no_ignores + .first() + .unwrap() + .as_object() + .unwrap() + .get("b") + .unwrap() + .as_i64() + .unwrap(), + 2 + ); + } + + #[test] + fn test_arrays_sorted_objects_ignored() { + let data1 = r#"[{"c": {"d": "e"} },"b","c"]"#; + let data2 = r#"["b","c",{"c": {"d": "f"} }]"#; + let ignore = Regex::new("d").unwrap(); + let diff = compare_jsons(data1, data2, true, &[ignore]).unwrap(); + assert!(diff.is_empty()); + } + #[test] fn test_arrays_sorted_simple() { let data1 = r#"["a","b","c"]"#; From 57660763ae995ade758f2b0552b08639253d2e36 Mon Sep 17 00:00:00 2001 From: Christopher Regali Date: Fri, 3 May 2024 22:57:20 +0200 Subject: [PATCH 3/3] Bump crate version --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 669c5d7..5c3d9e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "json_diff_ng" -version = "0.4.1" +version = "0.5.0" authors = ["ksceriath", "ChrisRega"] edition = "2021" license = "Unlicense" @@ -27,6 +27,6 @@ thiserror = "1.0" vg_errortools = "0.1" serde_json = { version = "1.0", features = ["preserve_order"] } maplit = "1.0" -clap = {version = "4.4", features = ["derive"]} +clap = { version = "4.5", features = ["derive"] } diffs = "0.5" regex = "1.10" \ No newline at end of file