// flow_security/sanitizer.rs
use regex::Regex;
use std::collections::HashMap;

// Base placeholder tags, one per redaction category. `replace_with_placeholder`
// strips the closing `]` and appends `_<n>]`, so `[IP]` becomes e.g. `[IP_3]`.

/// Base tag substituted for text matched by the host-name pattern.
const PLACEHOLDER_HOSTNAME: &str = "[HOSTNAME]";
/// Base tag substituted for text matched by the dataset-name pattern.
const PLACEHOLDER_DATASET: &str = "[DATASET_NAME]";
/// Base tag substituted for text matched by the IP-address pattern.
const PLACEHOLDER_IP: &str = "[IP]";
/// Base tag substituted for text matched by the credential pattern.
const PLACEHOLDER_CRED: &str = "[CRED]";

/// Lookup table from generated placeholder tokens back to the text they replaced.
///
/// Feed sanitized text (or text derived from it) to [`RehydrationMap::rehydrate`]
/// to put the original values back.
#[derive(Debug, Clone, Default)]
pub struct RehydrationMap {
    /// Placeholder token (for example `[IP_3]`) mapped to the original matched text.
    entries: HashMap<String, String>,
}

impl RehydrationMap {
    /// Returns a copy of `text` in which every known placeholder token is replaced
    /// by the original value it stands for.
    ///
    /// The text is scanned once, left to right, and restored values are never
    /// scanned again. This keeps the result deterministic even when an original
    /// value happens to contain something that looks like another placeholder
    /// (e.g. a redacted `password=[IP_2]`); replacing entry by entry in hash-map
    /// order would sometimes rewrite such a value a second time.
    ///
    /// Tokens that are not in the map are left untouched.
    pub fn rehydrate(&self, text: &str) -> String {
        // Empty keys can never be consumed and would stall the scan, so skip them.
        let mut pairs: Vec<(&str, &str)> = self
            .entries
            .iter()
            .filter(|(token, _)| !token.is_empty())
            .map(|(token, original)| (token.as_str(), original.as_str()))
            .collect();
        // Longest token first (ties broken lexicographically) so the outcome does
        // not depend on hash-map iteration order if one token prefixes another.
        pairs.sort_unstable_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));

        // Characters that can begin a token; everything else is copied in bulk.
        let mut starts: Vec<char> = pairs
            .iter()
            .filter_map(|(token, _)| token.chars().next())
            .collect();
        starts.sort_unstable();
        starts.dedup();

        let mut out = String::with_capacity(text.len());
        let mut rest = text;
        while let Some(at) = rest.find(starts.as_slice()) {
            out.push_str(&rest[..at]);
            rest = &rest[at..];
            match pairs.iter().find(|&&(token, _)| rest.starts_with(token)) {
                Some(&(token, original)) => {
                    out.push_str(original);
                    rest = &rest[token.len()..];
                }
                None => {
                    // Looked like the start of a token but is not one: copy the
                    // single character and resume scanning right after it.
                    let mut chars = rest.chars();
                    if let Some(ch) = chars.next() {
                        out.push(ch);
                    }
                    rest = chars.as_str();
                }
            }
        }
        out.push_str(rest);
        out
    }

    /// Returns `true` when no placeholder has been recorded.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}

28#[derive(Debug, Clone)]
29pub struct SanitizedInput {
30 pub text: String,
31 pub map: RehydrationMap,
32}
33
34pub struct PiiSanitizer {
35 ip_re: Regex,
36 hostname_re: Regex,
37 dataset_re: Regex,
38 cred_re: Regex,
39}
40
41impl Default for PiiSanitizer {
42 fn default() -> Self {
43 Self::new()
44 }
45}
46
47impl PiiSanitizer {
48 pub fn new() -> Self {
49 Self {
50 ip_re: Regex::new(r"\b\d{1,3}(?:\.\d{1,3}){3}\b").unwrap(),
51 hostname_re: Regex::new(
52 r"\b(?:[A-Za-z0-9][A-Za-z0-9\-]*\.)+[A-Za-z]{2,}\b",
53 )
54 .unwrap(),
55 dataset_re: Regex::new(
56 r"\b[A-Z][A-Z0-9$#@]{0,7}(?:\.[A-Z][A-Z0-9$#@]{0,7}){1,21}\b",
57 )
58 .unwrap(),
59 cred_re: Regex::new(
60 r"(?i)\b(?:password|passwd|pwd|secret|token|apikey|api_key|authorization)\s*[:=]\s*\S+",
61 )
62 .unwrap(),
63 }
64 }
65
66 pub fn sanitize(&self, input: &str) -> SanitizedInput {
67 let mut entries: HashMap<String, String> = HashMap::new();
68 let mut counter = 0u32;
69
70 let mut text = input.to_string();
71 text = replace_with_placeholder(
72 &self.cred_re,
73 &text,
74 PLACEHOLDER_CRED,
75 &mut counter,
76 &mut entries,
77 );
78 text = replace_with_placeholder(
79 &self.ip_re,
80 &text,
81 PLACEHOLDER_IP,
82 &mut counter,
83 &mut entries,
84 );
85 text = replace_with_placeholder(
87 &self.dataset_re,
88 &text,
89 PLACEHOLDER_DATASET,
90 &mut counter,
91 &mut entries,
92 );
93 text = replace_with_placeholder(
94 &self.hostname_re,
95 &text,
96 PLACEHOLDER_HOSTNAME,
97 &mut counter,
98 &mut entries,
99 );
100
101 SanitizedInput {
102 text,
103 map: RehydrationMap { entries },
104 }
105 }
106}
107
108fn replace_with_placeholder(
109 re: &Regex,
110 text: &str,
111 base_placeholder: &str,
112 counter: &mut u32,
113 entries: &mut HashMap<String, String>,
114) -> String {
115 let mut out = String::with_capacity(text.len());
116 let mut last = 0;
117 for m in re.find_iter(text) {
118 out.push_str(&text[last..m.start()]);
119 *counter += 1;
120 let placeholder = format!("{}_{}", base_placeholder.trim_end_matches(']'), counter);
121 let placeholder = format!("{}]", placeholder);
122 entries.insert(placeholder.clone(), m.as_str().to_string());
123 out.push_str(&placeholder);
124 last = m.end();
125 }
126 out.push_str(&text[last..]);
127 out
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// An IPv4-looking token is removed and an `[IP_n]` placeholder appears.
    #[test]
    fn redacts_ip_address() {
        let s = PiiSanitizer::new();
        let r = s.sanitize("connect 10.20.30.40 now");
        assert!(!r.text.contains("10.20.30.40"));
        assert!(r.text.contains("[IP_"));
    }

    /// An upper-case dotted dataset name inside JCL is replaced.
    #[test]
    fn redacts_dataset_name() {
        let s = PiiSanitizer::new();
        let r = s.sanitize("//SYSIN DD DSN=USER01.PROD.DATA,DISP=SHR");
        assert!(!r.text.contains("USER01.PROD.DATA"));
        assert!(r.text.contains("[DATASET_NAME_"));
    }

    /// A lower-case fully qualified host name is replaced.
    #[test]
    fn redacts_hostname() {
        let s = PiiSanitizer::new();
        let r = s.sanitize("ping db01.example.com please");
        assert!(!r.text.contains("db01.example.com"));
        assert!(r.text.contains("[HOSTNAME_"));
    }

    /// A `key=value` secret is removed and a `[CRED_n]` placeholder appears.
    #[test]
    fn redacts_credential_pattern() {
        let s = PiiSanitizer::new();
        let r = s.sanitize("password=hunter2");
        assert!(!r.text.contains("hunter2"));
        assert!(r.text.contains("[CRED_"));
        assert!(!r.map.is_empty());
    }

    /// Rehydrating sanitized text gives back the exact input.
    #[test]
    fn rehydrate_restores_originals() {
        let s = PiiSanitizer::new();
        let r = s.sanitize("server 10.0.0.1 down");
        let back = r.map.rehydrate(&r.text);
        assert_eq!(back, "server 10.0.0.1 down");
    }

    /// Round trip with several categories redacted in the same input.
    #[test]
    fn rehydrate_restores_mixed_categories() {
        let input = "host db01.example.com at 10.0.0.1 reads USER01.PROD.DATA";
        let s = PiiSanitizer::new();
        let r = s.sanitize(input);
        assert!(!r.text.contains("db01.example.com"));
        assert!(!r.text.contains("10.0.0.1"));
        assert!(!r.text.contains("USER01.PROD.DATA"));
        assert_eq!(r.map.rehydrate(&r.text), input);
    }

    /// Empty input produces empty text and records nothing.
    #[test]
    fn empty_input_is_empty_output() {
        let s = PiiSanitizer::new();
        let r = s.sanitize("");
        assert_eq!(r.text, "");
        assert!(r.map.is_empty());
    }
}