Skip to main content

flow_application/
ansible_import.rs

1//! Ansible-collection import (Tier-1 CLI-veneer detection).
2//!
3//! Ansible collections ship as `.tar.gz` archives per the Galaxy artifact
4//! spec. For a large class of vendor collections, every "module" is a thin
5//! action plugin that just shells out to an external CLI - the YAML is a
6//! friendly veneer over a binary the controller already has on PATH.
7//!
8//! For these collections we don't need the `ansible-playbook` runtime to
9//! convert their playbooks: we can inspect the action-plugin source at
10//! import time, extract the CLI command string, and emit a corresponding
11//! flow node (zowe-adapter `cli-tool` when the binary is `zowe`, shell
12//! `run-command` otherwise).
13//!
14//! This module does the extract-and-detect half:
15//!
//! 1. Open a collection `.tar.gz`, validate (entry count, total size, no
//!    path traversal), and read the on-tape entries we care about
//!    (`MANIFEST.json`, `plugins/modules/*.py`, `plugins/action/*.py`).
19//! 2. Run a small regex pass over each action-plugin's source looking for
20//!    the two patterns that cover the realistic Tier-1 surface:
21//!    - `zowe_command = "<cmd>"` (zowe-CLI-veneer collections)
22//!    - `command_to_run = "<cmd>"` (generic CLI veneer)
23//! 3. Return a [`CollectionPreview`] the frontend's resolver pipeline can
24//!    seed itself with. Unknown modules surface as `cli_command: None` so
25//!    the resolver pipeline still falls through to the builtin table /
26//!    fallback for them.
27
28use std::collections::BTreeMap;
29use std::fs::File;
30use std::io::Read;
31use std::path::Path;
32
33use flate2::read::GzDecoder;
34use serde::{Deserialize, Serialize};
35use thiserror::Error;
36
/// Upper bound on the number of entries accepted from one collection
/// tarball. Real collections carry roughly 10 to 500 files, so 5000 keeps
/// plenty of room for large ones such as `community.general` while still
/// rejecting decoy archives outright.
const MAX_ENTRIES: usize = 5_000;
/// Upper bound on the cumulative uncompressed size (256 MiB). Realistic
/// collections stay in the single-digit-MB range even for large vendor
/// collections, so this only trips on decompression bombs - and trips early.
const MAX_UNCOMPRESSED_BYTES: u64 = 256 << 20;
45
46#[derive(Debug, Error)]
47pub enum ImportError {
48    #[error("io: {0}")]
49    Io(String),
50    #[error("invalid archive: {0}")]
51    InvalidArchive(String),
52    #[error("missing or malformed MANIFEST.json: {0}")]
53    BadManifest(String),
54    #[error("archive too large: {0}")]
55    TooLarge(String),
56}
57
58#[derive(Debug, Serialize, Deserialize)]
59#[serde(rename_all = "camelCase")]
60pub struct CollectionPreview {
61    pub namespace: String,
62    pub name: String,
63    pub version: String,
64    /// One entry per `plugins/modules/<name>.py` found, sorted by short name.
65    pub modules: Vec<ModulePreview>,
66}
67
68#[derive(Debug, Serialize, Deserialize)]
69#[serde(rename_all = "camelCase")]
70pub struct ModulePreview {
71    /// Fully-qualified collection name: `<ns>.<name>.<short>`.
72    pub fqcn: String,
73    /// Short module name (`deploy_ddl`, `copy`, ...).
74    pub short_name: String,
75    /// Detected CLI invocation, if the action plugin matched one of the
76    /// known patterns. `None` means the converter should fall through to
77    /// builtin / fallback resolvers.
78    pub cli_command: Option<CliCommand>,
79    /// Raw YAML from the module's `EXAMPLES = r'''...'''` documentation
80    /// block, if present. Used to synthesize a demo playbook when the user
81    /// imports a collection without a corresponding playbook on disk.
82    pub examples: Option<String>,
83}
84
85#[derive(Debug, Serialize, Deserialize, Clone)]
86#[serde(rename_all = "camelCase")]
87pub struct CliCommand {
88    /// Top-level binary: `"zowe"`, `"pwsh"`, `"kubectl"`, etc. Used by the
89    /// TS resolver to pick the right adapter (`zowe` → `zowe:cli-tool`,
90    /// anything else → `shell:run-command`).
91    pub binary: String,
92    /// The subcommand path as a single space-joined string, ready to drop
93    /// into the `command:` field of a `zowe:cli-tool` node or into the args
94    /// portion of a `shell:run-command`.
95    pub subcommand: String,
96}
97
98/// Top-level entry point. Reads + validates the tarball, scans every
99/// `plugins/modules/*.py` to enumerate the collection's surface, then runs
100/// the CLI detector against each module's matching action plugin (if any).
101pub fn import_collection(tar_path: &Path) -> Result<CollectionPreview, ImportError> {
102    let file = File::open(tar_path)
103        .map_err(|e| ImportError::Io(format!("open {}: {e}", tar_path.display())))?;
104    let decoder = GzDecoder::new(file);
105    let mut archive = tar::Archive::new(decoder);
106
107    let mut manifest: Option<RawManifest> = None;
108    let mut module_names: Vec<String> = Vec::new();
109    let mut module_sources: BTreeMap<String, String> = BTreeMap::new();
110    let mut action_sources: BTreeMap<String, String> = BTreeMap::new();
111    let mut entry_count = 0usize;
112    let mut total_bytes = 0u64;
113
114    for entry in archive
115        .entries()
116        .map_err(|e| ImportError::InvalidArchive(format!("read entries: {e}")))?
117    {
118        let mut entry =
119            entry.map_err(|e| ImportError::InvalidArchive(format!("bad entry: {e}")))?;
120
121        entry_count += 1;
122        if entry_count > MAX_ENTRIES {
123            return Err(ImportError::TooLarge(format!(
124                "more than {MAX_ENTRIES} entries"
125            )));
126        }
127
128        let size = entry.header().size().unwrap_or(0);
129        total_bytes = total_bytes.saturating_add(size);
130        if total_bytes > MAX_UNCOMPRESSED_BYTES {
131            return Err(ImportError::TooLarge(format!(
132                "uncompressed bytes exceed {MAX_UNCOMPRESSED_BYTES}"
133            )));
134        }
135
136        let path = entry
137            .path()
138            .map_err(|e| ImportError::InvalidArchive(format!("entry path: {e}")))?
139            .into_owned();
140        let path_str = path.to_string_lossy();
141        if path_str.contains("..") || path.is_absolute() {
142            return Err(ImportError::InvalidArchive(format!(
143                "unsafe path: {path_str}"
144            )));
145        }
146
147        if path_str == "MANIFEST.json" {
148            let mut buf = String::new();
149            entry
150                .read_to_string(&mut buf)
151                .map_err(|e| ImportError::Io(format!("read MANIFEST.json: {e}")))?;
152            manifest = Some(
153                serde_json::from_str(&buf)
154                    .map_err(|e| ImportError::BadManifest(e.to_string()))?,
155            );
156            continue;
157        }
158
159        if let Some(name) = strip_prefix_and_py(&path_str, "plugins/modules/") {
160            let mut buf = String::new();
161            entry
162                .read_to_string(&mut buf)
163                .map_err(|e| ImportError::Io(format!("read module {name}: {e}")))?;
164            module_names.push(name.clone());
165            module_sources.insert(name, buf);
166            continue;
167        }
168
169        if let Some(name) = strip_prefix_and_py(&path_str, "plugins/action/") {
170            let mut buf = String::new();
171            entry
172                .read_to_string(&mut buf)
173                .map_err(|e| ImportError::Io(format!("read action {name}: {e}")))?;
174            action_sources.insert(name, buf);
175            continue;
176        }
177    }
178
179    let manifest = manifest.ok_or_else(|| {
180        ImportError::BadManifest("MANIFEST.json not present in archive".into())
181    })?;
182    let info = manifest.collection_info;
183
184    module_names.sort();
185    module_names.dedup();
186
187    let modules = module_names
188        .into_iter()
189        .map(|short| {
190            let cli = action_sources
191                .get(&short)
192                .and_then(|src| detect_cli_command(src));
193            let examples = module_sources
194                .get(&short)
195                .and_then(|src| extract_examples_block(src));
196            ModulePreview {
197                fqcn: format!("{}.{}.{}", info.namespace, info.name, short),
198                short_name: short,
199                cli_command: cli,
200                examples,
201            }
202        })
203        .collect();
204
205    Ok(CollectionPreview {
206        namespace: info.namespace,
207        name: info.name,
208        version: info.version,
209        modules,
210    })
211}
212
213#[derive(Debug, Deserialize)]
214struct RawManifest {
215    collection_info: RawCollectionInfo,
216}
217
218#[derive(Debug, Deserialize)]
219struct RawCollectionInfo {
220    namespace: String,
221    name: String,
222    version: String,
223}
224
/// Maps an archive path such as `<prefix><name>.py` to `<name>`.
///
/// Returns `None` when `path` does not start with `prefix`, does not end in
/// `.py`, or when the remaining stem is empty, sits in a sub-directory
/// (contains `/`), or is the package marker `__init__`.
fn strip_prefix_and_py(path: &str, prefix: &str) -> Option<String> {
    path.strip_prefix(prefix)
        .and_then(|tail| tail.strip_suffix(".py"))
        .filter(|stem| !stem.is_empty() && !stem.contains('/') && *stem != "__init__")
        .map(str::to_owned)
}
236
237/// Regex-based CLI-veneer detector.
238///
239/// Looks for the three statically-extractable patterns we've seen across
240/// Tier-1 collections. Anything more dynamic (commands built from f-strings,
241/// branches on input flags, etc.) doesn't match - the resolver falls back
242/// to the builtin table / placeholder log node, and a future iteration can
243/// escalate to a real Python AST pass if those cases become common.
244fn detect_cli_command(source: &str) -> Option<CliCommand> {
245    use regex::Regex;
246    // `zowe_command = "..."` - zowe-CLI-veneer collections.
247    let zowe = Regex::new(r#"(?m)^\s*zowe_command\s*=\s*["']([^"']+)["']"#).ok()?;
248    if let Some(cap) = zowe.captures(source) {
249        let subcommand = cap.get(1).map(|m| m.as_str().trim().to_string())?;
250        if !subcommand.is_empty() {
251            return Some(CliCommand {
252                binary: "zowe".into(),
253                subcommand,
254            });
255        }
256    }
257    // `command_to_run = "<binary> ..."` - generic-CLI variant. The binary is
258    // taken from the first whitespace-separated token; the rest is the
259    // subcommand. Single string only - argv form is too easy to misparse.
260    let cmd = Regex::new(r#"(?m)^\s*command_to_run\s*=\s*["']([^"']+)["']"#).ok()?;
261    if let Some(cap) = cmd.captures(source) {
262        let line = cap.get(1).map(|m| m.as_str().trim().to_string())?;
263        if let Some((bin, rest)) = line.split_once(char::is_whitespace) {
264            return Some(CliCommand {
265                binary: bin.to_string(),
266                subcommand: rest.trim().to_string(),
267            });
268        }
269        return Some(CliCommand {
270            binary: line,
271            subcommand: String::new(),
272        });
273    }
274    None
275}
276
277/// Extract the YAML body of an Ansible module's `EXAMPLES = r'''...'''`
278/// documentation block. Handles the four quoting variants real modules use:
279/// `r'''...'''`, `'''...'''`, `r"""..."""`, `"""..."""`. Returns `None`
280/// when nothing parses cleanly - better to skip a module's examples than
281/// surface a malformed YAML capture that breaks the demo converter.
282fn extract_examples_block(source: &str) -> Option<String> {
283    use regex::Regex;
284    // Two patterns covering the four quoting variants: triple-single and
285    // triple-double, each optionally raw-prefixed. `(?s)` lets `.` match
286    // newlines; `.*?` is non-greedy so we stop at the first closing triple.
287    let triples: [(&str, &str); 2] = [
288        (r#"(?s)EXAMPLES\s*=\s*r?'''(.*?)'''"#, ""),
289        (r#"(?s)EXAMPLES\s*=\s*r?"""(.*?)""""#, ""),
290    ];
291    for (pat, _) in triples {
292        if let Ok(re) = Regex::new(pat) {
293            if let Some(cap) = re.captures(source) {
294                if let Some(body) = cap.get(1) {
295                    let s = body.as_str().trim();
296                    if !s.is_empty() {
297                        return Some(s.to_string());
298                    }
299                }
300            }
301        }
302    }
303    None
304}
305
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain `zowe_command` assignment resolves to the `zowe` binary.
    #[test]
    fn detects_zowe_command() {
        let plugin = r#"
def run(self):
    zowe_command = "dbm-db2 deploy ddl"
    return zowe_command
"#;
        let detected = detect_cli_command(plugin).expect("should detect");
        assert_eq!(detected.binary, "zowe");
        assert_eq!(detected.subcommand, "dbm-db2 deploy ddl");
    }

    /// `command_to_run` is split into the binary and the remaining args.
    #[test]
    fn detects_command_to_run_with_binary() {
        let plugin = r#"command_to_run = "pwsh -Command Get-Process""#;
        let detected = detect_cli_command(plugin).expect("should detect");
        assert_eq!(detected.binary, "pwsh");
        assert_eq!(detected.subcommand, "-Command Get-Process");
    }

    /// f-strings are dynamic and must not be reported as a static command.
    #[test]
    fn no_match_for_dynamic_command() {
        let plugin = r#"zowe_command = f"dbm-db2 {action}""#;
        let detected = detect_cli_command(plugin);
        assert!(detected.is_none());
    }

    /// The raw triple-single form is captured without the neighbouring
    /// DOCUMENTATION / RETURN blocks leaking in.
    #[test]
    fn extracts_raw_triple_single_examples() {
        let module = r#"
DOCUMENTATION = '''---
module: do_thing
'''

EXAMPLES = r'''
- name: Do the thing
  ns.coll.do_thing:
    target: alpha
'''

RETURN = '''
data: ...
'''
"#;
        let yaml = extract_examples_block(module).expect("should extract");
        assert!(yaml.starts_with("- name: Do the thing"));
        assert!(yaml.contains("target: alpha"));
        assert!(!yaml.contains("RETURN"));
    }

    /// The un-prefixed triple-double form is recognised as well.
    #[test]
    fn extracts_triple_double_examples() {
        let module = r#"
EXAMPLES = """
- name: Hi
  some.mod.x:
    a: 1
"""
"#;
        let yaml = extract_examples_block(module).expect("should extract");
        assert!(yaml.contains("a: 1"));
    }

    /// A module without an EXAMPLES block yields nothing.
    #[test]
    fn no_examples_returns_none() {
        let module = r#"DOCUMENTATION = '''m'''"#;
        let yaml = extract_examples_block(module);
        assert!(yaml.is_none());
    }

    /// Only direct children of the prefix that are not `__init__` count.
    #[test]
    fn strip_prefix_rejects_init_and_nested() {
        let prefix = "plugins/modules/";
        assert_eq!(
            strip_prefix_and_py("plugins/modules/copy.py", prefix),
            Some("copy".into())
        );
        assert_eq!(
            strip_prefix_and_py("plugins/modules/__init__.py", prefix),
            None
        );
        assert_eq!(
            strip_prefix_and_py("plugins/modules/sub/copy.py", prefix),
            None
        );
    }
}