Skip to main content

flow_application/
hub.rs

1//! Model Hub client.
2//!
3//! The Hub is the central registry that distributes versioned model artifacts
4//! (GGUF / MLX / safetensors models). Clients **download** to their local
5//! environment and run inference locally - only artifact bytes + metadata
6//! ever cross the wire, never inference data. This module is the client: a
7//! catalog + a streaming downloader into `~/.flow-studio/llms/`.
8//!
9//! There is no real Hub artifactory service yet, so the catalog is loaded from
10//! [`hub_catalog.json`](hub_catalog.json) - an `{ apiVersion, models }` envelope
11//! checked into the repo that doubles as a **proof-of-concept of the future
12//! registry API response**. It carries everything the UI renders (browse /
13//! search / filter / detail) plus per-model hardware requirements that drive the
14//! device-compatibility check. Edit the JSON to add/remove models; the same
15//! shape will later come over the wire from the real service.
16
17use std::path::{Path, PathBuf};
18use std::sync::atomic::{AtomicBool, Ordering};
19use std::sync::Arc;
20use std::sync::OnceLock;
21
22use futures_util::StreamExt;
23use serde::{Deserialize, Serialize};
24use sha2::{Digest, Sha256};
25
26use crate::install::{InstallProgress, ProgressCallback};
27
28/// Artifact format. `Gguf`/`Mlx` run via the managed inference server;
29/// `SafeTensors` is distributed but not an immediate runtime target.
30#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
31#[serde(rename_all = "lowercase")]
32pub enum HubFormat {
33    Gguf,
34    Mlx,
35    #[serde(rename = "safetensors")]
36    SafeTensors,
37}
38
/// GPU-offload class for a download variant, rendered as a hardware hint.
///
/// Serialized in snake_case: `full_gpu`, `partial`, `apple_silicon`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Offload {
    /// Fits entirely in VRAM.
    FullGpu,
    /// Partially offloaded; some layers on CPU.
    Partial,
    /// Metal/MLX path on Apple Silicon.
    AppleSilicon,
}
50
/// One downloadable artifact variant (format × quantization × offload).
///
/// Field names are camelCase on the wire (`sizeBytes`, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DownloadOption {
    /// Stable id within the model, e.g. `gguf-q4_k_m`. This is the key
    /// `resolve_option` matches against.
    pub id: String,
    /// Artifact format of this variant.
    pub format: HubFormat,
    /// Quantization label, e.g. `Q4_K_M`, `Q8_0`, `4bit`, `F16`.
    pub quant: String,
    /// GPU-offload class shown as a hardware hint for this variant.
    pub offload: Offload,
    /// Artifact size in bytes. `download` falls back to this as the progress
    /// total when the HTTP response carries no content length.
    pub size_bytes: u64,
    /// URL the artifact is fetched from; its last path segment becomes the
    /// local file name.
    pub url: String,
    /// Lowercase hex sha256. When present the download is verified.
    #[serde(default)]
    pub sha256: Option<String>,
}
67
/// A single released version with its changelog note.
///
/// Field names are camelCase on the wire (`releasedAt`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct HubVersion {
    /// Version label of this release.
    pub version: String,
    /// Changelog note for this release.
    pub note: String,
    /// Relative label as the Hub serves it (e.g. "3 days ago"). It is a
    /// display string, not a parseable timestamp.
    pub released_at: String,
}
77
/// Hardware requirements + supported platforms for the model. Drives the
/// device-compatibility check (RAM + disk + platform).
///
/// Only `minRamGb` is mandatory in the JSON; every other field falls back to
/// its type's default (0 / `false` / empty) when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Hardware {
    /// Minimum RAM, GB. Required in the catalog JSON (no serde default).
    pub min_ram_gb: u32,
    /// Comfortable RAM for the default variant (display only).
    #[serde(default)]
    pub recommended_ram_gb: u32,
    /// VRAM hint (display only; the live check uses RAM/disk/platform).
    #[serde(default)]
    pub min_vram_gb: u32,
    /// Free disk needed for the default variant, GB.
    #[serde(default)]
    pub min_disk_gb: u32,
    /// Whether a GPU is required (display only).
    #[serde(default)]
    pub gpu_required: bool,
    /// Supported platform tokens: `macos-arm64 | macos-x64 | linux-x64 |
    /// windows-x64`. Empty = runs everywhere.
    #[serde(default)]
    pub platforms: Vec<String>,
}
101
/// A model in the Hub registry. Camel-cased for the frontend; loaded from
/// `hub_catalog.json`, the real Hub service will serve the same shape.
///
/// Fields marked `#[serde(default)]` may be omitted from the JSON; all others
/// are required and a missing one makes the catalog fail to parse.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct HubModel {
    /// Registry id; the key `find` looks models up by
    /// (e.g. `qwen/qwen3.6-35b-a3b`).
    pub id: String,
    /// Display name; also sent as the `message` of download progress events.
    pub name: String,
    /// Publisher of the model.
    pub author: String,
    /// Architecture label (free-form string).
    pub arch: String,
    /// Domain label (free-form string, e.g. `LLM`).
    pub domain: String,
    /// Parameter count label, e.g. `14B`, `30B-A3B`, `335M`.
    pub params: String,
    /// Registry badge flag. NOTE(review): presumably "verified publisher" -
    /// confirm against the UI.
    pub verified: bool,
    /// Registry badge flag. NOTE(review): presumably "staff pick" - confirm
    /// against the UI.
    pub staff: bool,
    /// Whether the model has a "thinking"/reasoning mode that can be toggled
    /// off (e.g. Qwen3). Gates the Enable-Thinking control in the Load panel.
    #[serde(default)]
    pub supports_thinking: bool,
    /// Maximum context window the model supports (drives the context slider's
    /// upper bound + the "supports up to N tokens" hint). 0 = unknown.
    #[serde(default)]
    pub max_context_tokens: u32,
    /// Total transformer layers (drives the GPU-offload slider max). 0 = unknown.
    #[serde(default)]
    pub num_layers: u32,
    /// SPDX-ish license id, e.g. `apache-2.0` (display + filter).
    #[serde(default)]
    pub license: String,
    /// Free-form tags fed into search + filtering.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Artifact formats the model is published in.
    pub formats: Vec<HubFormat>,
    /// Capability tags: `code | tool_use | reasoning | vision | embedding`.
    pub capabilities: Vec<String>,
    /// Download counter as reported by the catalog.
    pub downloads: u64,
    /// Star counter as reported by the catalog.
    pub stars: u64,
    /// Last-updated label as served by the catalog (opaque string here).
    pub updated_at: String,
    /// Version label of the most recent release.
    pub latest_version: String,
    /// Long-form description shown in the detail view.
    pub description: String,
    /// Released versions with their changelog notes.
    pub version_history: Vec<HubVersion>,
    /// Downloadable variants; `resolve_option` treats the first entry as the
    /// default when no option id is given.
    pub download_options: Vec<DownloadOption>,
    /// Hardware requirements used by the device-compatibility check.
    pub hardware: Hardware,
}
145
/// A locally-downloaded LLM, as reported by `list_local_llms`.
///
/// Serialize-only; field names are camelCase for the frontend.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct LocalLlm {
    /// Final path component of the artifact (lossy UTF-8 conversion).
    pub file_name: String,
    /// Full path of the artifact (lossy UTF-8 conversion).
    pub path: String,
    /// File size in bytes; 0 when the metadata could not be read.
    pub size_bytes: u64,
}
154
/// An installed model derived by joining local artifacts against the catalog.
///
/// Serialize-only; field names are camelCase for the frontend. Nothing in
/// this module constructs it - NOTE(review): the join presumably lives in the
/// caller; confirm.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct InstalledModel {
    /// Catalog id of the installed model.
    pub id: String,
    /// Version that is installed locally.
    pub version: String,
    /// Format of the installed artifact.
    pub format: HubFormat,
    /// True when the catalog's `latest_version` is newer than `version`.
    pub update_available: bool,
}
165
/// The catalog data file, embedded at compile time. Editing it + rebuilding
/// (dev `pnpm desktop:dev` recompiles the crate) updates the model list.
///
/// The path is resolved relative to this source file by `include_str!`, so a
/// missing file is a compile error rather than a runtime one.
const CATALOG_JSON: &str = include_str!("../../../mocks/hub_catalog.json");
169
170/// API-response envelope: `{ "apiVersion": "1", "models": [ … ] }`. Mirrors the
171/// shape the future registry service will serve.
172#[derive(Deserialize)]
173struct CatalogEnvelope {
174    #[serde(default)]
175    api_version: String,
176    models: Vec<HubModel>,
177}
178
179/// Parse the embedded catalog once. A malformed JSON file panics at first use
180/// (loud, deterministic - it ships in the binary, so it's a build-time error in
181/// spirit).
182pub fn catalog() -> Vec<HubModel> {
183    static CATALOG: OnceLock<Vec<HubModel>> = OnceLock::new();
184    CATALOG
185        .get_or_init(|| {
186            let env: CatalogEnvelope = serde_json::from_str(CATALOG_JSON)
187                .expect("hub_catalog.json is malformed");
188            let _ = env.api_version;
189            env.models
190        })
191        .clone()
192}
193
194/// Find a catalog model by id.
195pub fn find(id: &str) -> Option<HubModel> {
196    catalog().into_iter().find(|m| m.id == id)
197}
198
199/// Resolve a download variant: the named `option_id`, else the first option.
200pub fn resolve_option<'a>(model: &'a HubModel, option_id: Option<&str>) -> Option<&'a DownloadOption> {
201    match option_id {
202        Some(oid) => model.download_options.iter().find(|o| o.id == oid),
203        None => model.download_options.first(),
204    }
205}
206
207/// List downloaded LLMs under `llms_dir`.
208pub fn list_local_llms(llms_dir: &Path) -> Vec<LocalLlm> {
209    let mut out = Vec::new();
210    let Ok(rd) = std::fs::read_dir(llms_dir) else {
211        return out;
212    };
213    for entry in rd.flatten() {
214        let path = entry.path();
215        if path.extension().and_then(|e| e.to_str()) != Some("gguf") {
216            continue;
217        }
218        let size = entry.metadata().map(|m| m.len()).unwrap_or(0);
219        out.push(LocalLlm {
220            file_name: path
221                .file_name()
222                .map(|n| n.to_string_lossy().to_string())
223                .unwrap_or_default(),
224            path: path.to_string_lossy().to_string(),
225            size_bytes: size,
226        });
227    }
228    out.sort_by(|a, b| a.file_name.cmp(&b.file_name));
229    out
230}
231
/// Delete a downloaded LLM by file name, confined to `llms_dir`.
///
/// `file_name` must be a bare file name: non-empty, exactly one normal path
/// component, and free of `/`, `\` and `..`. The component check matters on
/// Windows, where a drive-relative name such as `C:model.gguf` contains no
/// separator yet makes `Path::join` discard `llms_dir` entirely.
///
/// # Errors
/// - `invalid file name …` when the name is not a bare file name.
/// - `no such model file: …` when the target is missing or not a regular file.
/// - `remove …` when the file system refuses the deletion.
pub fn delete_local_llm(llms_dir: &Path, file_name: &str) -> Result<(), String> {
    // Exactly one `Normal` component rejects "", ".", "..", rooted paths and
    // (on Windows) drive prefixes in a single check.
    let mut components = Path::new(file_name).components();
    let is_single_normal = matches!(
        (components.next(), components.next()),
        (Some(std::path::Component::Normal(_)), None)
    );
    // The explicit separator / `..` checks keep backslashes rejected on
    // platforms where they are ordinary file-name characters.
    if !is_single_normal
        || file_name.contains('/')
        || file_name.contains('\\')
        || file_name.contains("..")
    {
        return Err(format!("invalid file name {file_name:?}"));
    }
    let path = llms_dir.join(file_name);
    if !path.is_file() {
        return Err(format!("no such model file: {file_name}"));
    }
    std::fs::remove_file(&path).map_err(|e| format!("remove {}: {e}", path.display()))
}
249
250/// Stream-download a model's variant into `dest_dir`, emitting
251/// `stage: "download"` progress, then verify sha256 if the option pins one.
252/// Streams to a `.part` file and renames on success.
253pub async fn download(
254    model: &HubModel,
255    option_id: Option<&str>,
256    dest_dir: &Path,
257    progress: Option<ProgressCallback>,
258    cancel: Option<Arc<AtomicBool>>,
259) -> Result<PathBuf, String> {
260    let option = resolve_option(model, option_id)
261        .ok_or_else(|| format!("model {} has no download option {option_id:?}", model.id))?;
262    std::fs::create_dir_all(dest_dir).map_err(|e| format!("create {}: {e}", dest_dir.display()))?;
263    let file_name = option
264        .url
265        .rsplit('/')
266        .next()
267        .filter(|s| !s.is_empty())
268        .unwrap_or(&option.id)
269        .to_string();
270    let final_path = dest_dir.join(&file_name);
271    let part_path = dest_dir.join(format!("{file_name}.part"));
272
273    let client = reqwest::Client::new();
274    let resp = client
275        .get(&option.url)
276        .send()
277        .await
278        .map_err(|e| format!("GET {}: {e}", option.url))?;
279    if !resp.status().is_success() {
280        return Err(format!("download {}: HTTP {}", option.url, resp.status()));
281    }
282    let total = resp.content_length().unwrap_or(option.size_bytes);
283
284    let mut file = std::fs::File::create(&part_path)
285        .map_err(|e| format!("create {}: {e}", part_path.display()))?;
286    let mut hasher = Sha256::new();
287    let mut downloaded: u64 = 0;
288    let mut stream = resp.bytes_stream();
289    let mut last_emit = 0u64;
290
291    use std::io::Write;
292    while let Some(chunk) = stream.next().await {
293        // Honor a cancellation request: drop the partial file and bail.
294        if cancel.as_ref().is_some_and(|c| c.load(Ordering::Relaxed)) {
295            drop(file);
296            let _ = std::fs::remove_file(&part_path);
297            return Err("download cancelled".to_string());
298        }
299        let chunk = chunk.map_err(|e| format!("stream {}: {e}", option.url))?;
300        file.write_all(&chunk)
301            .map_err(|e| format!("write {}: {e}", part_path.display()))?;
302        hasher.update(&chunk);
303        downloaded += chunk.len() as u64;
304        if downloaded - last_emit >= 4 * 1024 * 1024 || downloaded == total {
305            last_emit = downloaded;
306            if let Some(cb) = &progress {
307                cb(InstallProgress {
308                    stage: "download",
309                    current: downloaded,
310                    total,
311                    message: model.name.clone(),
312                });
313            }
314        }
315    }
316    file.flush().map_err(|e| format!("flush: {e}"))?;
317    drop(file);
318
319    if let Some(expected) = &option.sha256 {
320        let actual = hex::encode(hasher.finalize());
321        if !actual.eq_ignore_ascii_case(expected) {
322            let _ = std::fs::remove_file(&part_path);
323            return Err(format!(
324                "sha256 mismatch for {}: expected {expected}, got {actual}",
325                model.id
326            ));
327        }
328    }
329
330    std::fs::rename(&part_path, &final_path)
331        .map_err(|e| format!("finalize {}: {e}", final_path.display()))?;
332
333    if let Some(cb) = &progress {
334        cb(InstallProgress {
335            stage: "done",
336            current: total,
337            total,
338            message: model.name.clone(),
339        });
340    }
341    Ok(final_path)
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// Path of a scratch directory unique to this process and call. The
    /// directory itself is not created here.
    fn unique_tmp() -> PathBuf {
        use std::sync::atomic::{AtomicU64, Ordering};
        static C: AtomicU64 = AtomicU64::new(0);
        let n = C.fetch_add(1, Ordering::Relaxed);
        let dir_name = format!("flow-hub-{}-{n}", std::process::id());
        std::env::temp_dir().join(dir_name)
    }

    /// The embedded catalog parses and every entry satisfies the invariants
    /// the UI and downloader rely on.
    #[test]
    fn catalog_loads_and_is_well_formed() {
        let models = catalog();
        assert!(models.len() >= 5, "expected >= 5 models, got {}", models.len());
        // The user's coding model must be present.
        assert!(models.iter().any(|m| m.id == "qwen/qwen3.6-35b-a3b"));

        for model in &models {
            assert!(!model.id.is_empty(), "id");
            assert!(!model.name.is_empty(), "{} name", model.id);
            assert_eq!(model.download_options.len(), 1, "{} one option", model.id);

            let option = &model.download_options[0];
            assert!(option.url.starts_with("https://"), "{} https url", model.id);
            assert!(option.size_bytes > 0, "{} size", model.id);
            assert!(option.url.ends_with(".gguf"), "{} gguf url", model.id);

            let hardware = &model.hardware;
            assert!(!hardware.platforms.is_empty(), "{} platforms", model.id);
            assert!(hardware.min_ram_gb > 0, "{} minRamGb", model.id);
        }

        // Look up whatever id comes first rather than a hard-coded one, so
        // this keeps working as the catalog's model set changes.
        let first_id = &models[0].id;
        assert!(find(first_id).is_some());
        assert!(find("does/not-exist").is_none());
    }

    /// A named option resolves to that option, no name resolves to the first
    /// one, and an unknown name resolves to nothing.
    #[test]
    fn resolve_option_picks_named_or_first() {
        // Both variants differ only in id/quant/size; the URL ends in the id.
        let variant = |id: &str, quant: &str, size_bytes: u64| DownloadOption {
            id: id.into(),
            format: HubFormat::Gguf,
            quant: quant.into(),
            offload: Offload::FullGpu,
            size_bytes,
            url: format!("https://h/{id}"),
            sha256: None,
        };
        let hardware = Hardware {
            min_ram_gb: 0,
            recommended_ram_gb: 0,
            min_vram_gb: 0,
            min_disk_gb: 0,
            gpu_required: false,
            platforms: vec![],
        };
        let model = HubModel {
            id: "org/m".into(),
            name: "m".into(),
            author: "org".into(),
            arch: "gguf".into(),
            domain: "LLM".into(),
            params: "7B".into(),
            verified: false,
            staff: false,
            supports_thinking: false,
            max_context_tokens: 32768,
            num_layers: 28,
            license: "apache-2.0".into(),
            tags: vec![],
            formats: vec![HubFormat::Gguf],
            capabilities: vec![],
            downloads: 0,
            stars: 0,
            updated_at: String::new(),
            latest_version: "main".into(),
            description: String::new(),
            version_history: vec![],
            download_options: vec![
                variant("a-q4_k_m.gguf", "Q4_K_M", 100),
                variant("a-q8_0.gguf", "Q8_0", 200),
            ],
            hardware,
        };

        let default_pick = resolve_option(&model, None).unwrap();
        assert_eq!(default_pick.id, "a-q4_k_m.gguf");

        let named_pick = resolve_option(&model, Some("a-q8_0.gguf")).unwrap();
        assert_eq!(named_pick.quant, "Q8_0");

        assert!(resolve_option(&model, Some("nope")).is_none());
    }

    /// Deletion refuses anything that isn't a bare file name inside the
    /// directory, and removes a real file that is.
    #[test]
    fn delete_local_llm_jails_to_dir() {
        let dir = unique_tmp();
        std::fs::create_dir_all(&dir).unwrap();
        let model_file = dir.join("a.gguf");
        std::fs::write(&model_file, b"x").unwrap();

        // Traversal / separators / missing files are refused.
        for rejected in ["../evil", "sub/a.gguf", "missing.gguf"] {
            assert!(delete_local_llm(&dir, rejected).is_err());
        }

        // Real file inside the dir is removed.
        assert!(delete_local_llm(&dir, "a.gguf").is_ok());
        assert!(!model_file.exists());

        let _ = std::fs::remove_dir_all(&dir);
    }

    /// Only `.gguf` files show up in the listing.
    #[test]
    fn list_local_llms_filters_gguf() {
        let dir = unique_tmp();
        std::fs::create_dir_all(&dir).unwrap();
        for (name, contents) in [("a.gguf", b"x"), ("notes.txt", b"y")] {
            std::fs::write(dir.join(name), contents).unwrap();
        }

        let found = list_local_llms(&dir);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].file_name, "a.gguf");

        let _ = std::fs::remove_dir_all(&dir);
    }
}
463}