Struct LlamaParams

Source

/// Per-model llama-server load parameters (the "Load" tab, à la LM Studio).
///
/// Absent fields fall back to the server's own defaults (no flag passed).
pub struct LlamaParams {
    /// Context window size (`--ctx-size`).
    pub ctx_size: Option<u32>,
    /// Layers offloaded to the GPU (`--n-gpu-layers`); 0 = CPU only.
    pub n_gpu_layers: Option<i32>,
    /// CPU threads (`--threads`).
    pub threads: Option<u32>,
    /// Logical batch size for prompt eval (`--batch-size`).
    pub batch_size: Option<u32>,
    /// Parallel sequences / max concurrent predictions (`--parallel`).
    pub parallel: Option<u32>,
    /// RNG seed (`--seed`); omit for random.
    pub seed: Option<i64>,
    /// Enable Flash Attention (`--flash-attn`).
    pub flash_attn: Option<bool>,
    /// Lock the model in RAM (`--mlock`) - "keep model in memory".
    pub mlock: Option<bool>,
    /// Memory-map the model file (`--mmap`); `false` passes `--no-mmap`.
    pub mmap: Option<bool>,
    /// KV cache K type, e.g. `f16`, `q8_0`, `q4_0` (`--cache-type-k`).
    pub cache_type_k: Option<String>,
    /// KV cache V type (`--cache-type-v`).
    pub cache_type_v: Option<String>,
    /// For reasoning models: when `Some(false)`, disable "thinking" by passing
    /// `--reasoning-budget 0` (much faster, lower memory). Only meaningful for
    /// models that support it; `None`/`Some(true)` leaves the default on.
    pub enable_thinking: Option<bool>,
}

Expand description

Per-model llama-server load parameters (the “Load” tab, à la LM Studio). Absent fields fall back to the server’s own defaults (no flag passed).

Fields§

§ctx_size: Option<u32>

Context window size (--ctx-size).

§n_gpu_layers: Option<i32>

Layers offloaded to the GPU (--n-gpu-layers); 0 = CPU only.

§threads: Option<u32>

CPU threads (--threads).

§batch_size: Option<u32>

Logical batch size for prompt eval (--batch-size).

§parallel: Option<u32>

Parallel sequences / max concurrent predictions (--parallel).

§seed: Option<i64>

RNG seed (--seed); omit for random.

§flash_attn: Option<bool>

Enable Flash Attention (--flash-attn).

§mlock: Option<bool>

Lock the model in RAM (--mlock) - “keep model in memory”.

§mmap: Option<bool>

Memory-map the model file (--mmap); false passes --no-mmap.

§cache_type_k: Option<String>

KV cache K type, e.g. f16, q8_0, q4_0 (--cache-type-k).

§cache_type_v: Option<String>

KV cache V type (--cache-type-v).

§enable_thinking: Option<bool>

For reasoning models: when Some(false), disable “thinking” by passing --reasoning-budget 0 (much faster, lower memory). Only meaningful for models that support it; None/Some(true) leaves the default on.

LlamaParams

Struct LlamaParams Copy item path

Fields§

Trait Implementations§

impl Clone for LlamaParams

fn clone(&self) -> LlamaParams

fn clone_from(&mut self, source: &Self)

impl Debug for LlamaParams

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for LlamaParams

fn default() -> LlamaParams

impl<'de> Deserialize<'de> for LlamaParams

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where __D: Deserializer<'de>,

impl Serialize for LlamaParams

fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where __S: Serializer,

Auto Trait Implementations§

impl Freeze for LlamaParams

impl RefUnwindSafe for LlamaParams

impl Send for LlamaParams

impl Sync for LlamaParams

impl Unpin for LlamaParams

impl UnsafeUnpin for LlamaParams

impl UnwindSafe for LlamaParams

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de>,

impl<A, B, T> HttpServerConnExec<A, B> for T where B: Body,

Struct LlamaParams

fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where __D: Deserializer<'de>,

fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> DeserializeOwned for T
where T: for<'de> Deserialize<'de>,

impl<A, B, T> HttpServerConnExec<A, B> for T
where B: Body,