Inference

Adding a Custom Provider

All inference providers implement the InferenceClient interface from inference/types.ts. Adding a new provider requires four steps: implement the interface, add the provider enum value, register it in the factory, and add its environment variables.

InferenceClient interface

typescript
// src/inference/types.ts
/**
 * Contract every inference provider must implement.
 * `chat` performs one blocking completion; `chatStream` yields incremental
 * deltas and finishes with a `{ type: 'done' }` event (see InferenceStreamDelta).
 */
export interface InferenceClient {
  chat(request: InferenceRequest): Promise<InferenceResponse>;
  chatStream(request: InferenceRequest): AsyncGenerator<InferenceStreamDelta>;
}

/** Provider-agnostic chat request accepted by chat() and chatStream(). */
export interface InferenceRequest {
  /** Conversation messages sent to the model. */
  messages: ChatMessage[];
  /** Provider-specific model identifier. */
  model: string;
  /** Sampling temperature; each client picks its own fallback when omitted. */
  temperature?: number;
  /** Upper bound on generated tokens; client-chosen fallback when omitted. */
  maxTokens?: number;
  /** Tool schemas the model may invoke; omit to disable tool use. */
  tools?: ToolSchema[];
}

/** Normalized result of a non-streaming chat call. */
export interface InferenceResponse {
  /** Assistant message text. */
  content: string;
  /** Present only when the model requested tool invocations. */
  toolCalls?: ToolCall[];
  /** Token accounting as reported by the provider. */
  usage: {
    promptTokens: number;
    completionTokens: number;
    /** Prompt tokens served from the provider's cache, when reported. */
    cachedTokens?: number;
  };
}

/**
 * One incremental event yielded by InferenceClient.chatStream.
 * `text` accompanies 'text' deltas and `toolCall` accompanies 'tool_call'
 * deltas; a trailing { type: 'done' } marks the end of the stream.
 */
export interface InferenceStreamDelta {
  type: 'text' | 'tool_call' | 'done';
  text?: string;
  // Partial: a tool call's fields may be assembled across several deltas —
  // TODO confirm against a concrete streaming implementation.
  toolCall?: Partial<ToolCall>;
}

Step 1 — Implement the client

Create src/inference/my-provider.ts:

typescript
// src/inference/my-provider.ts
import type {
  InferenceClient,
  InferenceRequest,
  InferenceResponse,
  InferenceStreamDelta,
} from './types';
import { createLogger } from '../utils/logger';

const logger = createLogger('my-provider');

export class MyProviderClient implements InferenceClient {
  private apiKey: string;
  private baseUrl: string;

  constructor(apiKey: string, baseUrl = 'https://api.my-provider.com') {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
  }

  async chat(request: InferenceRequest): Promise<InferenceResponse> {
    const response = await fetch(\`${this.baseUrl}/v1/chat/completions\`, {
      method: 'POST',
      headers: {
        'Authorization': \`Bearer ${this.apiKey}\`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: request.model,
        messages: request.messages,
        temperature: request.temperature ?? 0.7,
        max_tokens: request.maxTokens ?? 4096,
      }),
    });

    if (!response.ok) {
      throw new Error(\`MyProvider error: ${response.status} ${await response.text()}\`);
    }

    const data = await response.json();
    return {
      content: data.choices[0].message.content,
      usage: {
        promptTokens: data.usage.prompt_tokens,
        completionTokens: data.usage.completion_tokens,
      },
    };
  }

  async *chatStream(request: InferenceRequest): AsyncGenerator<InferenceStreamDelta> {
    // Implement SSE streaming here
    // Yield { type: 'text', text: '...' } for each token
    // Yield { type: 'done' } when complete
    yield { type: 'done' };
  }
}

Step 2 — Add the provider to the enum

Add your provider ID to the Zod enum in src/agents/types.ts:

typescript
// src/agents/types.ts
/** Every provider ID an agent config may reference; validated by Zod. */
export const ProviderEnum = z.enum([
  'grok',
  'groq',
  'openai',
  'gemini',
  'claude',
  'ollama',
  'vllm',
  'bedrock',
  'mistral',
  'openrouter',
  'my-provider', // Add your provider here
]);

Step 3 — Register in the factory

Add a case in createPrimaryClient() in src/inference/factory.ts:

typescript
// src/inference/factory.ts
import { MyProviderClient } from './my-provider';

function createPrimaryClient(config: AgentModelConfig): InferenceClient {
  switch (config.provider) {
    // ... existing cases ...
    case 'my-provider':
      return new MyProviderClient(
        env.MY_PROVIDER_API_KEY,
        env.MY_PROVIDER_BASE_URL,
      );
    default:
      throw new Error(\`Unknown provider: ${config.provider}\`);
  }
}

Step 4 — Add environment variables

Add your provider's env vars to src/config/env.ts:

typescript
// src/config/env.ts
export const env = {
  // ... existing vars ...
  // Bearer token for MyProvider; defaults to the empty string when unset,
  // so consumers must check before use.
  MY_PROVIDER_API_KEY: process.env.MY_PROVIDER_API_KEY ?? '',
  // Override to point at a proxy or self-hosted gateway.
  MY_PROVIDER_BASE_URL: process.env.MY_PROVIDER_BASE_URL ?? 'https://api.my-provider.com',
};

After these four steps, run npm run typecheck to verify there are no type errors, then configure an agent to use provider: my-provider in astra.yml.