kiln_ai.adapters.base_adapter

  1import json
  2from abc import ABCMeta, abstractmethod
  3from dataclasses import dataclass
  4from typing import Dict
  5
  6from kiln_ai.datamodel import (
  7    DataSource,
  8    DataSourceType,
  9    Task,
 10    TaskOutput,
 11    TaskRun,
 12)
 13from kiln_ai.datamodel.json_schema import validate_schema
 14from kiln_ai.utils.config import Config
 15
 16from .prompt_builders import BasePromptBuilder, SimplePromptBuilder
 17
 18
 19@dataclass
 20class AdapterInfo:
 21    adapter_name: str
 22    model_name: str
 23    model_provider: str
 24    prompt_builder_name: str
 25
 26
 27@dataclass
 28class RunOutput:
 29    output: Dict | str
 30    intermediate_outputs: Dict[str, str] | None
 31
 32
 33class BaseAdapter(metaclass=ABCMeta):
 34    """Base class for AI model adapters that handle task execution.
 35
 36    This abstract class provides the foundation for implementing model-specific adapters
 37    that can process tasks with structured or unstructured inputs/outputs. It handles
 38    input/output validation, prompt building, and run tracking.
 39
 40    Attributes:
 41        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 42        kiln_task (Task): The task configuration and metadata
 43        output_schema (dict | None): JSON schema for validating structured outputs
 44        input_schema (dict | None): JSON schema for validating structured inputs
 45    """
 46
 47    def __init__(
 48        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
 49    ):
 50        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 51        self.kiln_task = kiln_task
 52        self.output_schema = self.kiln_task.output_json_schema
 53        self.input_schema = self.kiln_task.input_json_schema
 54
 55    async def invoke_returning_raw(
 56        self,
 57        input: Dict | str,
 58        input_source: DataSource | None = None,
 59    ) -> Dict | str:
 60        result = await self.invoke(input, input_source)
 61        if self.kiln_task.output_json_schema is None:
 62            return result.output.output
 63        else:
 64            return json.loads(result.output.output)
 65
 66    async def invoke(
 67        self,
 68        input: Dict | str,
 69        input_source: DataSource | None = None,
 70    ) -> TaskRun:
 71        # validate input
 72        if self.input_schema is not None:
 73            if not isinstance(input, dict):
 74                raise ValueError(f"structured input is not a dict: {input}")
 75            validate_schema(input, self.input_schema)
 76
 77        # Run
 78        run_output = await self._run(input)
 79
 80        # validate output
 81        if self.output_schema is not None:
 82            if not isinstance(run_output.output, dict):
 83                raise RuntimeError(
 84                    f"structured response is not a dict: {run_output.output}"
 85                )
 86            validate_schema(run_output.output, self.output_schema)
 87        else:
 88            if not isinstance(run_output.output, str):
 89                raise RuntimeError(
 90                    f"response is not a string for non-structured task: {run_output.output}"
 91                )
 92
 93        # Generate the run and output
 94        run = self.generate_run(input, input_source, run_output)
 95
 96        # Save the run if configured to do so, and we have a path to save to
 97        if Config.shared().autosave_runs and self.kiln_task.path is not None:
 98            run.save_to_file()
 99        else:
100            # Clear the ID to indicate it's not persisted
101            run.id = None
102
103        return run
104
105    def has_structured_output(self) -> bool:
106        return self.output_schema is not None
107
108    @abstractmethod
109    def adapter_info(self) -> AdapterInfo:
110        pass
111
112    @abstractmethod
113    async def _run(self, input: Dict | str) -> RunOutput:
114        pass
115
116    def build_prompt(self) -> str:
117        return self.prompt_builder.build_prompt()
118
119    # create a run and task output
120    def generate_run(
121        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
122    ) -> TaskRun:
123        # Convert input and output to JSON strings if they are dictionaries
124        input_str = json.dumps(input) if isinstance(input, dict) else input
125        output_str = (
126            json.dumps(run_output.output)
127            if isinstance(run_output.output, dict)
128            else run_output.output
129        )
130
131        # If no input source is provided, use the human data source
132        if input_source is None:
133            input_source = DataSource(
134                type=DataSourceType.human,
135                properties={"created_by": Config.shared().user_id},
136            )
137
138        new_task_run = TaskRun(
139            parent=self.kiln_task,
140            input=input_str,
141            input_source=input_source,
142            output=TaskOutput(
143                output=output_str,
144                # Synthetic since an adapter, not a human, is creating this
145                source=DataSource(
146                    type=DataSourceType.synthetic,
147                    properties=self._properties_for_task_output(),
148                ),
149            ),
150            intermediate_outputs=run_output.intermediate_outputs,
151        )
152
153        exclude_fields = {
154            "id": True,
155            "created_at": True,
156            "updated_at": True,
157            "path": True,
158            "output": {"id": True, "created_at": True, "updated_at": True},
159        }
160        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)
161
162        # Check if the same run already exists
163        existing_task_run = next(
164            (
165                task_run
166                for task_run in self.kiln_task.runs()
167                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
168            ),
169            None,
170        )
171        if existing_task_run:
172            return existing_task_run
173
174        return new_task_run
175
176    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
177        props = {}
178
179        # adapter info
180        adapter_info = self.adapter_info()
181        props["adapter_name"] = adapter_info.adapter_name
182        props["model_name"] = adapter_info.model_name
183        props["model_provider"] = adapter_info.model_provider
184        props["prompt_builder_name"] = adapter_info.prompt_builder_name
185
186        return props
@dataclass
class AdapterInfo:
@dataclass
class AdapterInfo:
    """Describes which adapter, model, provider and prompt builder produced an output."""

    # Adapter implementation name.
    adapter_name: str
    # Model name.
    model_name: str
    # Model provider name.
    model_provider: str
    # Prompt builder name.
    prompt_builder_name: str
AdapterInfo( adapter_name: str, model_name: str, model_provider: str, prompt_builder_name: str)
adapter_name: str
model_name: str
model_provider: str
prompt_builder_name: str
@dataclass
class RunOutput:
28@dataclass
29class RunOutput:
30    output: Dict | str
31    intermediate_outputs: Dict[str, str] | None
RunOutput( output: Union[Dict, str], intermediate_outputs: Optional[Dict[str, str]])
output: Union[Dict, str]
intermediate_outputs: Optional[Dict[str, str]]
class BaseAdapter:
 34class BaseAdapter(metaclass=ABCMeta):
 35    """Base class for AI model adapters that handle task execution.
 36
 37    This abstract class provides the foundation for implementing model-specific adapters
 38    that can process tasks with structured or unstructured inputs/outputs. It handles
 39    input/output validation, prompt building, and run tracking.
 40
 41    Attributes:
 42        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 43        kiln_task (Task): The task configuration and metadata
 44        output_schema (dict | None): JSON schema for validating structured outputs
 45        input_schema (dict | None): JSON schema for validating structured inputs
 46    """
 47
 48    def __init__(
 49        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
 50    ):
 51        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 52        self.kiln_task = kiln_task
 53        self.output_schema = self.kiln_task.output_json_schema
 54        self.input_schema = self.kiln_task.input_json_schema
 55
 56    async def invoke_returning_raw(
 57        self,
 58        input: Dict | str,
 59        input_source: DataSource | None = None,
 60    ) -> Dict | str:
 61        result = await self.invoke(input, input_source)
 62        if self.kiln_task.output_json_schema is None:
 63            return result.output.output
 64        else:
 65            return json.loads(result.output.output)
 66
 67    async def invoke(
 68        self,
 69        input: Dict | str,
 70        input_source: DataSource | None = None,
 71    ) -> TaskRun:
 72        # validate input
 73        if self.input_schema is not None:
 74            if not isinstance(input, dict):
 75                raise ValueError(f"structured input is not a dict: {input}")
 76            validate_schema(input, self.input_schema)
 77
 78        # Run
 79        run_output = await self._run(input)
 80
 81        # validate output
 82        if self.output_schema is not None:
 83            if not isinstance(run_output.output, dict):
 84                raise RuntimeError(
 85                    f"structured response is not a dict: {run_output.output}"
 86                )
 87            validate_schema(run_output.output, self.output_schema)
 88        else:
 89            if not isinstance(run_output.output, str):
 90                raise RuntimeError(
 91                    f"response is not a string for non-structured task: {run_output.output}"
 92                )
 93
 94        # Generate the run and output
 95        run = self.generate_run(input, input_source, run_output)
 96
 97        # Save the run if configured to do so, and we have a path to save to
 98        if Config.shared().autosave_runs and self.kiln_task.path is not None:
 99            run.save_to_file()
100        else:
101            # Clear the ID to indicate it's not persisted
102            run.id = None
103
104        return run
105
106    def has_structured_output(self) -> bool:
107        return self.output_schema is not None
108
109    @abstractmethod
110    def adapter_info(self) -> AdapterInfo:
111        pass
112
113    @abstractmethod
114    async def _run(self, input: Dict | str) -> RunOutput:
115        pass
116
117    def build_prompt(self) -> str:
118        return self.prompt_builder.build_prompt()
119
120    # create a run and task output
121    def generate_run(
122        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
123    ) -> TaskRun:
124        # Convert input and output to JSON strings if they are dictionaries
125        input_str = json.dumps(input) if isinstance(input, dict) else input
126        output_str = (
127            json.dumps(run_output.output)
128            if isinstance(run_output.output, dict)
129            else run_output.output
130        )
131
132        # If no input source is provided, use the human data source
133        if input_source is None:
134            input_source = DataSource(
135                type=DataSourceType.human,
136                properties={"created_by": Config.shared().user_id},
137            )
138
139        new_task_run = TaskRun(
140            parent=self.kiln_task,
141            input=input_str,
142            input_source=input_source,
143            output=TaskOutput(
144                output=output_str,
145                # Synthetic since an adapter, not a human, is creating this
146                source=DataSource(
147                    type=DataSourceType.synthetic,
148                    properties=self._properties_for_task_output(),
149                ),
150            ),
151            intermediate_outputs=run_output.intermediate_outputs,
152        )
153
154        exclude_fields = {
155            "id": True,
156            "created_at": True,
157            "updated_at": True,
158            "path": True,
159            "output": {"id": True, "created_at": True, "updated_at": True},
160        }
161        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)
162
163        # Check if the same run already exists
164        existing_task_run = next(
165            (
166                task_run
167                for task_run in self.kiln_task.runs()
168                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
169            ),
170            None,
171        )
172        if existing_task_run:
173            return existing_task_run
174
175        return new_task_run
176
177    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
178        props = {}
179
180        # adapter info
181        adapter_info = self.adapter_info()
182        props["adapter_name"] = adapter_info.adapter_name
183        props["model_name"] = adapter_info.model_name
184        props["model_provider"] = adapter_info.model_provider
185        props["prompt_builder_name"] = adapter_info.prompt_builder_name
186
187        return props

Base class for AI model adapters that handle task execution.

This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.

Attributes:
    prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
    kiln_task (Task): The task configuration and metadata
    output_schema (dict | None): JSON schema for validating structured outputs
    input_schema (dict | None): JSON schema for validating structured inputs

prompt_builder
kiln_task
output_schema
input_schema
async def invoke_returning_raw( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> Union[Dict, str]:
56    async def invoke_returning_raw(
57        self,
58        input: Dict | str,
59        input_source: DataSource | None = None,
60    ) -> Dict | str:
61        result = await self.invoke(input, input_source)
62        if self.kiln_task.output_json_schema is None:
63            return result.output.output
64        else:
65            return json.loads(result.output.output)
async def invoke( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> kiln_ai.datamodel.TaskRun:
 67    async def invoke(
 68        self,
 69        input: Dict | str,
 70        input_source: DataSource | None = None,
 71    ) -> TaskRun:
 72        # validate input
 73        if self.input_schema is not None:
 74            if not isinstance(input, dict):
 75                raise ValueError(f"structured input is not a dict: {input}")
 76            validate_schema(input, self.input_schema)
 77
 78        # Run
 79        run_output = await self._run(input)
 80
 81        # validate output
 82        if self.output_schema is not None:
 83            if not isinstance(run_output.output, dict):
 84                raise RuntimeError(
 85                    f"structured response is not a dict: {run_output.output}"
 86                )
 87            validate_schema(run_output.output, self.output_schema)
 88        else:
 89            if not isinstance(run_output.output, str):
 90                raise RuntimeError(
 91                    f"response is not a string for non-structured task: {run_output.output}"
 92                )
 93
 94        # Generate the run and output
 95        run = self.generate_run(input, input_source, run_output)
 96
 97        # Save the run if configured to do so, and we have a path to save to
 98        if Config.shared().autosave_runs and self.kiln_task.path is not None:
 99            run.save_to_file()
100        else:
101            # Clear the ID to indicate it's not persisted
102            run.id = None
103
104        return run
def has_structured_output(self) -> bool:
106    def has_structured_output(self) -> bool:
107        return self.output_schema is not None
@abstractmethod
def adapter_info(self) -> AdapterInfo:
109    @abstractmethod
110    def adapter_info(self) -> AdapterInfo:
111        pass
def build_prompt(self) -> str:
117    def build_prompt(self) -> str:
118        return self.prompt_builder.build_prompt()
def generate_run( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None, run_output: RunOutput) -> kiln_ai.datamodel.TaskRun:
121    def generate_run(
122        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
123    ) -> TaskRun:
124        # Convert input and output to JSON strings if they are dictionaries
125        input_str = json.dumps(input) if isinstance(input, dict) else input
126        output_str = (
127            json.dumps(run_output.output)
128            if isinstance(run_output.output, dict)
129            else run_output.output
130        )
131
132        # If no input source is provided, use the human data source
133        if input_source is None:
134            input_source = DataSource(
135                type=DataSourceType.human,
136                properties={"created_by": Config.shared().user_id},
137            )
138
139        new_task_run = TaskRun(
140            parent=self.kiln_task,
141            input=input_str,
142            input_source=input_source,
143            output=TaskOutput(
144                output=output_str,
145                # Synthetic since an adapter, not a human, is creating this
146                source=DataSource(
147                    type=DataSourceType.synthetic,
148                    properties=self._properties_for_task_output(),
149                ),
150            ),
151            intermediate_outputs=run_output.intermediate_outputs,
152        )
153
154        exclude_fields = {
155            "id": True,
156            "created_at": True,
157            "updated_at": True,
158            "path": True,
159            "output": {"id": True, "created_at": True, "updated_at": True},
160        }
161        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)
162
163        # Check if the same run already exists
164        existing_task_run = next(
165            (
166                task_run
167                for task_run in self.kiln_task.runs()
168                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
169            ),
170            None,
171        )
172        if existing_task_run:
173            return existing_task_run
174
175        return new_task_run