// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

import { APIResource } from '../../core/resource';
import * as GraderModelsAPI from './grader-models';
import * as Shared from '../shared';
import * as ResponsesAPI from '../responses/responses';

/**
 * Resource class for grader models. It declares no members of its own; the
 * grader types defined in this file are re-exported as type members of the
 * same-named `GraderModels` namespace declared in this file.
 */
export class GraderModels extends APIResource {}

/**
 * A list of inputs, each of which may be a plain string, an input text, an output
 * text, an input image, or an input audio object.
 */
export type GraderInputs = Array<
  | string
  | ResponsesAPI.ResponseInputText
  | GraderInputs.OutputText
  | GraderInputs.InputImage
  | ResponsesAPI.ResponseInputAudio
>;

/**
 * Object shapes referenced by the `GraderInputs` array type: `OutputText` and
 * `InputImage`.
 */
export namespace GraderInputs {
  /**
   * A text output from the model.
   */
  export interface OutputText {
    /**
     * The text output from the model.
     */
    text: string;

    /**
     * The type of the output text. Always `output_text`.
     */
    type: 'output_text';
  }

  /**
   * An image input block used within EvalItem content arrays.
   */
  export interface InputImage {
    /**
     * The URL of the image input.
     */
    image_url: string;

    /**
     * The type of the image input. Always `input_image`.
     */
    type: 'input_image';

    /**
     * The detail level of the image to be sent to the model. One of `high`, `low`, or
     * `auto`. Defaults to `auto`.
     *
     * Optional. Declared as a plain `string`, so the listed values are not enforced
     * by the type checker.
     */
    detail?: string;
  }
}

/**
 * A LabelModelGrader object which uses a model to assign labels to each item in
 * the evaluation.
 */
export interface LabelModelGrader {
  /**
   * The input messages for the grader model. Each entry is a
   * `LabelModelGrader.Input` message carrying a role and its content.
   */
  input: Array<LabelModelGrader.Input>;

  /**
   * The labels to assign to each item in the evaluation.
   */
  labels: Array<string>;

  /**
   * The model to use for the evaluation. Must support structured outputs.
   */
  model: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The labels that indicate a passing result. Must be a subset of `labels`.
   */
  passing_labels: Array<string>;

  /**
   * The object type, which is always `label_model`.
   */
  type: 'label_model';
}

/**
 * Types nested under `LabelModelGrader`: the `Input` message shape and the
 * content object shapes it references.
 */
export namespace LabelModelGrader {
  /**
   * A message input to the model with a role indicating instruction following
   * hierarchy. Instructions given with the `developer` or `system` role take
   * precedence over instructions given with the `user` role. Messages with the
   * `assistant` role are presumed to have been generated by the model in previous
   * interactions.
   */
  export interface Input {
    /**
     * Inputs to the model - can contain template strings. Supports text, output text,
     * input images, and input audio, either as a single item or an array of items
     * (the array form is the shared `GraderInputs` type).
     */
    content:
      | string
      | ResponsesAPI.ResponseInputText
      | Input.OutputText
      | Input.InputImage
      | ResponsesAPI.ResponseInputAudio
      | GraderModelsAPI.GraderInputs;

    /**
     * The role of the message input. One of `user`, `assistant`, `system`, or
     * `developer`.
     */
    role: 'user' | 'assistant' | 'system' | 'developer';

    /**
     * The type of the message input. Always `message` when present; the field is
     * optional.
     */
    type?: 'message';
  }

  /**
   * Content object shapes used by `Input.content`. They declare the same fields as
   * `GraderInputs.OutputText` and `GraderInputs.InputImage` defined in this file.
   */
  export namespace Input {
    /**
     * A text output from the model.
     */
    export interface OutputText {
      /**
       * The text output from the model.
       */
      text: string;

      /**
       * The type of the output text. Always `output_text`.
       */
      type: 'output_text';
    }

    /**
     * An image input block used within EvalItem content arrays.
     */
    export interface InputImage {
      /**
       * The URL of the image input.
       */
      image_url: string;

      /**
       * The type of the image input. Always `input_image`.
       */
      type: 'input_image';

      /**
       * The detail level of the image to be sent to the model. One of `high`, `low`, or
       * `auto`. Defaults to `auto`.
       *
       * Optional. Declared as a plain `string`, so the listed values are not enforced
       * by the type checker.
       */
      detail?: string;
    }
  }
}

/**
 * A MultiGrader object combines the output of multiple graders to produce a single
 * score.
 */
export interface MultiGrader {
  /**
   * A formula to calculate the output based on grader results.
   */
  calculate_output: string;

  // NOTE(review): the field name is plural, but the declared type is a single
  // grader object rather than an array or map — confirm against the API spec.
  /**
   * The grader whose results feed into `calculate_output`. One of
   * `StringCheckGrader`, `TextSimilarityGrader`, `PythonGrader`,
   * `ScoreModelGrader`, or `LabelModelGrader`.
   */
  graders: StringCheckGrader | TextSimilarityGrader | PythonGrader | ScoreModelGrader | LabelModelGrader;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The object type, which is always `multi`.
   */
  type: 'multi';
}

/**
 * A PythonGrader object that runs a Python script on the input.
 */
export interface PythonGrader {
  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The source code of the Python script, supplied as a string.
   */
  source: string;

  /**
   * The object type, which is always `python`.
   */
  type: 'python';

  /**
   * The image tag to use for the Python script. Optional.
   */
  image_tag?: string;
}

/**
 * A ScoreModelGrader object that uses a model to assign a score to the input.
 */
export interface ScoreModelGrader {
  /**
   * The input messages evaluated by the grader. Supports text, output text, input
   * image, and input audio content blocks, and may include template strings.
   */
  input: Array<ScoreModelGrader.Input>;

  /**
   * The model to use for the evaluation.
   */
  model: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The object type, which is always `score_model`.
   */
  type: 'score_model';

  /**
   * The range of the score. Defaults to `[0, 1]`.
   *
   * Optional. Declared as a general number array, so the two-element form shown in
   * the default is not enforced by the type checker.
   */
  range?: Array<number>;

  /**
   * The sampling parameters for the model. Optional; see
   * `ScoreModelGrader.SamplingParams`.
   */
  sampling_params?: ScoreModelGrader.SamplingParams;
}

/**
 * Types nested under `ScoreModelGrader`: the `Input` message shape, the content
 * object shapes it references, and `SamplingParams`.
 */
export namespace ScoreModelGrader {
  /**
   * A message input to the model with a role indicating instruction following
   * hierarchy. Instructions given with the `developer` or `system` role take
   * precedence over instructions given with the `user` role. Messages with the
   * `assistant` role are presumed to have been generated by the model in previous
   * interactions.
   */
  export interface Input {
    /**
     * Inputs to the model - can contain template strings. Supports text, output text,
     * input images, and input audio, either as a single item or an array of items
     * (the array form is the shared `GraderInputs` type).
     */
    content:
      | string
      | ResponsesAPI.ResponseInputText
      | Input.OutputText
      | Input.InputImage
      | ResponsesAPI.ResponseInputAudio
      | GraderModelsAPI.GraderInputs;

    /**
     * The role of the message input. One of `user`, `assistant`, `system`, or
     * `developer`.
     */
    role: 'user' | 'assistant' | 'system' | 'developer';

    /**
     * The type of the message input. Always `message` when present; the field is
     * optional.
     */
    type?: 'message';
  }

  /**
   * Content object shapes used by `Input.content`. They declare the same fields as
   * `GraderInputs.OutputText` and `GraderInputs.InputImage` defined in this file.
   */
  export namespace Input {
    /**
     * A text output from the model.
     */
    export interface OutputText {
      /**
       * The text output from the model.
       */
      text: string;

      /**
       * The type of the output text. Always `output_text`.
       */
      type: 'output_text';
    }

    /**
     * An image input block used within EvalItem content arrays.
     */
    export interface InputImage {
      /**
       * The URL of the image input.
       */
      image_url: string;

      /**
       * The type of the image input. Always `input_image`.
       */
      type: 'input_image';

      /**
       * The detail level of the image to be sent to the model. One of `high`, `low`, or
       * `auto`. Defaults to `auto`.
       *
       * Optional. Declared as a plain `string`, so the listed values are not enforced
       * by the type checker.
       */
      detail?: string;
    }
  }

  /**
   * The sampling parameters for the model. Every field is optional and may also be
   * `null`.
   */
  export interface SamplingParams {
    // NOTE(review): spelled `max_completions_tokens` (plural "completions");
    // presumably this mirrors the wire field name — confirm before renaming.
    /**
     * The maximum number of tokens the grader model may generate in its response.
     */
    max_completions_tokens?: number | null;

    /**
     * Constrains effort on reasoning for
     * [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
     * supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
     * Reducing reasoning effort can result in faster responses and fewer tokens used
     * on reasoning in a response.
     *
     * - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
     *   reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
     *   calls are supported for all reasoning values in gpt-5.1.
     * - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
     *   support `none`.
     * - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
     * - `xhigh` is supported for all models after `gpt-5.1-codex-max`.
     */
    reasoning_effort?: Shared.ReasoningEffort | null;

    /**
     * A seed value to initialize the randomness during sampling.
     */
    seed?: number | null;

    /**
     * A higher temperature increases randomness in the outputs.
     */
    temperature?: number | null;

    /**
     * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
     */
    top_p?: number | null;
  }
}

/**
 * A StringCheckGrader object that performs a string comparison between input and
 * reference using a specified operation. All fields are required.
 */
export interface StringCheckGrader {
  /**
   * The input text. This may include template strings.
   */
  input: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The string check operation to perform when comparing `input` with
   * `reference`. One of `eq`, `ne`, `like`, or `ilike`.
   */
  operation: 'eq' | 'ne' | 'like' | 'ilike';

  /**
   * The reference text. This may include template strings.
   */
  reference: string;

  /**
   * The object type, which is always `string_check`.
   */
  type: 'string_check';
}

/**
 * A TextSimilarityGrader object which grades text based on similarity metrics.
 */
export interface TextSimilarityGrader {
  /**
   * The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, `gleu`,
   * `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
   */
  evaluation_metric:
    | 'cosine'
    | 'fuzzy_match'
    | 'bleu'
    | 'gleu'
    | 'meteor'
    | 'rouge_1'
    | 'rouge_2'
    | 'rouge_3'
    | 'rouge_4'
    | 'rouge_5'
    | 'rouge_l';

  /**
   * The text being graded.
   */
  input: string;

  /**
   * The name of the grader.
   */
  name: string;

  /**
   * The text being graded against.
   */
  reference: string;

  /**
   * The type of grader. Always `text_similarity`.
   */
  type: 'text_similarity';
}

/**
 * Type-only re-exports of the grader types declared in this file. The namespace
 * shares its name with the `GraderModels` class, so the two declarations merge and
 * each type can be referenced as `GraderModels.<TypeName>`.
 */
export declare namespace GraderModels {
  export {
    type GraderInputs as GraderInputs,
    type LabelModelGrader as LabelModelGrader,
    type MultiGrader as MultiGrader,
    type PythonGrader as PythonGrader,
    type ScoreModelGrader as ScoreModelGrader,
    type StringCheckGrader as StringCheckGrader,
    type TextSimilarityGrader as TextSimilarityGrader,
  };
}