# OpenAPI description of the Brainiall Specialist AI APIs.
# Version-like scalars are quoted so they are always read as strings.
openapi: '3.1.0'
info:
  title: Brainiall Specialist AI APIs
  version: '1.0.3'
  summary: Production-ready AI APIs for regulated workloads
  # Folded scalar: line breaks below become single spaces, no trailing newline.
  description: >-
    9 specialty SKUs (Background Removal, Audio Enhancement,
    Speaker Diarization, PDF-to-Markdown, Agent Memory,
    Identity Verification, Content Moderation, Document AI / OCR,
    Vision Labels) plus Speech AI / NLP Suite / Image Processing.
    OpenAI-compatible Bearer auth.
  termsOfService: 'https://app.brainiall.com/terms'
  contact:
    name: Brainiall Support
    url: 'https://app.brainiall.com/contact'
    email: 'support@brainiall.com'
  # The license entry points at the same URL as termsOfService.
  license:
    name: Brainiall Terms of Service
    url: 'https://app.brainiall.com/terms'
servers:
# Only one server entry is declared: the production base URL.
- url: 'https://api.brainiall.com'
  description: 'Production'
tags:
# Tag names are referenced verbatim by the `tags` list of each operation
# under `paths`, so they must stay character-for-character identical.
- name: S1 Background Removal
  description: 'Drop-in replacement for Azure Image Analysis 4.0'
- name: S2 Audio Enhancement
  description: 'Denoise, voice isolation, cleanup, master in one API'
- name: S3 Speaker Diarization
  description: 'Brainiall Speaker ID engine standalone diarization'
- name: S4 PDF to Markdown
  description: 'Brainiall Document Reader engine, equation/table-aware'
- name: S5 Agent Memory
  description: 'Multi-tenant semantic memory (Mem0 alternative)'
- name: Speech AI
  description: 'Pronunciation / STT / TTS / Brainiall Speech engine STT Pro'
- name: NLP Suite
  description: 'Toxicity / Sentiment / Entities / PII / Language'
# Long descriptions below use folded scalars (line breaks become spaces).
- name: S6 Identity Verification
  description: >-
    Face detect + 512-d embed + anti-spoof (Brainiall face detector +
    Brainiall Identity engine v1 + Brainiall deepfake module).
    AWS Rekognition Face alternative.
- name: S7 Content Moderation
  description: >-
    NSFW binary + body-part region detection
    (Brainiall NSFW classifier-INT8 + Brainiall NSFW detector).
    Undercut Rekognition Moderation 20%.
- name: S8 Document AI / OCR
  description: >-
    Brainiall Form Parser (Fast tier, quantized) (Fast) +
    Brainiall Form Parser (Pro tier, quantized) (Pro) printed-text OCR +
    structured receipt extraction. 5-10× cheaper than AWS Textract.
- name: S9 Vision Labels
  description: >-
    Brainiall object detection module (Fast closed-set) +
    Brainiall Vision Tagger engine (Standard multi-task:
    caption / detect / OCR / segment / zero-shot prompt).
components:
  securitySchemes:
    # HTTP bearer-token scheme; the top-level `security` requirement of this
    # document names it, so it applies to operations by default.
    BearerAuth:
      type: http
      scheme: bearer
      # Informational hint about the token shape.
      bearerFormat: 'brnl-<product>-<hex>'
      description: 'Get a free API key at https://app.brainiall.com'
  schemas:
    # Error envelope: a required `error` object with an integer `code` and a
    # string `message` (neither inner field is marked required).
    # NOTE(review): no operation visible in this document references this
    # schema; confirm which error responses are meant to use it.
    Error:
      type: object
      required: [error]
      properties:
        error:
          type: object
          properties:
            code: {type: integer}
            message: {type: string}
    # Request body for background removal: a required Base64 image plus an
    # optional processing tier and output format.
    BackgroundRemovalRequest:
      type: object
      required: [image]
      properties:
        image:
          type: string
          format: byte
          description: 'Base64-encoded PNG/JPEG/WebP'
        # Processing tier; `fast` when omitted.
        tier:
          type: string
          enum: [fast, hd]
          default: fast
        # Encoding of the returned image; `png` when omitted.
        output_format:
          type: string
          enum: [png, webp]
          default: png
    # Response wrapper holding a single required byte-format `image` string.
    ImageResponse:
      type: object
      required: [image]
      properties:
        image: {type: string, format: byte}
    # Shared request body for the audio enhancement operations.
    AudioRequest:
      type: object
      required: [audio]
      properties:
        audio: {type: string, format: byte}
        # Container/codec of the returned audio; `wav` when omitted.
        output_format:
          type: string
          enum: [wav, mp3, ogg]
          default: wav
        # Open-ended object: any keys are accepted, none are defined here.
        options: {type: object, additionalProperties: true}
    # Shared response body for the audio enhancement operations.
    AudioResponse:
      type: object
      required: [audio]
      properties:
        audio: {type: string, format: byte}
        # Presumably the audio duration in seconds (inferred from the `_s`
        # suffix) - TODO confirm.
        duration_s: {type: number}
    # Request body for speaker diarization: required audio plus optional
    # lower/upper bounds on the speaker count.
    # NOTE(review): the schema does not express min_speakers <= max_speakers.
    DiarizeRequest:
      type: object
      required: [audio]
      properties:
        audio: {type: string, format: byte}
        min_speakers: {type: integer, minimum: 1, default: 1}
        max_speakers: {type: integer, minimum: 1, default: 10}
    # One speaker turn: a speaker label with start/end numbers.
    # `duration` is optional; the other three fields are required.
    DiarizeTurn:
      type: object
      required: [speaker, start, end]
      properties:
        speaker: {type: string, example: SPEAKER_00}
        start: {type: number, format: float}
        end: {type: number, format: float}
        duration: {type: number, format: float}
    # Diarization result: summary fields plus the list of speaker turns.
    # No property is marked required.
    DiarizeResponse:
      type: object
      properties:
        duration_s: {type: number}
        model:
          type: string
          example: 'Brainiall Speaker ID engine v3.1'
        tier: {type: string}
        num_speakers_detected: {type: integer}
        num_turns: {type: integer}
        turns:
          type: array
          items: {$ref: '#/components/schemas/DiarizeTurn'}
        processing_time_s: {type: number}
    # Request body for PDF-to-Markdown conversion.
    PdfRequest:
      type: object
      required: [pdf]
      properties:
        pdf: {type: string, format: byte}
        # Only one tier value (`fast`) is declared; it is also the default.
        tier:
          type: string
          enum: [fast]
          default: fast
    # Request body for adding a fact to a memory namespace.
    MemoryAddRequest:
      type: object
      required: [text]
      properties:
        # Non-empty text, at most 8192 characters.
        text: {type: string, minLength: 1, maxLength: 8192}
        # Open-ended object: any keys are accepted, none are defined here.
        metadata: {type: object, additionalProperties: true}
    # Response returned after adding a fact; no property is marked required.
    MemoryAddResponse:
      type: object
      properties:
        id: {type: integer}
        namespace: {type: string}
        text: {type: string}
    # One semantic-search result: the stored text with its similarity score,
    # creation time, and caller-supplied metadata.
    MemorySearchHit:
      type: object
      properties:
        id:
          type: integer
        text:
          type: string
        score:
          type: number
          format: float
          description: Cosine similarity (0-1, higher is more similar)
        created_at:
          type: number
          # `double`, not `float`: single precision has a 24-bit significand,
          # so current epoch values (~1.7e9) would be rounded to multiples of
          # 128 seconds in typed clients generated from `format: float`.
          format: double
          description: Unix timestamp when added
        # Open-ended object: any keys are accepted, none are defined here.
        metadata:
          type: object
          additionalProperties: true
    # Search response: echoes namespace and query, then lists the hits along
    # with the tier and model strings.
    MemorySearchResponse:
      type: object
      properties:
        namespace: {type: string}
        query: {type: string}
        results:
          type: array
          items: {$ref: '#/components/schemas/MemorySearchHit'}
        tier: {type: string, example: fast}
        model:
          type: string
          example: 'BAAI/bge-small-en-v1.5'
    # Request body for identity verification: a required Base64 image and an
    # optional list selecting which analysis stages to run.
    IdentityVerifyRequest:
      type: object
      required:
      - image
      properties:
        image:
          type: string
          # Machine-readable Base64 marker, matching the description below.
          # `format: byte` mirrors BackgroundRemovalRequest.image;
          # `contentEncoding` is the OpenAPI 3.1 / JSON Schema 2020-12 keyword.
          # Both are annotations and do not tighten validation.
          format: byte
          contentEncoding: base64
          description: Base64-encoded image (JPEG, PNG, WebP). Max 10 MB.
        # Stages to run; all three when omitted.
        include:
          type: array
          items:
            type: string
            enum:
            - detection
            - embedding
            - antispoof
          default:
          - detection
          - embedding
          - antispoof
    # Per-face result: bounding box, detection confidence, embedding vector,
    # and anti-spoof verdict. No property is marked required.
    IdentityFace:
      type: object
      properties:
        # Exactly four integers.
        box:
          type: array
          items: {type: integer}
          minItems: 4
          maxItems: 4
          description: '[x, y, w, h] in pixels'
        detection_confidence:
          {type: number, format: float, minimum: 0, maximum: 1}
        # NOTE(review): the description says 512-d but the array length is
        # not constrained by the schema.
        embedding:
          type: array
          items: {type: number}
          description: '512-d face embedding (Brainiall Identity engine v1)'
        antispoof_label:
          type: string
          enum: [real, fake]
        antispoof_score:
          {type: number, format: float, minimum: 0, maximum: 1}
    # Identity verification result: request bookkeeping plus one entry per
    # analyzed face.
    IdentityVerifyResponse:
      type: object
      properties:
        request_id: {type: string}
        processing_ms: {type: integer}
        faces:
          type: array
          items: {$ref: '#/components/schemas/IdentityFace'}
    # Request body for image moderation: a required Base64 image and an
    # optional list selecting which analyses to run.
    ModerationRequest:
      type: object
      required:
      - image
      properties:
        image:
          type: string
          # Machine-readable Base64 marker, matching the description below.
          # `format: byte` mirrors BackgroundRemovalRequest.image;
          # `contentEncoding` is the OpenAPI 3.1 / JSON Schema 2020-12 keyword.
          # Both are annotations and do not tighten validation.
          format: byte
          contentEncoding: base64
          description: Base64-encoded image
        # Analyses to run; both when omitted.
        include:
          type: array
          items:
            type: string
            enum:
            - binary
            - regions
          default:
          - binary
          - regions
    # One detected region: a label, a score in [0, 1], and a 4-integer box.
    # NOTE(review): the box component order is not documented here;
    # IdentityFace.box documents [x, y, w, h] - confirm it is the same.
    ModerationRegion:
      type: object
      properties:
        label: {type: string}
        score: {type: number, format: float, minimum: 0, maximum: 1}
        box:
          type: array
          items: {type: integer}
          minItems: 4
          maxItems: 4
    # Moderation result: overall safety flag, the binary classifier verdict,
    # and the list of detected regions.
    ModerationResponse:
      type: object
      properties:
        request_id: {type: string}
        processing_ms: {type: integer}
        is_safe: {type: boolean}
        # Binary verdict; unlike ModerationRegion.score, this score carries
        # no minimum/maximum bounds.
        binary:
          type: object
          properties:
            label:
              type: string
              enum: [normal, nsfw]
            score: {type: number, format: float}
        regions:
          type: array
          items: {$ref: '#/components/schemas/ModerationRegion'}
    # Request body for OCR extraction: a required Base64 image and an
    # optional tier selector.
    OcrExtractRequest:
      type: object
      required:
      - image
      properties:
        image:
          type: string
          # Machine-readable Base64 marker, matching the description below.
          # `format: byte` mirrors BackgroundRemovalRequest.image;
          # `contentEncoding` is the OpenAPI 3.1 / JSON Schema 2020-12 keyword.
          # Both are annotations and do not tighten validation.
          format: byte
          contentEncoding: base64
          description: Base64-encoded image
        # Processing tier; `fast` when omitted.
        tier:
          type: string
          enum:
          - fast
          - pro
          default: fast
          description: fast = Brainiall Form Parser mono-line printed text · pro = Brainiall Form Parser engine multi-line + structured receipt extraction
    # OCR result. Nullable fields use the OpenAPI 3.1 / JSON Schema form
    # `type: [T, 'null']`. The 3.0-only `nullable: true` keyword was removed
    # in 3.1, so with it a `null` value would not match the declared type.
    OcrExtractResponse:
      type: object
      properties:
        request_id:
          type: string
        processing_ms:
          type: integer
        tier:
          type: string
          enum:
          - fast
          - pro
        text:
          type: [string, 'null']
          description: Extracted text (null if low_confidence on Fast tier)
        raw_text:
          type: [string, 'null']
          description: Raw decoder output (only present when low_confidence=true; for diagnostics)
        confidence:
          type: [number, 'null']
          format: float
          minimum: 0
          maximum: 1
        low_confidence:
          type: [boolean, 'null']
          description: True if Fast tier rejected output (no-text image, gibberish, or out-of-scope multi-line)
        low_confidence_reason:
          type: [string, 'null']
        # Open-ended object (or null): no keys are defined here.
        structured:
          type: [object, 'null']
          additionalProperties: true
          description: Brainiall Form Parser engine Pro tier structured output (raw + parsed)
    # Request body for vision labelling. Nullable fields use the OpenAPI 3.1
    # form `type: [T, 'null']`; the 3.0-only `nullable: true` keyword was
    # removed in 3.1.
    VisionLabelsRequest:
      type: object
      required:
      - image
      properties:
        image:
          type: string
          # Machine-readable Base64 marker, matching the description below.
          # `format: byte` mirrors BackgroundRemovalRequest.image;
          # `contentEncoding` is the OpenAPI 3.1 / JSON Schema 2020-12 keyword.
          # Both are annotations and do not tighten validation.
          format: byte
          contentEncoding: base64
          description: Base64-encoded image
        # Processing tier; `fast` when omitted.
        tier:
          type: string
          enum:
          - fast
          - standard
          default: fast
          description: fast = Brainiall object detection module closed-set (COCO 80 classes) · standard = Brainiall Vision Tagger engine multi-task (caption / detect /
            OCR / segment / zero-shot)
        task:
          type: [string, 'null']
          description: Brainiall Vision Tagger engine task tag (Standard tier only). E.g. <CAPTION>, <DETAILED_CAPTION>, <OD>, <OCR>, <REFERRING_EXPRESSION_SEGMENTATION>
        prompt:
          type: [string, 'null']
          description: Optional zero-shot text prompt (Standard tier with referring expression tasks)
    # One detected label with its confidence in [0, 1] and an optional box.
    VisionLabel:
      type: object
      properties:
        label:
          type: string
        confidence:
          type: number
          format: float
          minimum: 0
          maximum: 1
        # Either exactly four integers or null. Expressed with the OpenAPI 3.1
        # type array because the 3.0-only `nullable: true` keyword was removed
        # in 3.1. The array keywords below only apply when the value is an
        # array, so null remains valid.
        box:
          type: [array, 'null']
          items:
            type: integer
          minItems: 4
          maxItems: 4
    # Vision labelling result. Nullable fields use the OpenAPI 3.1 form
    # `type: [T, 'null']`; the 3.0-only `nullable: true` keyword was removed
    # in 3.1.
    VisionLabelsResponse:
      type: object
      properties:
        request_id:
          type: string
        processing_ms:
          type: integer
        tier:
          type: string
          enum:
          - fast
          - standard
        labels:
          type: [array, 'null']
          items:
            $ref: '#/components/schemas/VisionLabel'
          description: Fast tier closed-set detections
        caption:
          type: [string, 'null']
          description: Brainiall Vision Tagger engine caption (Standard tier)
        # Open-ended object (or null): no keys are defined here.
        output:
          type: [object, 'null']
          additionalProperties: true
          description: Brainiall Vision Tagger engine raw multi-task output
# Document-level security requirement: BearerAuth with an empty scope list.
# No operation in this document declares its own `security` override.
security:
- {BearerAuth: []}
paths:
  # S1: background removal over a JSON body carrying a Base64 image.
  # NOTE(review): the 400/401/429 responses declare no body schema;
  # '#/components/schemas/Error' exists but is not referenced - confirm.
  /v1/image/remove-background/base64:
    post:
      tags: [S1 Background Removal]
      summary: Remove background from image
      description: >-
        Two tiers: Fast (rembg U2-Net, ~0.4s) or
        HD (Brainiall Cutout engine, mIoU 0.95, ~2s).
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/BackgroundRemovalRequest'}
      responses:
        '200':
          description: Image processed
          content:
            application/json:
              schema: {$ref: '#/components/schemas/ImageResponse'}
        '400': {description: 'Invalid input'}
        '401': {description: 'Unauthorized'}
        '429': {description: 'Rate limit exceeded'}
  # S2: four audio enhancement operations. Each is a POST taking AudioRequest
  # and returning AudioResponse; only the summaries and 200 descriptions
  # differ.
  # NOTE(review): only the 200 response is documented for these operations.
  /v1/audio/enhance/denoise:
    post:
      tags: [S2 Audio Enhancement]
      summary: 'Denoise audio (Brainiall Audio Polish (denoise))'
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/AudioRequest'}
      responses:
        '200':
          description: 'Cleaned audio'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/AudioResponse'}
  /v1/audio/enhance/voice-isolation:
    post:
      tags: [S2 Audio Enhancement]
      summary: 'Isolate vocals (Brainiall Audio Polish (separation))'
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/AudioRequest'}
      responses:
        '200':
          description: 'Isolated vocals'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/AudioResponse'}
  /v1/audio/enhance/cleanup:
    post:
      tags: [S2 Audio Enhancement]
      summary: 'Remove filler words (Brainiall Speech engine)'
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/AudioRequest'}
      responses:
        '200':
          description: 'Cleaned audio'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/AudioResponse'}
  /v1/audio/enhance/master:
    post:
      tags: [S2 Audio Enhancement]
      summary: 'Master to -14 LUFS'
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/AudioRequest'}
      responses:
        '200':
          description: 'Mastered audio'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/AudioResponse'}
  # S3: speaker diarization; takes DiarizeRequest, returns DiarizeResponse.
  # NOTE(review): only the 200 response is documented for this operation.
  /v1/audio/diarize/base64:
    post:
      tags: [S3 Speaker Diarization]
      summary: Diarize speakers in audio
      description: 'Brainiall Speaker ID engine speaker diarization. 12% DER on AMI.'
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/DiarizeRequest'}
      responses:
        '200':
          description: 'Diarization result'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/DiarizeResponse'}
  # S4: PDF-to-Markdown. The request is JSON (PdfRequest); the 200 response
  # is a plain string served as text/markdown rather than a JSON object.
  /v1/document/pdf-to-markdown/base64:
    post:
      tags: [S4 PDF to Markdown]
      summary: Convert PDF to Markdown
      description: >-
        Brainiall Document Reader engine, preserves headings, tables,
        equations. Returns raw markdown body (Content-Type: text/markdown).
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/PdfRequest'}
      responses:
        '200':
          description: 'Markdown text'
          content:
            text/markdown:
              schema: {type: string}
  # S5: add a fact to the namespace named in the path.
  /v1/memory/{namespace}/add:
    post:
      tags: [S5 Agent Memory]
      summary: Add fact to namespace
      parameters:
      # Namespace: 1-64 characters drawn from letters, digits, `_` and `-`.
      - name: namespace
        in: path
        required: true
        schema:
          type: string
          pattern: '^[a-zA-Z0-9_-]{1,64}$'
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/MemoryAddRequest'}
      responses:
        '200':
          description: 'Added'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/MemoryAddResponse'}
  # S5: semantic search over the facts stored in one namespace.
  /v1/memory/{namespace}/search:
    get:
      tags:
      - S5 Agent Memory
      summary: Semantic search in namespace
      parameters:
      - name: namespace
        in: path
        required: true
        schema:
          type: string
          # Same constraint as the `namespace` parameter of
          # /v1/memory/{namespace}/add, so both operations document one
          # namespace format: 1-64 letters, digits, `_` or `-`.
          pattern: '^[a-zA-Z0-9_-]{1,64}$'
      # Query text to search for.
      - name: q
        in: query
        required: true
        schema:
          type: string
      # Optional result count: 1-100, 5 when omitted.
      - name: k
        in: query
        schema:
          type: integer
          default: 5
          minimum: 1
          maximum: 100
      responses:
        '200':
          description: Top-k hits
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/MemorySearchResponse'
  # Speech AI: four POST operations.
  # NOTE(review): none of these declares a requestBody or a response schema;
  # only a 200 description is present.
  /v1/pronunciation/assess/base64:
    post:
      tags: [Speech AI]
      summary: 'Score pronunciation accuracy'
      responses:
        '200': {description: 'Pronunciation scores'}
  /v1/stt/transcribe/base64:
    post:
      tags: [Speech AI]
      summary: 'Speech to text (Brainiall Speech engine)'
      responses:
        '200': {description: 'Transcription with word timestamps'}
  /v1/tts/synthesize/base64:
    post:
      tags: [Speech AI]
      summary: 'Text to speech (Brainiall Voice)'
      responses:
        '200': {description: 'Synthesized audio'}
  /v1/whisper/transcribe/base64:
    post:
      tags: [Speech AI]
      summary: 'Brainiall Speech (Pro tier) (99 languages)'
      responses:
        '200':
          description: 'Transcription + word timestamps + optional speaker labels'
  # NLP Suite: five POST operations.
  # NOTE(review): none of these declares a requestBody or a response schema;
  # only a 200 description is present.
  /v1/nlp/toxicity:
    post:
      tags: [NLP Suite]
      summary: 'Toxicity detection'
      responses:
        '200': {description: 'Toxicity score'}
  /v1/nlp/sentiment:
    post:
      tags: [NLP Suite]
      summary: 'Sentiment analysis'
      responses:
        '200': {description: 'Sentiment label + score'}
  /v1/nlp/entities:
    post:
      tags: [NLP Suite]
      summary: 'Named entity recognition'
      responses:
        '200': {description: 'Entity list'}
  /v1/nlp/pii:
    post:
      tags: [NLP Suite]
      summary: 'PII detection (BERT + regex)'
      responses:
        '200': {description: 'Detected PII spans'}
  /v1/nlp/language:
    post:
      tags: [NLP Suite]
      summary: 'Language detection'
      responses:
        '200': {description: 'Language code + confidence'}
  # S6: identity verification; takes IdentityVerifyRequest and returns
  # IdentityVerifyResponse. Error responses carry a description only.
  /v1/identity/verify/base64:
    post:
      tags: [S6 Identity Verification]
      summary: 'Verify face identity (detect + embed + anti-spoof)'
      description: >-
        Detect faces (Brainiall face detector), extract 512-d embeddings
        (Brainiall Identity engine v1, Apache 2.0), and assess anti-spoofing
        (Brainiall deepfake module ViT). 154 ms p50 CPU. KYC, fraud detection,
        access control.
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/IdentityVerifyRequest'}
      responses:
        '200':
          description: 'Faces detected and analyzed'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/IdentityVerifyResponse'}
        '400': {description: 'Invalid input'}
        '401': {description: 'Unauthorized'}
        '429': {description: 'Rate limit exceeded'}
  # S7: image moderation; takes ModerationRequest and returns
  # ModerationResponse. Error responses carry a description only.
  /v1/moderation/analyze/base64:
    post:
      tags: [S7 Content Moderation]
      summary: 'Analyze image for NSFW + body-part regions'
      description: >-
        Brainiall NSFW classifier (quantized) (binary classifier) +
        Brainiall NSFW detector (region detector). 91 ms p50 CPU.
        20% cheaper than AWS Rekognition Moderation. permissively licensed.
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/ModerationRequest'}
      responses:
        '200':
          description: 'Image classified and analyzed'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/ModerationResponse'}
        '400': {description: 'Invalid input'}
        '401': {description: 'Unauthorized'}
        '429': {description: 'Rate limit exceeded'}
  # S8: OCR extraction; takes OcrExtractRequest and returns
  # OcrExtractResponse. Error responses carry a description only.
  /v1/ocr/extract/base64:
    post:
      tags: [S8 Document AI / OCR]
      summary: >-
        Extract text from image (Fast = Brainiall Form Parser,
        Pro = Brainiall Form Parser engine)
      description: >-
        Two tiers: Fast (Brainiall Form Parser (Fast tier, quantized) +
        confidence threshold + content sanity filter, mono-line, 327 ms)
        or Pro (Brainiall Form Parser (Pro tier, quantized), multi-line +
        structured receipt extraction, 2.4 s).
        5-10× cheaper than AWS Textract for receipts. permissively licensed.
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/OcrExtractRequest'}
      responses:
        '200':
          description: 'Text extracted'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/OcrExtractResponse'}
        '400': {description: 'Invalid input'}
        '401': {description: 'Unauthorized'}
        '429': {description: 'Rate limit exceeded'}
  # S9: vision labelling; takes VisionLabelsRequest and returns
  # VisionLabelsResponse. Error responses carry a description only.
  /v1/vision/labels/base64:
    post:
      tags: [S9 Vision Labels]
      summary: >-
        Detect labels and captions in image
        (Fast = Brainiall object detection module closed-set,
        Standard = Brainiall Vision Tagger engine multi-task)
      description: >-
        Fast tier: Brainiall object detection module (COCO 80 classes, 236 ms).
        Standard tier: Brainiall Vision Tagger engine multi-task
        (caption / detect / OCR / segment / zero-shot text prompts, 1.6 s).
        Capability AWS Rekognition does not offer: zero-shot prompting +
        dense captions. permissively licensed.
      requestBody:
        required: true
        content:
          application/json:
            schema: {$ref: '#/components/schemas/VisionLabelsRequest'}
      responses:
        '200':
          description: 'Labels and/or caption returned'
          content:
            application/json:
              schema: {$ref: '#/components/schemas/VisionLabelsResponse'}
        '400': {description: 'Invalid input'}
        '401': {description: 'Unauthorized'}
        '429': {description: 'Rate limit exceeded'}
