Skip to main content

Observability with the Evaluate Endpoint

Bluejay provides powerful observability capabilities through the evaluate endpoint, allowing you to analyze agent performance, detect hallucinations, measure redundancy, and track custom metrics. This cookbook will guide you through using the evaluate endpoint with practical Python examples.

Understanding the Evaluate Endpoint

The evaluate endpoint (POST /v1/evaluate) lets you submit call data for comprehensive analysis. It accepts audio recordings, transcripts, and metadata, and creates an evaluation job whose results you retrieve later; those results include detailed performance metrics and insights. Key features:
  • Hallucination Detection: Identifies when agents provide incorrect or fabricated information
  • Redundancy Analysis: Measures unnecessary repetition in agent responses
  • Custom Metrics: Track domain-specific performance indicators
  • Multi-modal Analysis: Works with both audio and text transcripts

Quick Start: Evaluating a Call

import requests
import json
from datetime import datetime, timezone

def evaluate_call_basic(recording_url, bluejay_agent_id):
    """Submit a recorded call to the evaluate endpoint and return its call ID.

    Args:
        recording_url: URL of the call recording, sent as ``recording_url``.
        bluejay_agent_id: Agent identifier, sent as ``agent_id``.

    Returns:
        The ``call_id`` field of the response body when the API answers with
        HTTP 200; ``None`` when the request fails at the network level or the
        API answers with any other status.
    """

    # Your API configuration
    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    # Prepare evaluation data
    call_data = {
        "agent_id": bluejay_agent_id,
        "recording_url": recording_url,
        "start_time_utc": datetime.now(timezone.utc).isoformat(),
        "participants": [
            {
                "role": "USER",
                "spoke_first": False
            },
            {
                "role": "AGENT",
                "spoke_first": True
            }
        ]
    }

    # Submit for evaluation
    headers = {
        "X-API-Key": api_key,
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(
            f"{base_url}/evaluate",
            headers=headers,
            json=call_data,
            # Without a timeout, requests waits indefinitely on a stalled
            # connection.
            timeout=30
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "print and return
        # None" contract as a non-200 response.
        print(f"Error: request failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    result = response.json()
    print(f"Evaluation submitted! Call ID: {result['call_id']}")
    return result['call_id']

# Usage
# NOTE: recording_url and bluejay_agent_id are not assigned anywhere in this
# snippet; define them with your own values before running this line.
call_id = evaluate_call_basic(recording_url, bluejay_agent_id)

Working with Audio Files

For the best evaluation results, provide high-quality audio recordings. The endpoint supports multiple audio formats and automatically transcribes audio if no transcript is provided.
import requests
import json
from datetime import datetime, timezone
import os

def evaluate_with_audio_file():
    """Upload a local audio file and submit the call for evaluation.

    The nested ``upload_audio_file`` is a placeholder that you must replace
    with code for your own storage backend; until then this function raises
    instead of posting a null ``recording_url`` to the API.

    Returns:
        The decoded JSON body of the evaluate response.

    Raises:
        NotImplementedError: The upload placeholder has not been replaced.
        ValueError: The upload step returned no URL.
    """

    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    # Upload audio file to get a public URL (implement based on your storage)
    def upload_audio_file(file_path):
        """Upload audio file to storage and return public URL"""
        # This is a placeholder - implement based on your storage solution
        # Examples: AWS S3, Google Cloud Storage, Azure Blob Storage
        raise NotImplementedError(
            "upload_audio_file is a placeholder; implement it for your storage "
            "backend and return the uploaded file's URL"
        )

    # Prepare call data with audio
    audio_file_path = "local/path/to/your/call-recording.wav"

    # Upload audio file
    recording_url = upload_audio_file(audio_file_path)
    if not recording_url:
        # An uploader that signals failure by returning None (or "") must not
        # lead to a request with an empty recording_url.
        raise ValueError(f"Upload of {audio_file_path} did not return a URL")

    call_data = {
        "agent_id": "123",
        "recording_url": recording_url,
        "start_time_utc": datetime.now(timezone.utc).isoformat(),
        "participants": [
            {
                "role": "USER",
                "spoke_first": False
            },
            {
                "role": "AGENT",
                "spoke_first": True
            }
        ],
        "tool_calls": []  # Include if your agent made any tool calls
    }

    # Submit for evaluation
    headers = {
        "X-API-Key": api_key,
        "Content-Type": "application/json"
    }

    response = requests.post(
        f"{base_url}/evaluate",
        headers=headers,
        json=call_data,
        timeout=30  # never wait indefinitely on a stalled connection
    )

    return response.json()

# Example with AWS S3 upload
import boto3
from botocore.exceptions import NoCredentialsError

def upload_to_s3(file_path, bucket_name, s3_key):
    """Upload a local file to S3 and return the object's URL.

    Args:
        file_path: Path of the local file to upload.
        bucket_name: Destination bucket.
        s3_key: Object key inside the bucket.

    Returns:
        The virtual-hosted-style URL of the uploaded object, or ``None`` when
        no AWS credentials are available.

    NOTE(review): nothing here sets an ACL or bucket policy, so the returned
    URL is only publicly readable if the bucket is already configured that way.
    """
    # Local import keeps this snippet self-contained.
    from urllib.parse import quote

    s3 = boto3.client('s3')

    try:
        s3.upload_file(file_path, bucket_name, s3_key)
    except NoCredentialsError:
        print("AWS credentials not found")
        return None

    # Percent-encode the key (keeping "/" separators) so keys containing
    # spaces, "#", "?" or non-ASCII characters still yield a valid URL.
    return f"https://{bucket_name}.s3.amazonaws.com/{quote(s3_key, safe='/')}"

Working with Transcripts

Provide transcripts if you pre-transcribe your audio or do not have an audio file. You can provide transcripts as structured data or as a URL to a transcript file.
import requests
import json
from datetime import datetime, timezone

def evaluate_with_transcript_data():
    """Submit a call for evaluation using an inline, structured transcript.

    No recording is sent; the transcript is a list of utterances, each with
    start/end offsets in milliseconds, a speaker role and the spoken text.

    Returns:
        The decoded JSON response (a dict containing at least ``call_id`` and
        ``status``) when the API answers with HTTP 200; ``None`` when the
        request fails or the API answers with any other status.
    """

    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    # Sample transcript data
    transcript = [
        {
            "start_offset_ms": 100,
            "end_offset_ms": 1000,
            "speaker": "AGENT",
            "utterance": "Hello! Thank you for calling Bluejay support. How can I help you today?"
        },
        {
            "start_offset_ms": 1000,
            "end_offset_ms": 2000,
            "speaker": "USER",
            "utterance": "Hi, I'm having trouble with my recent order. It says it's delayed but I haven't received any updates."
        },
        {
            "start_offset_ms": 2000,
            "end_offset_ms": 3000,
            "speaker": "AGENT",
            "utterance": "I'd be happy to help you with that. Can you please provide your order number?"
        },
        {
            "start_offset_ms": 3000,
            "end_offset_ms": 4000,
            "speaker": "USER",
            "utterance": "Sure, it's BJ-2024-001-5678."
        },
        {
            "start_offset_ms": 4000,
            "end_offset_ms": 5000,
            "speaker": "AGENT",
            "utterance": "Thank you. Let me look that up for you. I can see your order is currently in transit and should arrive by tomorrow. The tracking information shows it's being processed through our fulfillment center."
        }
    ]

    call_data = {
        "agent_id": "123",
        "transcript": transcript,
        "start_time_utc": "2024-01-15T10:00:00Z",
        "participants": [
            {
                "role": "USER",
                "spoke_first": False
            },
            {
                "role": "AGENT",
                "spoke_first": True
            }
        ]
    }

    # Submit for evaluation
    headers = {
        "X-API-Key": api_key,
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(
            f"{base_url}/evaluate",
            headers=headers,
            json=call_data,
            timeout=30  # never wait indefinitely on a stalled connection
        )
    except requests.RequestException as exc:
        # Network failures follow the same "print and return None" contract
        # as a non-200 response.
        print(f"Error submitting evaluation: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error submitting evaluation: {response.text}")
        return None

    result = response.json()
    print("Evaluation submitted successfully!")
    print(f"Call ID: {result['call_id']}")
    print(f"Status: {result['status']}")
    return result

def evaluate_audio_with_transcript_file():
    """Submit a call for evaluation using a recording URL and a transcript URL.

    Both the audio and the transcript are referenced by URL rather than sent
    inline.

    Returns:
        The decoded JSON body of the evaluate response.
    """

    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    call_data = {
        "agent_id": "123",
        "recording_url": "https://storage.example.com/audio.mp3",
        "transcript_url": "https://storage.example.com/transcript.json",
        "start_time_utc": datetime.now(timezone.utc).isoformat(),
        "participants": [
            {
                "role": "USER",
                "spoke_first": False
            },
            {
                "role": "AGENT",
                "spoke_first": True
            }
        ],
    }

    headers = {
        "X-API-Key": api_key,
        "Content-Type": "application/json"
    }

    response = requests.post(
        f"{base_url}/evaluate",
        headers=headers,
        json=call_data,
        timeout=30  # never wait indefinitely on a stalled connection
    )

    return response.json()
The most efficient way to receive evaluation results is through the events webhook. When you submit a call for evaluation, Bluejay will automatically send the complete results to your registered webhook URL once processing is complete.

Configuring Your Webhook URL

Before implementing your webhook endpoint, you’ll need to register your webhook URL in the Bluejay dashboard:
  1. Log into your Bluejay dashboard
  2. Navigate to Settings → Developers → Webhooks
  3. Add your webhook URL (e.g., https://yourapp.com/webhook/evaluate)
  4. Copy your webhook secret key for signature verification
  5. Save the configuration
Important:
  • Your webhook URL must be publicly accessible and use HTTPS for security
  • Store your webhook secret securely - you’ll need it to verify incoming webhook signatures
  • Bluejay sends the X-Bluejay-Signature header with each webhook for authentication

Setting Up Your Webhook Endpoint

First, create an endpoint to receive webhook notifications:
from fastapi import FastAPI, Request, HTTPException
import json
import hmac
import hashlib

# Application instance on which the webhook route below is registered.
app = FastAPI()

# Your webhook secret from the Bluejay dashboard
# NOTE: hard-coded here for illustration only; in a real deployment load it
# from an environment variable or a secret manager instead of committing it.
WEBHOOK_SECRET = "your-webhook-secret-here"

def verify_webhook_signature(payload: bytes, signature: str, secret: str) -> bool:
    """Check a webhook signature against the HMAC-SHA256 of the raw payload.

    Args:
        payload: Raw request body, exactly as received.
        signature: Value of the signature header, as a hex digest with an
            optional ``sha256=`` prefix. May be ``None`` or empty.
        secret: Shared webhook secret. May be ``None`` or empty (for example
            when an environment variable is unset).

    Returns:
        ``True`` only when the signature matches the expected digest.
        Missing, malformed or non-ASCII signatures, and a missing secret, all
        yield ``False`` rather than raising.
    """
    # Without both a signature and a secret there is nothing to verify.
    if not signature or not secret:
        return False

    # Remove 'sha256=' prefix if present
    prefix = 'sha256='
    if signature.startswith(prefix):
        signature = signature[len(prefix):]

    # Calculate expected signature
    expected_signature = hmac.new(
        secret.encode('utf-8'),
        payload,
        hashlib.sha256
    ).hexdigest()

    # Compare as bytes: compare_digest raises TypeError for str arguments that
    # contain non-ASCII characters, which an attacker-controlled header could
    # use to turn a plain rejection into a server error.
    return hmac.compare_digest(
        expected_signature.encode('ascii'),
        signature.encode('utf-8')
    )

@app.post("/webhook/evaluate")
async def handle_evaluation_webhook(request: Request):
    """Handle incoming evaluation webhook from Bluejay with signature verification.

    Reads the raw body, verifies the ``X-Bluejay-Signature`` header against
    ``WEBHOOK_SECRET``, parses the JSON payload and hands the first entry of
    its ``evaluations`` list to ``process_evaluation_results``.

    Returns:
        ``{"status": "success"}`` once the payload has been accepted.

    Raises:
        HTTPException: 401 for an invalid signature, 400 for a body that is
            not a JSON object, 500 for any unexpected processing error.
    """

    try:
        # Get raw payload for signature verification
        payload = await request.body()
        signature = request.headers.get('X-Bluejay-Signature')

        # Verify webhook signature
        if not verify_webhook_signature(payload, signature, WEBHOOK_SECRET):
            print("❌ Invalid webhook signature")
            raise HTTPException(status_code=401, detail="Invalid signature")

        # Parse the verified payload
        webhook_data = json.loads(payload)
        if not isinstance(webhook_data, dict):
            # Valid JSON that is not an object cannot carry 'id'/'evaluations'.
            raise HTTPException(status_code=400, detail="Invalid JSON")

        # The webhook sends the complete CallLog object with evaluations
        call_id = webhook_data.get('id')
        evaluations = webhook_data.get('evaluations', [])

        if evaluations:
            evaluation = evaluations[0]

            # Process evaluation results
            process_evaluation_results(call_id, evaluation)

        return {"status": "success"}

    except HTTPException:
        # HTTPException is itself an Exception: without this clause the
        # deliberate 401/400 above would be caught by the generic handler
        # below and reported as a 500.
        raise
    except (json.JSONDecodeError, UnicodeDecodeError):
        # UnicodeDecodeError covers bodies that are not valid UTF-8.
        print("❌ Invalid JSON payload")
        raise HTTPException(status_code=400, detail="Invalid JSON")
    except Exception as e:
        print(f"❌ Error processing webhook: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")

def process_evaluation_results(call_id, evaluation):
    """Print the headline metrics of one or more evaluations for a call.

    This function is synchronous. It prints a header for the call and, for
    each evaluation, its ``hallucination`` and ``redundancy`` values
    (``N/A`` when a key is absent).

    Args:
        call_id: Identifier of the call, used only in the printed header.
        evaluation: A single evaluation dict, or a list of such dicts (the
            shape of a payload's ``evaluations`` field). ``None`` is treated
            as "no evaluations".
    """
    # Callers pass either one evaluation dict or the whole evaluations list;
    # normalise to a list so both shapes work.
    if evaluation is None:
        evaluations = []
    elif isinstance(evaluation, dict):
        evaluations = [evaluation]
    else:
        evaluations = list(evaluation)

    print(f"=== EVALUATION RESULTS FOR CALL {call_id} ===")
    for item in evaluations:
        print(f"Hallucination Detected: {item.get('hallucination', 'N/A')}")
        print(f"Redundancy Score: {item.get('redundancy', 'N/A')}")

        # Your processing logic here

Alternative: Polling for Results

If you prefer to poll for results instead of using webhooks, pass the call ID returned by the evaluate endpoint to the retrieve-call-log endpoint:
import requests
import time

def get_evaluation_results_polling(call_id, max_attempts=30, poll_interval=10,
                                   request_timeout=30):
    """Poll the retrieve-call-log endpoint until evaluation results exist.

    Args:
        call_id: Call ID returned when the evaluation was submitted.
        max_attempts: Maximum number of requests before giving up.
        poll_interval: Seconds to wait between two attempts.
        request_timeout: Per-request timeout in seconds.

    Returns:
        The full decoded call-log response once it contains a non-empty
        ``evaluations`` list; ``None`` if the API answers with a non-200
        status or no evaluation appears within ``max_attempts`` attempts.
    """

    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    headers = {
        "X-API-Key": api_key
    }

    # Poll for results (evaluation may take some time)
    for attempt in range(max_attempts):
        response = requests.get(
            f"{base_url}/retrieve-call-log/{call_id}",
            headers=headers,
            timeout=request_timeout  # never wait indefinitely on one attempt
        )

        if response.status_code != 200:
            print(f"Error retrieving results: {response.status_code}")
            return None

        data = response.json()
        evaluations = data.get('evaluations')

        # Check if evaluation is complete
        if evaluations:
            evaluation = evaluations[0]
            print("=== EVALUATION RESULTS ===")
            print(f"Hallucination Detected: {evaluation.get('hallucination', 'N/A')}")
            if evaluation.get('hallucination_reasoning'):
                print(f"Hallucination Reasoning: {evaluation['hallucination_reasoning']}")

            print(f"Redundancy Score: {evaluation.get('redundancy', 'N/A')}")
            if evaluation.get('redundancy_reasoning'):
                print(f"Redundancy Reasoning: {evaluation['redundancy_reasoning']}")

            # Display custom metrics if available
            if evaluation.get('custom_metrics'):
                print("\n=== CUSTOM METRICS ===")
                for metric in evaluation['custom_metrics']:
                    print(f"{metric['name']}: {metric['value']}")

            return data

        print("Evaluation still processing...")

        # Wait before the next attempt; sleeping after the last one would only
        # delay the timeout report.
        if attempt < max_attempts - 1:
            time.sleep(poll_interval)

    print("Evaluation timed out")
    return None

# Usage with polling method
# NOTE: evaluate_with_transcript_data() returns the whole response dict (or
# None on failure), so despite its name `call_id` holds that dict here; the
# actual ID is read from its 'call_id' key below.
call_id = evaluate_with_transcript_data()
if call_id:
    results = get_evaluation_results_polling(call_id['call_id'])

Advanced: Custom Metrics and Tool Calls

Track domain-specific performance indicators and include tool call data using the /v1/evaluate endpoint:
import requests
import json
from datetime import datetime, timezone

def evaluate_with_custom_metrics():
    """Submit a call for evaluation with tool calls and custom metrics attached.

    The payload combines a recording URL, an inline transcript, the tool calls
    made during the conversation and a ``custom_metrics`` list.

    Returns:
        The decoded JSON body of the evaluate response.
    """

    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    # Example tool calls made during the conversation
    tool_calls = [
        {
            "name": "check_order_status",
            "parameters": {"order_id": "BJ-2024-001-5678"},
            "output": "Order is in transit and should arrive by tomorrow.",
            "start_offset_ms": 2000
        },
        {
            "name": "send_update_email",
            "parameters": {"customer_email": "customer@example.com", "message": "Order update sent"},
            "output": "Email sent to customer@example.com",
            "start_offset_ms": 3000
        }
    ]

    call_data = {
        "agent_id": "123",
        "recording_url": "https://storage.example.com/complex-call.mp3",
        "start_time_utc": "2024-01-15T10:00:00Z",
        "participants": [
            {
                "role": "USER",
                "spoke_first": False
            },
            {
                "role": "AGENT",
                "spoke_first": True
            }
        ],
        "tool_calls": tool_calls,
        "transcript": [
            {
                "start_offset_ms": 100,
                "end_offset_ms": 1000,
                "speaker": "AGENT",
                "utterance": "Hello, thank you for calling. How may I assist you?"
            },
            {
                "start_offset_ms": 1000,
                "end_offset_ms": 2000,
                "speaker": "USER",
                "utterance": "I need to check the status of my order and possibly get a refund."
            },
            {
                "start_offset_ms": 2000,
                "end_offset_ms": 3000,
                "speaker": "AGENT",
                "utterance": "I understand you'd like to check your order status. Let me look that up for you."
            }
        ],
        # presumably the IDs of custom metrics already defined in Bluejay —
        # TODO confirm against the API reference
        "custom_metrics": [
            "3c90c3cc-0d44-4b50-8888-8dd25736052a"
        ],
    }

    headers = {
        "X-API-Key": api_key,
        "Content-Type": "application/json"
    }

    response = requests.post(
        f"{base_url}/evaluate",
        headers=headers,
        json=call_data,
        timeout=30  # never wait indefinitely on a stalled connection
    )

    return response.json()

Best Practices

Webhook Implementation

  1. Reliable Endpoint: Ensure your webhook endpoint is highly available and can handle the expected volume
  2. Signature Verification: Always verify the X-Bluejay-Signature header using HMAC-SHA256 to ensure webhook authenticity
  3. Idempotency: Handle duplicate webhook deliveries gracefully by checking call IDs
  4. Response Time: Respond quickly (< 5 seconds) to webhook requests to avoid timeouts
  5. Error Handling: Return appropriate HTTP status codes (200 for success, 401 for invalid signature, 4xx/5xx for other errors)
  6. HTTPS Only: Use HTTPS endpoints to protect webhook data in transit
  7. Secret Management: Store webhook secrets securely using environment variables or secret management systems
# Example: Idempotent webhook handling with signature verification
import os
# In-memory record of call IDs that were already handled. Being a plain set,
# it is emptied on restart and is not shared between worker processes.
processed_calls = set()

# Get webhook secret from environment variable
# NOTE: os.getenv returns None when BLUEJAY_WEBHOOK_SECRET is not set.
WEBHOOK_SECRET = os.getenv('BLUEJAY_WEBHOOK_SECRET')

@app.route('/webhook/evaluate', methods=['POST'])
def handle_evaluation_webhook():
    """Idempotent webhook handler that skips call IDs it has already seen.

    NOTE(review): this is written in Flask style; it assumes ``app``,
    ``request`` and ``jsonify`` come from Flask and are in scope — they are
    not imported in this snippet.

    Returns:
        A ``(json, status)`` pair: 401 for an invalid signature, 400 for a
        body that is not valid JSON, otherwise 200 with status
        ``already_processed`` or ``success``.
    """
    # Verify signature first
    payload = request.get_data()
    signature = request.headers.get('X-Bluejay-Signature')

    if not verify_webhook_signature(payload, signature, WEBHOOK_SECRET):
        return jsonify({"error": "Invalid signature"}), 401

    # Parse verified payload; ValueError covers both malformed JSON and
    # bodies that are not valid UTF-8.
    try:
        webhook_data = json.loads(payload)
    except ValueError:
        return jsonify({"error": "Invalid JSON"}), 400
    call_id = webhook_data.get('id')

    # Check if already processed
    if call_id in processed_calls:
        return jsonify({"status": "already_processed"}), 200

    # Process the evaluation. process_evaluation_results reads keys from a
    # single evaluation, so pass the first entry rather than the whole list.
    evaluations = webhook_data.get('evaluations') or []
    if evaluations:
        process_evaluation_results(call_id, evaluations[0])
    processed_calls.add(call_id)

    return jsonify({"status": "success"}), 200

# Production-ready version with persistent storage
import redis
# Redis connection used to record processed call IDs outside the web process.
# It points at localhost:6379, database 0; adjust for your deployment.
redis_client = redis.Redis(host='localhost', port=6379, db=0)

@app.route('/webhook/evaluate', methods=['POST'])
def handle_evaluation_webhook_production():
    """Idempotent webhook handler that de-duplicates deliveries through Redis.

    NOTE(review): this is written in Flask style; it assumes ``app``,
    ``request`` and ``jsonify`` come from Flask and are in scope — they are
    not imported in this snippet.

    Returns:
        A ``(json, status)`` pair: 401 for an invalid signature, 400 for a
        body that is not valid JSON, otherwise 200 with status
        ``already_processed`` or ``success``.
    """
    # Verify signature
    payload = request.get_data()
    signature = request.headers.get('X-Bluejay-Signature')

    if not verify_webhook_signature(payload, signature, WEBHOOK_SECRET):
        return jsonify({"error": "Invalid signature"}), 401

    # Parse verified payload; ValueError covers both malformed JSON and
    # bodies that are not valid UTF-8.
    try:
        webhook_data = json.loads(payload)
    except ValueError:
        return jsonify({"error": "Invalid JSON"}), 400
    call_id = webhook_data.get('id')

    # Claim the call ID atomically: SET with NX and EX creates the key with a
    # 24-hour expiry only if it does not exist yet. A separate exists() check
    # followed by setex() lets two concurrent deliveries both pass the check.
    processed_key = f"processed_call:{call_id}"
    if not redis_client.set(processed_key, "1", nx=True, ex=86400):
        return jsonify({"status": "already_processed"}), 200

    # Process the evaluation. process_evaluation_results reads keys from a
    # single evaluation, so pass the first entry rather than the whole list.
    try:
        evaluations = webhook_data.get('evaluations') or []
        if evaluations:
            process_evaluation_results(call_id, evaluations[0])
    except Exception:
        # Release the claim so a redelivery of this webhook can be retried.
        redis_client.delete(processed_key)
        raise

    return jsonify({"status": "success"}), 200

Data Quality

  1. Audio Quality: Use high-quality recordings (16kHz+ sample rate, minimal background noise)
  2. Transcript Accuracy: Ensure transcripts are accurate and include timestamps
  3. Complete Metadata: Provide all participant information and call details

Error Handling

def evaluate_with_error_handling():
    """Submit a call for evaluation, reporting each failure mode separately.

    Returns:
        The decoded JSON response on success; ``None`` when the request times
        out, fails at the network/HTTP level, or the body is not valid JSON.
    """

    # Local imports keep this snippet runnable on its own.
    import json
    from datetime import datetime, timezone

    import requests
    from requests.exceptions import RequestException, Timeout

    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    call_data = {
        "agent_id": "123",
        "recording_url": "https://storage.example.com/audio.mp3",
        "start_time_utc": datetime.now(timezone.utc).isoformat(),
        "participants": [
            {
                "role": "USER",
                "spoke_first": False
            },
            {
                "role": "AGENT",
                "spoke_first": True
            }
        ]
    }

    headers = {
        "X-API-Key": api_key,
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(
            f"{base_url}/evaluate",
            headers=headers,
            json=call_data,
            timeout=30
        )

        response.raise_for_status()  # Raise exception for bad status codes

    except Timeout:
        print("❌ Request timed out")
        return None
    except RequestException as e:
        print(f"❌ Request failed: {e}")
        return None

    # Decode separately from the request. In current Requests releases the
    # error raised by response.json() is also a RequestException, so a JSON
    # handler placed after the RequestException one would never run.
    try:
        result = response.json()
    except json.JSONDecodeError:
        print("❌ Invalid JSON response")
        return None

    print(f"✅ Evaluation submitted successfully: {result['call_id']}")
    return result

Batch Processing

For processing multiple calls, implement batch processing with rate limiting:
import time

def process_batch_evaluations(call_files, delay_seconds=1):
    """Evaluate several call files one after another with a pause in between.

    Args:
        call_files: Iterable of call files to submit, in order.
        delay_seconds: Pause between two consecutive submissions, used as a
            simple rate limit. ``0`` disables the pause.

    Returns:
        A list holding the result of ``evaluate_call_from_file`` for each
        input, in input order.
    """

    api_key = "your-api-key-here"
    base_url = "https://api.getbluejay.ai/v1"

    # Materialise once so generators work and the total is known for the
    # progress message.
    call_files = list(call_files)
    total = len(call_files)
    results = []

    for position, call_file in enumerate(call_files, start=1):
        print(f"Processing call {position}/{total}: {call_file}")

        # Submit evaluation
        results.append(evaluate_call_from_file(call_file, api_key, base_url))

        # Rate limiting - avoid overwhelming the API. No pause is needed after
        # the last call.
        if position < total and delay_seconds > 0:
            time.sleep(delay_seconds)

    return results

def evaluate_call_from_file(file_path, api_key, base_url):
    """Evaluate a single call file (placeholder).

    Replace the body with code that reads ``file_path`` and submits it using
    ``api_key`` and ``base_url``. As written it does nothing and returns
    ``None``.
    """
    # Implementation depends on your file format
    return None

Next Steps

  • Webhook Setup: Register your webhook URL in the Bluejay dashboard to start receiving automatic evaluation results
  • Custom Metrics: Define domain-specific evaluation criteria using custom metrics via API or the Bluejay dashboard
  • Real-time Processing: Build real-time evaluation processing pipelines using webhook notifications
  • Alerting: Implement automated alerts in your webhook handler for immediate notification of performance issues
  • Visualization: Access historical evaluation results using the Bluejay dashboard or build custom dashboards using webhook data
For webhook registration and more advanced use cases, explore the API Reference or contact the Bluejay team for custom evaluation configurations.