Proto file

The api.proto file defines the gRPC service for streaming (behavioral and deepfake analysis).

The Streaming API is located at streaming.behavioralsignals.com:443

Below is the api.proto file that can be used to generate gRPC clients from scratch:

syntax = "proto3";
package behavioral_api.grpc.v1;

service BehavioralStreamingApi {
  /*
  Bi-directional streaming method for sending chunks of audio and retrieving real-time
  behavioral and emotion results.
   */
  rpc StreamAudio(stream AudioStream) returns (stream StreamResult) {}

  /*
  Bi-directional streaming method for sending chunks of audio and retrieving real-time
  deepfake detection results.
   */
  rpc DeepfakeDetection(stream AudioStream) returns (stream StreamResult) {}
}

enum AudioEncoding {
  // Uncompressed 16-bit signed little-endian samples (Linear PCM - pcm_s16le).
  LINEAR_PCM = 0;
}

// Provides information to the recognizer that specifies how to process the
// request
message AudioConfig {
  // The encoding of the audio data sent in the request
  AudioEncoding encoding = 1;

  // Sample rate in Hertz of the audio data sent
  int32 sample_rate_hertz = 2;

  // If specified, filter the level of the results to return:
  // * segment: Return only results at segment level
  // * utterance: Return only results at utterance level
  // By default the endpoint returns both segment- and utterance-level results.
  optional Level level = 3;

  // Whether to also return logits in addition to posteriors
  optional bool logits = 4;

  // Whether to return feature embeddings
  optional bool feature_embedding = 5;
}

/*
 * AUDIO STREAM MESSAGE DEFINITION
 */

// The streaming request, which is either a streaming config or audio content.
message AudioStream {
  int64 cid = 1;
  string x_auth_token = 2;

  // Provides information to the recognizer that specifies how to process the
  // request. The first `AudioStream` message must contain a `config` message.
  AudioConfig config = 3;
  // The audio data to be recognized. Sequential chunks of audio data are sent in sequential
  // `AudioStream` messages. The first `AudioStream` message must not contain `audio_content`
  // and all subsequent `AudioStream` messages must contain `audio_content`. The audio bytes
  // must be encoded as specified in `AudioConfig`.
  bytes audio_content = 4;
}

message Prediction {
  // Label predicted by the model
  string label = 1;
  // Confidence (posterior probability) of the prediction
  optional string posterior = 2;
  // Logit of the prediction, returned when `logits` is set in `AudioConfig`
  optional string logit = 3;
}

message InferenceResult {
  // Message identifier
  string id = 1;
  // Start time of the analyzed audio span
  string start_time = 2;
  // End time of the analyzed audio span
  string end_time = 3;
  // Task name
  string task = 4;
  // Label/posterior pairs
  repeated Prediction prediction = 5;
  // Label after thresholding
  string final_label = 6;
  // Embedding of the segment, returned when `feature_embedding` is set in `AudioConfig`
  optional string embedding = 7;
  // Level of the result
  optional Level level = 8;
}

// The response message of the server that contains the inference results.
message StreamResult {
  int64 cid = 1;
  int64 pid = 2;
  int32 message_id = 3;

  repeated InferenceResult result = 4;
}

// Used to indicate the level (utterance/segment) of the prediction
enum Level {
  segment = 0;
  utterance = 1;
}
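
As a minimal sketch (assuming a Python environment with grpcio and grpcio-tools installed and api.proto saved locally), the client stubs can be generated with protoc and a TLS channel opened against the endpoint above. The module names api_pb2 and api_pb2_grpc are the defaults protoc derives from the file name:

# Generate the Python stubs (run once, in the directory containing api.proto):
#   python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. api.proto
import grpc
import api_pb2
import api_pb2_grpc

# Open a TLS channel to the Streaming API endpoint listed above.
channel = grpc.secure_channel(
    "streaming.behavioralsignals.com:443",
    grpc.ssl_channel_credentials(),
)
stub = api_pb2_grpc.BehavioralStreamingApiStub(channel)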

Messages

  • AudioStream - The streaming request message
  • AudioConfig - Audio configuration message
  • StreamResult - Response message containing results
  • InferenceResult - A single inference result (task, labels, timing)
  • Prediction - Individual prediction with confidence

Enums

  • AudioEncoding.LINEAR_PCM - Uncompressed 16-bit signed little-endian PCM (the only supported encoding)
  • Level.segment - Segment-level results
  • Level.utterance - Utterance-level results

Fields

  • audio_content - Raw audio bytes field
  • sample_rate_hertz - Sample rate configuration
  • final_label - Label selected after thresholding
  • start_time / end_time - Timing fields

Services & Methods

  • BehavioralStreamingApi service
  • StreamAudio() method for behavioral analysis
  • DeepfakeDetection() method for deepfake detection
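
A usage sketch for StreamAudio (DeepfakeDetection is called the same way), assuming the stub created above, a hypothetical 16 kHz pcm_s16le file named audio.raw, and placeholder cid/token values: the request iterator sends a config-only message first and raw audio chunks afterwards, as the comments on AudioStream require. Whether cid and x_auth_token must be repeated on every chunk is not stated in the proto, so this sketch sends them only in the first message.

def requests(cid, token, path, chunk_size=8192):
    # First message: configuration only, no audio_content.
    yield api_pb2.AudioStream(
        cid=cid,
        x_auth_token=token,
        config=api_pb2.AudioConfig(
            encoding=api_pb2.AudioEncoding.LINEAR_PCM,
            sample_rate_hertz=16000,
        ),
    )
    # Subsequent messages: sequential chunks of raw pcm_s16le bytes.
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            yield api_pb2.AudioStream(audio_content=chunk)

# Results are streamed back as the recognizer produces them.
for response in stub.StreamAudio(requests(cid=123, token="YOUR_TOKEN", path="audio.raw")):
    for r in response.result:
        print(r.task, r.final_label, r.start_time, r.end_time)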