GStreamer appsrc: Playback Speed Affected by Buffer Size

In the following code, I use appsrc to push waveform audio data generated by code and play it locally using audioconvert, audioresample, and autoaudiosink. I have set the “format” of appsrc to GST_FORMAT_TIME, which should ensure that the data is played according to timestamps.

However, when I modify the SAMPLE_NUM macro to control the amount of data pushed in each call to push_data(), the playback speed changes unexpectedly. For example, when I set SAMPLE_NUM to 512, the sound plays slower than normal. When I set it to 48 (1ms of data), the sound becomes very fast, and even overlaps, causing a sharp noise.

Why is this happening? How can I fix this issue to ensure that the playback speed is correct?

BTW, the code is running on Windows 11 and is built with Visual Studio 2022.

#include <gst/gst.h>
#include <gst/audio/audio.h>
#include <string.h>
#include <stdio.h>

#define SAMPLE_RATE 48000 /* Samples per second we are sending */
#define CHANNELS 1
#define BIT_PER_SAMPLE 16
#define SAMPLE_NUM  48  /* Amount of bytes we are sending in each buffer */

#define BTYE_PER_SAMPLE (BIT_PER_SAMPLE / 8)
#define CHUNK_SIZE (SAMPLE_NUM * CHANNELS * BTYE_PER_SAMPLE)   /* Amount of bytes we are sending in each buffer */

/* Shared application state; a pointer to it is handed to every callback. */
typedef struct _CustomData {

    /* Playback pipeline and the elements placed into it */
    GstElement* pipeline;
    GstElement* app_source;
    GstElement* audio_convert1;
    GstElement* audio_resample;
    GstElement* audio_sink;

    /* Additional element slots; not referenced by the code visible in this file */
    GstElement* rtp_pay;
    GstElement* udp_sink;
    GstElement* app_queue;
    GstElement* app_sink;

    /* Running count of samples generated so far; buffer timestamps are derived from it */
    guint64 num_samples;

    /* Oscillator state used by the waveform generator */
    gfloat a;
    gfloat b;
    gfloat c;
    gfloat d;

    /* Id of the idle GSource that feeds appsrc; 0 means no source is installed */
    guint sourceid;

    /* GLib main loop driving the application */
    GMainLoop* main_loop;
} CustomData;

/* This method is called by the idle GSource in the mainloop, to feed CHUNK_SIZE bytes
 * (SAMPLE_NUM samples per channel) into appsrc.
 * The idle handler is added to the mainloop when appsrc requests us to start sending data (need-data signal)
 * and is removed when appsrc has enough data (enough-data signal).
 *
 * data: application state; num_samples and the waveform fields a, b, c, d are advanced
 *       by every buffer that gets generated.
 *
 * Returns TRUE to keep the idle source installed. Returns FALSE when the buffer could not
 * be allocated or mapped, or when appsrc did not return GST_FLOW_OK; in those cases
 * data->sourceid is reset to 0 because GLib destroys a source whose callback returns FALSE.
 */
static gboolean push_data(CustomData* data) {
    GstBuffer* buffer;
    GstFlowReturn ret;
    int i;
    GstMapInfo map;
    gint16* raw;
    gint num_samples = SAMPLE_NUM; /* Samples per channel carried by this buffer */
    gfloat freq;

    /* Create a new empty buffer */
    buffer = gst_buffer_new_and_alloc(CHUNK_SIZE);
    if (buffer == NULL) {
        g_printerr("Could not allocate a buffer of %d bytes.\n", (int)CHUNK_SIZE);
        data->sourceid = 0;
        return FALSE;
    }

    /* Set its timestamp and duration. The timestamp is computed from the running
     * sample count, so rounding of individual durations does not accumulate. */
    GST_BUFFER_TIMESTAMP(buffer) = gst_util_uint64_scale(data->num_samples, GST_SECOND, SAMPLE_RATE);
    GST_BUFFER_DURATION(buffer) = gst_util_uint64_scale(num_samples, GST_SECOND, SAMPLE_RATE);

    /* Map the buffer for writing; without a successful map, map.data must not be touched */
    if (!gst_buffer_map(buffer, &map, GST_MAP_WRITE)) {
        g_printerr("Could not map the buffer for writing.\n");
        gst_buffer_unref(buffer);
        data->sourceid = 0;
        return FALSE;
    }

    /* Generate some psychodelic waveforms */
    raw = (gint16*)map.data;
    data->c += data->d;
    data->d -= data->c / 1000;
    freq = 1100 + 1000 * data->d;
    for (i = 0; i < num_samples; i++) {
        data->a += data->b;
        data->b -= data->a / freq;
        /* Interleaved layout: every channel receives the same sample value */
        for (int ch = 0; ch < CHANNELS; ch++) {
            raw[i * CHANNELS + ch] = (gint16)(500 * data->a);
        }
    }

    gst_buffer_unmap(buffer, &map);
    data->num_samples += num_samples;

    /* Push the buffer into the appsrc */
    g_signal_emit_by_name(data->app_source, "push-buffer", buffer, &ret);

    /* Free the buffer now that we are done with it */
    gst_buffer_unref(buffer);

    if (ret != GST_FLOW_OK) {
        /* We got some error, stop sending data. Clear the stored id so that
         * stop_feed() does not try to remove a source that no longer exists
         * and start_feed() is able to install a new one. */
        data->sourceid = 0;
        return FALSE;
    }

    return TRUE;
}

/* Handler connected to appsrc's "need-data" signal. Installs push_data() as an idle
 * handler on the mainloop unless a feeding source is already registered. */
static void start_feed(GstElement* source, guint size, CustomData* data) {
    /* A non-zero id means a feeding source is already installed */
    if (data->sourceid != 0) {
        return;
    }

    g_print("Start feeding\n");
    data->sourceid = g_idle_add((GSourceFunc)push_data, data);
}

/* Handler connected to appsrc's "enough-data" signal. Removes the idle handler
 * from the mainloop, if one is installed, and clears the stored id. */
static void stop_feed(GstElement* source, CustomData* data) {
    /* Nothing to remove when no feeding source is registered */
    if (data->sourceid == 0) {
        return;
    }

    g_print("Stop feeding\n");
    g_source_remove(data->sourceid);
    data->sourceid = 0;
}

/* Callback for an appsink that has received a buffer: pulls the sample, prints a
 * '*' as a progress marker and releases the sample again.
 * Returns GST_FLOW_OK when a sample was pulled, GST_FLOW_ERROR otherwise. */
static GstFlowReturn new_sample(GstElement* sink, CustomData* data) {
    GstSample* pulled;

    /* Ask the sink for the pending sample */
    g_signal_emit_by_name(sink, "pull-sample", &pulled);
    if (!pulled) {
        return GST_FLOW_ERROR;
    }

    /* The only thing done with the sample is to signal that it arrived */
    g_print("*");
    gst_sample_unref(pulled);
    return GST_FLOW_OK;
}

/* Bus handler for error messages: reports the error and its debugging details
 * on stderr, releases both, and asks the main loop to quit. */
static void error_cb(GstBus* bus, GstMessage* msg, CustomData* data) {
    GError* error;
    gchar* details;
    const gchar* shown_details;

    /* Extract the error and the optional debug string from the message */
    gst_message_parse_error(msg, &error, &details);

    /* Fall back to a placeholder when no debug string was supplied */
    if (details) {
        shown_details = details;
    } else {
        shown_details = "none";
    }

    g_printerr("Error received from element %s: %s\n", GST_OBJECT_NAME(msg->src), error->message);
    g_printerr("Debugging information: %s\n", shown_details);

    g_free(details);
    g_clear_error(&error);

    g_main_loop_quit(data->main_loop);
}

/* Builds the pipeline appsrc -> audioconvert -> audioresample -> autoaudiosink,
 * starts it and runs a GLib main loop until error_cb() quits it.
 * Returns 0 on a normal shutdown and -1 when the pipeline could not be built or started. */
int main(int argc, char* argv[]) {
    CustomData data;
    GstAudioInfo info;
    GstCaps* audio_caps;
    GstBus* bus;

    /* Initialize custom data structure */
    memset(&data, 0, sizeof(data));
    data.b = 1; /* For waveform generation */
    data.d = 1;

    /* Initialize GStreamer */
    gst_init(&argc, &argv);

    /* Create the elements */
    data.app_source = gst_element_factory_make("appsrc", "audio_source");
    data.audio_convert1 = gst_element_factory_make("audioconvert", "audio_convert1");
    data.audio_resample = gst_element_factory_make("audioresample", "audio_resample");
    data.audio_sink = gst_element_factory_make("autoaudiosink", "audio_sink");
    /* Create the empty pipeline */
    data.pipeline = gst_pipeline_new("test-pipeline");

    /* Every element that is added and linked below has to exist, audioconvert included */
    if (!data.pipeline || !data.app_source || !data.audio_convert1 ||
        !data.audio_resample || !data.audio_sink) {
        g_printerr("Not all elements could be created.\n");
        return -1;
    }

    /* Configure appsrc */
    gst_audio_info_set_format(&info, GST_AUDIO_FORMAT_S16, SAMPLE_RATE, CHANNELS, NULL);
    audio_caps = gst_audio_info_to_caps(&info);
    g_object_set(data.app_source, "caps", audio_caps, "format", GST_FORMAT_TIME, NULL);
    g_signal_connect(data.app_source, "need-data", G_CALLBACK(start_feed), &data);
    g_signal_connect(data.app_source, "enough-data", G_CALLBACK(stop_feed), &data);
    gst_caps_unref(audio_caps);

    /* Link all elements that can be automatically linked because they have "Always" pads */
    gst_bin_add_many(GST_BIN(data.pipeline), data.app_source, data.audio_convert1, data.audio_resample, data.audio_sink, NULL);
    if (gst_element_link_many(data.app_source, data.audio_convert1, data.audio_resample, data.audio_sink, NULL) != TRUE) {
        g_printerr("Elements could not be linked.\n");
        gst_object_unref(data.pipeline);
        return -1;
    }

    /* Create the GLib Main Loop before the pipeline starts, so that data.main_loop
     * is already valid by the time error_cb() may need to quit it */
    data.main_loop = g_main_loop_new(NULL, FALSE);

    /* Instruct the bus to emit signals for each received message, and connect to the interesting signals.
     * The bus reference is kept until shutdown so the signal watch can be removed again. */
    bus = gst_element_get_bus(data.pipeline);
    gst_bus_add_signal_watch(bus);
    g_signal_connect(G_OBJECT(bus), "message::error", (GCallback)error_cb, &data);

    /* Start playing the pipeline */
    if (gst_element_set_state(data.pipeline, GST_STATE_PLAYING) == GST_STATE_CHANGE_FAILURE) {
        g_printerr("Unable to set the pipeline to the playing state.\n");
        gst_bus_remove_signal_watch(bus);
        gst_object_unref(bus);
        gst_element_set_state(data.pipeline, GST_STATE_NULL);
        gst_object_unref(data.pipeline);
        g_main_loop_unref(data.main_loop);
        return -1;
    }

    /* Run the main loop until error_cb() quits it */
    g_main_loop_run(data.main_loop);

    /* Free resources */
    gst_bus_remove_signal_watch(bus);
    gst_object_unref(bus);
    gst_element_set_state(data.pipeline, GST_STATE_NULL);
    gst_object_unref(data.pipeline);
    g_main_loop_unref(data.main_loop);
    return 0;
}

Actually, I find that you can reproduce this issue directly in basic-tutorial-8 by changing CHUNK_SIZE to 96.

How should I set the parameters to ensure that the playback speed is correct?

We use appsrc in Transitive (a robotics framework) in a live-streaming context where, I think, we have similar requirements. For us it was necessary to set the following properties on the appsrc: is-live=true, do-timestamp=true, format=GST_FORMAT_TIME, and block=true. I suspect the main thing you are missing here is is-live=true.