
Commit 07509db

Add capability for memory and persistent queue to block when adding items

Signed-off-by: Bogdan Drutu <[email protected]>

1 parent 2144722 · commit 07509db

6 files changed: +165 -49 lines changed

exporter/exporterqueue/bounded_memory_queue.go (+3 -2)

@@ -23,17 +23,18 @@ type boundedMemoryQueue[T any] struct {
 type memoryQueueSettings[T any] struct {
     sizer    sizer[T]
     capacity int64
+    blocking bool
 }
 
 // newBoundedMemoryQueue constructs a new queue of the specified capacity, with an optional
 // callback for dropped items (e.g. useful to emit metrics).
 func newBoundedMemoryQueue[T any](set memoryQueueSettings[T]) Queue[T] {
     return &boundedMemoryQueue[T]{
-        sizedQueue: newSizedQueue[T](set.capacity, set.sizer),
+        sizedQueue: newSizedQueue[T](set.capacity, set.sizer, set.blocking),
     }
 }
 
-func (q *boundedMemoryQueue[T]) Read(_ context.Context) (uint64, context.Context, T, bool) {
+func (q *boundedMemoryQueue[T]) Read(context.Context) (uint64, context.Context, T, bool) {
     ctx, req, ok := q.sizedQueue.pop()
     return 0, ctx, req, ok
 }
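
For orientation, here is a minimal package-internal sketch (not part of the commit; newBlockingStringQueue is a hypothetical helper) of how the new blocking field is plumbed through memoryQueueSettings into the underlying sizedQueue:

    // newBlockingStringQueue is a hypothetical helper illustrating the new
    // setting; requestSizer (by name, a request-count sizer) sizes the queue
    // by the number of offered requests.
    func newBlockingStringQueue() Queue[string] {
        return newBoundedMemoryQueue[string](memoryQueueSettings[string]{
            sizer:    &requestSizer[string]{},
            capacity: 100,
            blocking: true, // Offer waits for free space instead of returning ErrQueueIsFull
        })
    }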

exporter/exporterqueue/bounded_memory_queue_test.go (+34)

@@ -133,6 +133,40 @@ func TestQueueUsage(t *testing.T) {
     }
 }
 
+func TestBlockingQueueUsage(t *testing.T) {
+    tests := []struct {
+        name  string
+        sizer sizer[ptrace.Traces]
+    }{
+        {
+            name:  "requests_based",
+            sizer: &requestSizer[ptrace.Traces]{},
+        },
+        {
+            name:  "items_based",
+            sizer: &itemsSizer{},
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            q := newBoundedMemoryQueue[ptrace.Traces](memoryQueueSettings[ptrace.Traces]{sizer: tt.sizer, capacity: int64(100), blocking: true})
+            consumed := &atomic.Int64{}
+            require.NoError(t, q.Start(context.Background(), componenttest.NewNopHost()))
+            ac := newAsyncConsumer(q, 10, func(context.Context, ptrace.Traces) error {
+                consumed.Add(1)
+                return nil
+            })
+            td := testdata.GenerateTraces(10)
+            for j := 0; j < 1_000_000; j++ {
+                require.NoError(t, q.Offer(context.Background(), td))
+            }
+            assert.NoError(t, q.Shutdown(context.Background()))
+            assert.NoError(t, ac.Shutdown(context.Background()))
+            assert.Equal(t, int64(1_000_000), consumed.Load())
+        })
+    }
+}
+
 func TestZeroSizeNoConsumers(t *testing.T) {
     q := newBoundedMemoryQueue[string](memoryQueueSettings[string]{sizer: &requestSizer[string]{}, capacity: 0})

exporter/exporterqueue/cond.go (+59, new file)

@@ -0,0 +1,59 @@
+package exporterqueue
+
+import (
+    "context"
+    "sync"
+)
+
+// cond is equivalent to sync.Cond, but context.Context aware: Wait returns when the context is done.
+// It also requires the caller to hold c.L during all calls.
+type cond struct {
+    L       sync.Locker
+    ch      chan struct{}
+    waiting int64
+}
+
+func newCond(l sync.Locker) *cond {
+    return &cond{L: l, ch: make(chan struct{}, 1)}
+}
+
+// Signal wakes one goroutine waiting on c, if there is any.
+// It requires the caller to hold c.L during the call.
+func (c *cond) Signal() {
+    if c.waiting == 0 {
+        return
+    }
+    c.waiting--
+    c.ch <- struct{}{}
+}
+
+// Broadcast wakes all goroutines waiting on c.
+// It requires the caller to hold c.L during the call.
+func (c *cond) Broadcast() {
+    for ; c.waiting > 0; c.waiting-- {
+        c.ch <- struct{}{}
+    }
+}
+
+// Wait atomically unlocks c.L and suspends execution of the calling goroutine. After later resuming execution, Wait locks c.L before returning.
+func (c *cond) Wait(ctx context.Context) error {
+    c.waiting++
+    c.L.Unlock()
+    select {
+    case <-ctx.Done():
+        c.L.Lock()
+        if c.waiting == 0 {
+            // If waiting is 0, a signal was already sent and nobody else is waiting for it.
+            // Consume it, so that we don't unblock another consumer unnecessarily,
+            // and don't block a producer because the channel buffer is full.
+            <-c.ch
+        } else {
+            // Decrease the number of waiting routines.
+            c.waiting--
+        }
+        return ctx.Err()
+    case <-c.ch:
+        c.L.Lock()
+        return nil
+    }
+}
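
To make the contract concrete, here is a hypothetical usage sketch (not part of the commit; waitUntil is an invented helper): hold c.L, re-check the predicate in a loop, and let the caller's context bound the wait. This is exactly the pattern the Offer and pop changes below follow:

    // waitUntil parks the calling goroutine until pred() holds or ctx is done.
    // It assumes c was created with newCond over the same lock it acquires here.
    func waitUntil(ctx context.Context, c *cond, pred func() bool) error {
        c.L.Lock()
        defer c.L.Unlock()
        for !pred() {
            // Wait re-acquires c.L before returning, on both paths.
            if err := c.Wait(ctx); err != nil {
                return err // context canceled or deadline exceeded
            }
        }
        return nil
    }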

exporter/exporterqueue/persistent_queue.go (+35 -26)

@@ -42,6 +42,7 @@ var (
 type persistentQueueSettings[T any] struct {
     sizer     sizer[T]
     capacity  int64
+    blocking  bool
     signal    pipeline.Signal
     storageID component.ID
     marshaler Marshaler[T]
@@ -78,10 +79,12 @@ type persistentQueue[T any] struct {
 
     // isRequestSized indicates whether the queue is sized by the number of requests.
     isRequestSized bool
+    blocking       bool
 
     // mu guards everything declared below.
     mu              sync.Mutex
-    hasElements     *sync.Cond
+    hasMoreElements *cond
+    hasMoreSpace    *cond
     readIndex       uint64
     writeIndex      uint64
     currentlyDispatchedItems []uint64
@@ -95,10 +98,11 @@ func newPersistentQueue[T any](set persistentQueueSettings[T]) Queue[T] {
     _, isRequestSized := set.sizer.(*requestSizer[T])
     pq := &persistentQueue[T]{
         set:            set,
-        logger:         set.set.Logger,
         isRequestSized: isRequestSized,
+        blocking:       set.blocking,
     }
-    pq.hasElements = sync.NewCond(&pq.mu)
+    pq.hasMoreElements = newCond(&pq.mu)
+    pq.hasMoreSpace = newCond(&pq.mu)
     return pq
 }
@@ -194,7 +198,7 @@ func (pq *persistentQueue[T]) Shutdown(ctx context.Context) error {
     backupErr := pq.backupQueueSize(ctx)
     // Mark this queue as stopped, so consumers don't start any more work.
     pq.stopped = true
-    pq.hasElements.Broadcast()
+    pq.hasMoreElements.Broadcast()
     return multierr.Combine(backupErr, pq.unrefClient(ctx))
 }
@@ -233,8 +237,13 @@ func (pq *persistentQueue[T]) Offer(ctx context.Context, req T) error {
 // putInternal is the internal version that requires the caller to hold the mutex lock.
 func (pq *persistentQueue[T]) putInternal(ctx context.Context, req T) error {
     reqSize := pq.set.sizer.Sizeof(req)
-    if pq.queueSize+reqSize > pq.set.capacity {
-        return ErrQueueIsFull
+    for pq.queueSize+reqSize > pq.set.capacity {
+        if !pq.blocking {
+            return ErrQueueIsFull
+        }
+        if err := pq.hasMoreSpace.Wait(ctx); err != nil {
+            return err
+        }
     }
 
     reqBuf, err := pq.set.marshaler(req)
@@ -253,7 +262,7 @@ func (pq *persistentQueue[T]) putInternal(ctx context.Context, req T) error {
 
     pq.writeIndex++
     pq.queueSize += reqSize
-    pq.hasElements.Signal()
+    pq.hasMoreElements.Signal()
 
     // Back up the queue size to storage every 10 writes. The stored value is used to recover the queue size
     // in case the collector is killed. The recovered queue size is allowed to be inaccurate.
@@ -270,31 +279,31 @@ func (pq *persistentQueue[T]) Read(ctx context.Context) (uint64, context.Context, T, bool) {
     pq.mu.Lock()
     defer pq.mu.Unlock()
     for {
-        if pq.stopped {
-            var req T
-            return 0, context.Background(), req, false
-        }
 
         // If queue is empty, wait until more elements and restart.
-        if pq.readIndex == pq.writeIndex {
-            pq.hasElements.Wait()
-            continue
-        }
-
-        index, req, consumed := pq.getNextItem(ctx)
-        if consumed {
-            pq.queueSize -= pq.set.sizer.Sizeof(req)
-            // The size might not be in sync with the queue in case it's restored from the disk
-            // because we don't flush the current queue size on the disk on every read/write.
-            // In that case we need to make sure it doesn't go below 0.
-            if pq.queueSize < 0 {
-                pq.queueSize = 0
+        for pq.readIndex != pq.writeIndex {
+            index, req, consumed := pq.getNextItem(ctx)
+            if consumed {
+                pq.queueSize -= pq.set.sizer.Sizeof(req)
+                // The size might not be in sync with the queue in case it's restored from the disk
+                // because we don't flush the current queue size on the disk on every read/write.
+                // In that case we need to make sure it doesn't go below 0.
+                if pq.queueSize < 0 {
+                    pq.queueSize = 0
+                }
+
+                return index, context.Background(), req, true
             }
+        }
 
-            return index, context.Background(), req, true
+        if pq.stopped {
+            var req T
+            return 0, context.Background(), req, false
         }
 
-        // If we did not consume any element retry from the beginning.
+        // TODO: Change the Queue interface to return an error to allow distinguishing between shutdown and context canceled.
+        // OK to ignore the error, since context.Background() will never be done.
+        _ = pq.hasMoreElements.Wait(context.Background())
     }
 }
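
On the caller side, the two modes now surface differently. A hypothetical helper (not part of the commit; assumes only the standard context and errors packages) sketching how an exporter could classify the result of Offer:

    // classifyOfferErr is a hypothetical illustration of the error contract.
    func classifyOfferErr(err error) string {
        switch {
        case err == nil:
            return "accepted"
        case errors.Is(err, ErrQueueIsFull):
            return "dropped immediately (non-blocking mode)"
        case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded):
            return "gave up waiting for space (blocking mode, ctx done)"
        default:
            return "storage or marshaling error"
        }
    }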

exporter/exporterqueue/sized_queue.go (+30 -17)

@@ -50,22 +50,26 @@ type sizedQueue[T any] struct {
     sizer sizer[T]
     cap   int64
 
-    mu          sync.Mutex
-    hasElements *sync.Cond
-    items       *linkedQueue[T]
-    size        int64
-    stopped     bool
+    mu              sync.Mutex
+    hasMoreElements *cond
+    hasMoreSpace    *cond
+    items           *linkedQueue[T]
+    size            int64
+    stopped         bool
+    blocking        bool
 }
 
 // newSizedQueue creates a sized elements queue. Each element is assigned a size by the provided sizer.
 // capacity is the capacity of the queue.
-func newSizedQueue[T any](capacity int64, sizer sizer[T]) *sizedQueue[T] {
+func newSizedQueue[T any](capacity int64, sizer sizer[T], blocking bool) *sizedQueue[T] {
     sq := &sizedQueue[T]{
-        sizer: sizer,
-        cap:   capacity,
-        items: &linkedQueue[T]{},
+        sizer:    sizer,
+        cap:      capacity,
+        items:    &linkedQueue[T]{},
+        blocking: blocking,
     }
-    sq.hasElements = sync.NewCond(&sq.mu)
+    sq.hasMoreElements = newCond(&sq.mu)
+    sq.hasMoreSpace = newCond(&sq.mu)
     return sq
 }
@@ -84,14 +88,20 @@ func (sq *sizedQueue[T]) Offer(ctx context.Context, el T) error {
     sq.mu.Lock()
     defer sq.mu.Unlock()
 
-    if sq.size+elSize > sq.cap {
-        return ErrQueueIsFull
+    for sq.size+elSize > sq.cap {
+        if !sq.blocking {
+            return ErrQueueIsFull
+        }
+        // Wait for more space or until the ctx is Done.
+        if err := sq.hasMoreSpace.Wait(ctx); err != nil {
+            return err
+        }
     }
 
     sq.size += elSize
     sq.items.push(ctx, el, elSize)
     // Signal one consumer if any.
-    sq.hasElements.Signal()
+    sq.hasMoreElements.Signal()
     return nil
 }
@@ -104,17 +114,20 @@ func (sq *sizedQueue[T]) pop() (context.Context, T, bool) {
 
     for {
         if sq.size > 0 {
-            ctx, el, elSize := sq.items.pop()
+            elCtx, el, elSize := sq.items.pop()
             sq.size -= elSize
-            return ctx, el, true
+            sq.hasMoreSpace.Signal()
+            return elCtx, el, true
         }
 
         if sq.stopped {
            var el T
            return context.Background(), el, false
        }
 
-        sq.hasElements.Wait()
+        // TODO: Change the Queue interface to return an error to allow distinguishing between shutdown and context canceled.
+        // OK to ignore the error, since context.Background() will never be done.
+        _ = sq.hasMoreElements.Wait(context.Background())
     }
 }
@@ -123,7 +136,7 @@ func (sq *sizedQueue[T]) Shutdown(context.Context) error {
     sq.mu.Lock()
     defer sq.mu.Unlock()
     sq.stopped = true
-    sq.hasElements.Broadcast()
+    sq.hasMoreElements.Broadcast()
     return nil
 }
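
A hypothetical test (not part of the commit) that pins down the new Offer semantics, reusing sizerInt from the tests below and assuming it reports an element's integer value as its size: a full blocking queue parks the producer until the context expires.

    func TestBlockingOfferHonorsContext(t *testing.T) {
        q := newSizedQueue[int](1, sizerInt{}, true) // capacity 1, blocking enabled

        // The first element fits exactly: size 1 == capacity 1.
        require.NoError(t, q.Offer(context.Background(), 1))

        // The queue is now full: instead of ErrQueueIsFull, Offer waits on
        // hasMoreSpace and unblocks when the context is done.
        ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond)
        defer cancel()
        require.ErrorIs(t, q.Offer(ctx, 1), context.DeadlineExceeded)
    }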

exporter/exporterqueue/sized_queue_test.go (+4 -4)

@@ -18,7 +18,7 @@ func (s sizerInt) Sizeof(el int) int64 {
 }
 
 func TestSizedQueue(t *testing.T) {
-    q := newSizedQueue[int](7, sizerInt{})
+    q := newSizedQueue[int](7, sizerInt{}, false)
     require.NoError(t, q.Offer(context.Background(), 1))
     assert.Equal(t, 1, q.Size())
     assert.Equal(t, 7, q.Capacity())
@@ -47,7 +47,7 @@ func TestSizedQueue(t *testing.T) {
 }
 
 func TestSizedQueue_DrainAllElements(t *testing.T) {
-    q := newSizedQueue[int](7, sizerInt{})
+    q := newSizedQueue[int](7, sizerInt{}, false)
     require.NoError(t, q.Offer(context.Background(), 1))
     require.NoError(t, q.Offer(context.Background(), 3))
 
@@ -68,12 +68,12 @@ func TestSizedQueue_DrainAllElements(t *testing.T) {
 }
 
 func TestSizedChannel_OfferInvalidSize(t *testing.T) {
-    q := newSizedQueue[int](1, sizerInt{})
+    q := newSizedQueue[int](1, sizerInt{}, false)
     require.ErrorIs(t, q.Offer(context.Background(), -1), errInvalidSize)
 }
 
 func TestSizedChannel_OfferZeroSize(t *testing.T) {
-    q := newSizedQueue[int](1, sizerInt{})
+    q := newSizedQueue[int](1, sizerInt{}, false)
     require.NoError(t, q.Offer(context.Background(), 0))
     require.NoError(t, q.Shutdown(context.Background()))
     // Because the size 0 is ignored, nothing to drain.
