// batch_span_processor.go
  1. // Copyright The OpenTelemetry Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package trace // import "go.opentelemetry.io/otel/sdk/trace"
  15. import (
  16. "context"
  17. "runtime"
  18. "sync"
  19. "sync/atomic"
  20. "time"
  21. "go.opentelemetry.io/otel"
  22. "go.opentelemetry.io/otel/internal/global"
  23. "go.opentelemetry.io/otel/sdk/internal/env"
  24. "go.opentelemetry.io/otel/trace"
  25. )
// Defaults for BatchSpanProcessorOptions.
const (
	// DefaultMaxQueueSize is the default maximum number of spans buffered
	// before new spans are dropped.
	DefaultMaxQueueSize = 2048
	// DefaultScheduleDelay is the default delay between two consecutive
	// batch exports, in milliseconds.
	DefaultScheduleDelay = 5000
	// DefaultExportTimeout is the default maximum time allowed for a single
	// export, in milliseconds.
	DefaultExportTimeout = 30000
	// DefaultMaxExportBatchSize is the default maximum number of spans sent
	// to the exporter in one batch.
	DefaultMaxExportBatchSize = 512
)
// BatchSpanProcessorOption configures a BatchSpanProcessor.
type BatchSpanProcessorOption func(o *BatchSpanProcessorOptions)

// BatchSpanProcessorOptions is configuration settings for a
// BatchSpanProcessor.
type BatchSpanProcessorOptions struct {
	// MaxQueueSize is the maximum queue size to buffer spans for delayed processing. If the
	// queue gets full it drops the spans. Use BlockOnQueueFull to change this behavior.
	// The default value of MaxQueueSize is 2048.
	MaxQueueSize int

	// BatchTimeout is the maximum duration for constructing a batch. Processor
	// forcefully sends available spans when timeout is reached.
	// The default value of BatchTimeout is 5000 msec.
	BatchTimeout time.Duration

	// ExportTimeout specifies the maximum duration for exporting spans. If the timeout
	// is reached, the export will be cancelled.
	// A value of zero (or less) disables the per-export timeout.
	// The default value of ExportTimeout is 30000 msec.
	ExportTimeout time.Duration

	// MaxExportBatchSize is the maximum number of spans to process in a single batch.
	// If there are more than one batch worth of spans then it processes multiple batches
	// of spans one batch after the other without any delay.
	// The default value of MaxExportBatchSize is 512.
	MaxExportBatchSize int

	// BlockOnQueueFull blocks onEnd() and onStart() method if the queue is full
	// AND if BlockOnQueueFull is set to true.
	// Blocking option should be used carefully as it can severely affect the performance of an
	// application.
	BlockOnQueueFull bool
}
// batchSpanProcessor is a SpanProcessor that batches asynchronously-received
// spans and sends them to a trace.Exporter when complete.
type batchSpanProcessor struct {
	e SpanExporter              // destination exporter; nil disables processing
	o BatchSpanProcessorOptions // configuration fixed at construction time

	queue   chan ReadOnlySpan // buffered hand-off from OnEnd to the processing goroutine
	dropped uint32            // number of spans dropped on full queue; accessed atomically

	batch      []ReadOnlySpan // spans accumulated for the next export
	batchMutex sync.Mutex     // guards batch
	timer      *time.Timer    // fires when BatchTimeout elapses without a full batch
	stopWait   sync.WaitGroup // waits for the processing goroutine to finish
	stopOnce   sync.Once      // ensures Shutdown's work runs at most once
	stopCh     chan struct{}  // closed by Shutdown to stop processQueue
}

var _ SpanProcessor = (*batchSpanProcessor)(nil)
// NewBatchSpanProcessor creates a new SpanProcessor that will send completed
// span batches to the exporter with the supplied options.
//
// If the exporter is nil, the span processor will perform no action.
func NewBatchSpanProcessor(exporter SpanExporter, options ...BatchSpanProcessorOption) SpanProcessor {
	maxQueueSize := env.BatchSpanProcessorMaxQueueSize(DefaultMaxQueueSize)
	maxExportBatchSize := env.BatchSpanProcessorMaxExportBatchSize(DefaultMaxExportBatchSize)

	// A batch can never be larger than the queue that feeds it. If the
	// environment-configured batch size exceeds the queue size, clamp it:
	// prefer the default batch size when it still fits, otherwise use the
	// queue size itself.
	if maxExportBatchSize > maxQueueSize {
		if DefaultMaxExportBatchSize > maxQueueSize {
			maxExportBatchSize = maxQueueSize
		} else {
			maxExportBatchSize = DefaultMaxExportBatchSize
		}
	}

	// Environment values are defaults; explicit options applied below win.
	o := BatchSpanProcessorOptions{
		BatchTimeout:       time.Duration(env.BatchSpanProcessorScheduleDelay(DefaultScheduleDelay)) * time.Millisecond,
		ExportTimeout:      time.Duration(env.BatchSpanProcessorExportTimeout(DefaultExportTimeout)) * time.Millisecond,
		MaxQueueSize:       maxQueueSize,
		MaxExportBatchSize: maxExportBatchSize,
	}
	for _, opt := range options {
		opt(&o)
	}
	bsp := &batchSpanProcessor{
		e:      exporter,
		o:      o,
		batch:  make([]ReadOnlySpan, 0, o.MaxExportBatchSize),
		timer:  time.NewTimer(o.BatchTimeout),
		queue:  make(chan ReadOnlySpan, o.MaxQueueSize),
		stopCh: make(chan struct{}),
	}

	bsp.stopWait.Add(1)
	go func() {
		defer bsp.stopWait.Done()
		// Process spans until stopCh is closed, then drain what remains.
		bsp.processQueue()
		bsp.drainQueue()
	}()
	return bsp
}
// OnStart method does nothing. Spans are only queued for export once they
// end; see OnEnd.
func (bsp *batchSpanProcessor) OnStart(parent context.Context, s ReadWriteSpan) {}
// OnEnd method enqueues a ReadOnlySpan for later processing.
func (bsp *batchSpanProcessor) OnEnd(s ReadOnlySpan) {
	// Do not enqueue spans if we are just going to drop them: with a nil
	// exporter nothing could ever be exported.
	if bsp.e == nil {
		return
	}
	bsp.enqueue(s)
}
// Shutdown flushes the queue and waits until all spans are processed.
// It only executes once. Subsequent call does nothing.
//
// Returns ctx.Err() if ctx is done before the flush completes. Errors from
// the exporter's own Shutdown are passed to the global error handler rather
// than returned.
func (bsp *batchSpanProcessor) Shutdown(ctx context.Context) error {
	var err error
	bsp.stopOnce.Do(func() {
		wait := make(chan struct{})
		go func() {
			// Closing stopCh makes processQueue return; drainQueue then
			// exports any remaining spans before the goroutine finishes
			// and stopWait is released.
			close(bsp.stopCh)
			bsp.stopWait.Wait()
			if bsp.e != nil {
				// NOTE: this err deliberately shadows the outer err —
				// exporter shutdown failures are handled, not returned.
				if err := bsp.e.Shutdown(ctx); err != nil {
					otel.Handle(err)
				}
			}
			close(wait)
		}()
		// Wait until the wait group is done or the context is cancelled
		select {
		case <-wait:
		case <-ctx.Done():
			err = ctx.Err()
		}
	})
	return err
}
// forceFlushSpan is a marker value ForceFlush sends through the queue. Its
// flushed channel is closed once the processor has consumed everything that
// was queued ahead of it. The embedded ReadOnlySpan is left nil; the marker
// must never reach the exporter.
type forceFlushSpan struct {
	ReadOnlySpan
	flushed chan struct{}
}

// SpanContext reports a sampled span context so the marker passes the
// IsSampled filter in the enqueue paths.
func (f forceFlushSpan) SpanContext() trace.SpanContext {
	return trace.NewSpanContext(trace.SpanContextConfig{TraceFlags: trace.FlagsSampled})
}
// ForceFlush exports all ended spans that have not yet been exported.
//
// Returns ctx.Err() if ctx is done before the flush completes, otherwise any
// error from the triggered export.
func (bsp *batchSpanProcessor) ForceFlush(ctx context.Context) error {
	var err error
	if bsp.e != nil {
		flushCh := make(chan struct{})
		// Enqueue a marker span: once processQueue reaches it, every span
		// enqueued before this call has been moved into the batch.
		if bsp.enqueueBlockOnQueueFull(ctx, forceFlushSpan{flushed: flushCh}) {
			select {
			case <-flushCh:
				// Processed any items in queue prior to ForceFlush being called
			case <-ctx.Done():
				return ctx.Err()
			}
		}

		// Run the export in a goroutine so we can still honor ctx
		// cancellation while it is in flight.
		wait := make(chan error)
		go func() {
			wait <- bsp.exportSpans(ctx)
			close(wait)
		}()
		// Wait until the export is finished or the context is cancelled/timed out
		select {
		case err = <-wait:
		case <-ctx.Done():
			err = ctx.Err()
		}
	}
	return err
}
  184. // WithMaxQueueSize returns a BatchSpanProcessorOption that configures the
  185. // maximum queue size allowed for a BatchSpanProcessor.
  186. func WithMaxQueueSize(size int) BatchSpanProcessorOption {
  187. return func(o *BatchSpanProcessorOptions) {
  188. o.MaxQueueSize = size
  189. }
  190. }
  191. // WithMaxExportBatchSize returns a BatchSpanProcessorOption that configures
  192. // the maximum export batch size allowed for a BatchSpanProcessor.
  193. func WithMaxExportBatchSize(size int) BatchSpanProcessorOption {
  194. return func(o *BatchSpanProcessorOptions) {
  195. o.MaxExportBatchSize = size
  196. }
  197. }
  198. // WithBatchTimeout returns a BatchSpanProcessorOption that configures the
  199. // maximum delay allowed for a BatchSpanProcessor before it will export any
  200. // held span (whether the queue is full or not).
  201. func WithBatchTimeout(delay time.Duration) BatchSpanProcessorOption {
  202. return func(o *BatchSpanProcessorOptions) {
  203. o.BatchTimeout = delay
  204. }
  205. }
  206. // WithExportTimeout returns a BatchSpanProcessorOption that configures the
  207. // amount of time a BatchSpanProcessor waits for an exporter to export before
  208. // abandoning the export.
  209. func WithExportTimeout(timeout time.Duration) BatchSpanProcessorOption {
  210. return func(o *BatchSpanProcessorOptions) {
  211. o.ExportTimeout = timeout
  212. }
  213. }
  214. // WithBlocking returns a BatchSpanProcessorOption that configures a
  215. // BatchSpanProcessor to wait for enqueue operations to succeed instead of
  216. // dropping data when the queue is full.
  217. func WithBlocking() BatchSpanProcessorOption {
  218. return func(o *BatchSpanProcessorOptions) {
  219. o.BlockOnQueueFull = true
  220. }
  221. }
// exportSpans is a subroutine of processing and draining the queue. It sends
// the current batch (if non-empty) to the exporter, clears the batch, and
// restarts the batch timer.
func (bsp *batchSpanProcessor) exportSpans(ctx context.Context) error {
	// The next timed export is measured from now.
	bsp.timer.Reset(bsp.o.BatchTimeout)

	bsp.batchMutex.Lock()
	defer bsp.batchMutex.Unlock()

	// An ExportTimeout of zero or less disables the per-export deadline.
	if bsp.o.ExportTimeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, bsp.o.ExportTimeout)
		defer cancel()
	}

	if l := len(bsp.batch); l > 0 {
		global.Debug("exporting spans", "count", len(bsp.batch), "total_dropped", atomic.LoadUint32(&bsp.dropped))
		err := bsp.e.ExportSpans(ctx, bsp.batch)

		// A new batch is always created after exporting, even if the batch failed to be exported.
		//
		// It is up to the exporter to implement any type of retry logic if a batch is failing
		// to be exported, since it is specific to the protocol and backend being sent to.
		bsp.batch = bsp.batch[:0]

		if err != nil {
			return err
		}
	}
	return nil
}
// processQueue removes spans from the `queue` channel until processor
// is shut down. It calls the exporter in batches of up to MaxExportBatchSize
// waiting up to BatchTimeout to form a batch.
func (bsp *batchSpanProcessor) processQueue() {
	defer bsp.timer.Stop()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	for {
		select {
		case <-bsp.stopCh:
			// Shutdown has begun; drainQueue handles whatever is left.
			return
		case <-bsp.timer.C:
			// BatchTimeout elapsed: export whatever has accumulated so far.
			if err := bsp.exportSpans(ctx); err != nil {
				otel.Handle(err)
			}
		case sd := <-bsp.queue:
			if ffs, ok := sd.(forceFlushSpan); ok {
				// ForceFlush marker: every span enqueued before it is now
				// in the batch; signal the waiting ForceFlush call.
				close(ffs.flushed)
				continue
			}
			bsp.batchMutex.Lock()
			bsp.batch = append(bsp.batch, sd)
			shouldExport := len(bsp.batch) >= bsp.o.MaxExportBatchSize
			bsp.batchMutex.Unlock()
			if shouldExport {
				// Stop the timer, draining its channel if it already
				// fired, so exportSpans' Reset starts from a clean timer.
				if !bsp.timer.Stop() {
					<-bsp.timer.C
				}
				if err := bsp.exportSpans(ctx); err != nil {
					otel.Handle(err)
				}
			}
		}
	}
}
  281. // drainQueue awaits the any caller that had added to bsp.stopWait
  282. // to finish the enqueue, then exports the final batch.
  283. func (bsp *batchSpanProcessor) drainQueue() {
  284. ctx, cancel := context.WithCancel(context.Background())
  285. defer cancel()
  286. for {
  287. select {
  288. case sd := <-bsp.queue:
  289. if sd == nil {
  290. if err := bsp.exportSpans(ctx); err != nil {
  291. otel.Handle(err)
  292. }
  293. return
  294. }
  295. bsp.batchMutex.Lock()
  296. bsp.batch = append(bsp.batch, sd)
  297. shouldExport := len(bsp.batch) == bsp.o.MaxExportBatchSize
  298. bsp.batchMutex.Unlock()
  299. if shouldExport {
  300. if err := bsp.exportSpans(ctx); err != nil {
  301. otel.Handle(err)
  302. }
  303. }
  304. default:
  305. close(bsp.queue)
  306. }
  307. }
  308. }
  309. func (bsp *batchSpanProcessor) enqueue(sd ReadOnlySpan) {
  310. ctx := context.TODO()
  311. if bsp.o.BlockOnQueueFull {
  312. bsp.enqueueBlockOnQueueFull(ctx, sd)
  313. } else {
  314. bsp.enqueueDrop(ctx, sd)
  315. }
  316. }
  317. func recoverSendOnClosedChan() {
  318. x := recover()
  319. switch err := x.(type) {
  320. case nil:
  321. return
  322. case runtime.Error:
  323. if err.Error() == "send on closed channel" {
  324. return
  325. }
  326. }
  327. panic(x)
  328. }
// enqueueBlockOnQueueFull enqueues sd, blocking while the queue is full until
// either the send succeeds or ctx is done. It reports whether sd was
// enqueued. Unsampled spans are discarded without being enqueued.
func (bsp *batchSpanProcessor) enqueueBlockOnQueueFull(ctx context.Context, sd ReadOnlySpan) bool {
	if !sd.SpanContext().IsSampled() {
		return false
	}

	// This ensures the bsp.queue<- below does not panic as the
	// processor shuts down.
	defer recoverSendOnClosedChan()

	// Fast-path rejection once shutdown has begun.
	select {
	case <-bsp.stopCh:
		return false
	default:
	}

	select {
	case bsp.queue <- sd:
		return true
	case <-ctx.Done():
		return false
	}
}
// enqueueDrop attempts a non-blocking enqueue of sd. If the queue is full the
// span is dropped and the dropped counter is incremented. It reports whether
// sd was enqueued. Unsampled spans are discarded without being enqueued.
// NOTE(review): ctx is currently unused on this non-blocking path; kept for
// signature symmetry with enqueueBlockOnQueueFull.
func (bsp *batchSpanProcessor) enqueueDrop(ctx context.Context, sd ReadOnlySpan) bool {
	if !sd.SpanContext().IsSampled() {
		return false
	}

	// This ensures the bsp.queue<- below does not panic as the
	// processor shuts down.
	defer recoverSendOnClosedChan()

	// Fast-path rejection once shutdown has begun.
	select {
	case <-bsp.stopCh:
		return false
	default:
	}

	select {
	case bsp.queue <- sd:
		return true
	default:
		// Queue full: count the drop for the debug log in exportSpans.
		atomic.AddUint32(&bsp.dropped, 1)
	}
	return false
}
  368. // MarshalLog is the marshaling function used by the logging system to represent this exporter.
  369. func (bsp *batchSpanProcessor) MarshalLog() interface{} {
  370. return struct {
  371. Type string
  372. SpanExporter SpanExporter
  373. Config BatchSpanProcessorOptions
  374. }{
  375. Type: "BatchSpanProcessor",
  376. SpanExporter: bsp.e,
  377. Config: bsp.o,
  378. }
  379. }