// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package clientv3

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	v3rpc "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes"
	pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
	mvccpb "go.etcd.io/etcd/mvcc/mvccpb"

	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"
	"google.golang.org/grpc/status"
)

const (
	EventTypeDelete = mvccpb.DELETE
	EventTypePut    = mvccpb.PUT

	closeSendErrTimeout = 250 * time.Millisecond
)

type Event mvccpb.Event

type WatchChan <-chan WatchResponse

type Watcher interface {
	// Watch watches on a key or prefix. The watched events will be returned
	// through the returned channel. If revisions waiting to be sent over the
	// watch are compacted, then the watch will be canceled by the server, the
	// client will post a compacted error watch response, and the channel will close.
	// If the context "ctx" is canceled or timed out, the returned "WatchChan" is closed,
	// and the "WatchResponse" from this closed channel has zero events and a nil "Err()".
	// The context "ctx" MUST be canceled as soon as the watcher is no longer used,
	// to release the associated resources.
	//
	// If the context is "context.Background/TODO", the returned "WatchChan" will
	// not be closed and blocks until an event is triggered, except when the server
	// returns a non-recoverable error (e.g. ErrCompacted).
	// For example, when the context is passed with "WithRequireLeader" and the
	// connected server has no leader (e.g. due to network partition),
	// the error "etcdserver: no leader" (ErrNoLeader) will be returned,
	// and then "WatchChan" is closed with a non-nil "Err()".
	// To prevent a watch stream from getting stuck on a partitioned node,
	// make sure to wrap the context with "WithRequireLeader".
	//
	// Otherwise, as long as the context has not been canceled or timed out,
	// the watch will retry on other recoverable errors forever until reconnected.
	//
	// TODO: explicitly set context error in the last "WatchResponse" message and close channel?
	// Currently, client contexts are overwritten with "valCtx" that never closes.
	// TODO(v3.4): configure watch retry policy, limit maximum retry number
	// (see https://github.com/etcd-io/etcd/issues/8980)
	Watch(ctx context.Context, key string, opts ...OpOption) WatchChan

	// RequestProgress requests a progress notify response be sent in all watch channels.
	RequestProgress(ctx context.Context) error

	// Close closes the watcher and cancels all watch requests.
	Close() error
}
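
// A minimal usage sketch of the Watch contract above (hedged: "cli" is an
// assumed, already-constructed *Client; it is not defined in this file).
// The context is wrapped with WithRequireLeader so a partitioned member
// cannot silently stall the stream, and it is canceled once the watch is
// no longer needed, as the documentation requires:
//
//	ctx, cancel := context.WithCancel(WithRequireLeader(context.Background()))
//	defer cancel()
//	for wresp := range cli.Watch(ctx, "foo", WithPrefix()) {
//		if err := wresp.Err(); err != nil {
//			break // e.g. ErrCompacted or ErrNoLeader; the channel will close
//		}
//		for _, ev := range wresp.Events {
//			fmt.Printf("%s %q -> %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
//		}
//	}
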
type WatchResponse struct {
	Header pb.ResponseHeader
	Events []*Event

	// CompactRevision is the minimum revision the watcher may receive.
	CompactRevision int64

	// Canceled is used to indicate watch failure.
	// If the watch failed and the stream was about to close, before the channel is closed,
	// the channel sends a final response that has Canceled set to true with a non-nil Err().
	Canceled bool

	// Created is used to indicate the creation of the watcher.
	Created bool

	closeErr error

	// cancelReason is the reason for canceling the watch.
	cancelReason string
}

// IsCreate returns true if the event tells that the key is newly created.
func (e *Event) IsCreate() bool {
	return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
}

// IsModify returns true if the event tells that a new value is put on an existing key.
func (e *Event) IsModify() bool {
	return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
}

// Err is the error value if this WatchResponse holds an error.
func (wr *WatchResponse) Err() error {
	switch {
	case wr.closeErr != nil:
		return v3rpc.Error(wr.closeErr)
	case wr.CompactRevision != 0:
		return v3rpc.ErrCompacted
	case wr.Canceled:
		if len(wr.cancelReason) != 0 {
			return v3rpc.Error(status.Error(codes.FailedPrecondition, wr.cancelReason))
		}
		return v3rpc.ErrFutureRev
	}
	return nil
}
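
// A hedged recovery sketch for the compaction case above (caller-side code,
// not part of this package; "cli", "ctx", and "wresp" are assumed names):
// when Err() returns v3rpc.ErrCompacted, the requested revision is no longer
// available, so a typical recovery is to re-read current state and restart
// the watch at wresp.CompactRevision, the minimum revision still watchable:
//
//	if wresp.Err() == v3rpc.ErrCompacted {
//		wch = cli.Watch(ctx, "foo", WithRev(wresp.CompactRevision))
//	}
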
// IsProgressNotify returns true if the WatchResponse is a progress notification.
func (wr *WatchResponse) IsProgressNotify() bool {
	return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
}

// watcher implements the Watcher interface
type watcher struct {
	remote   pb.WatchClient
	callOpts []grpc.CallOption

	// mu protects the grpc streams map
	mu sync.RWMutex

	// streams holds all the active grpc streams keyed by ctx value.
	streams map[string]*watchGrpcStream
	lg      *zap.Logger
}

// watchGrpcStream tracks all watch resources attached to a single grpc stream.
type watchGrpcStream struct {
	owner    *watcher
	remote   pb.WatchClient
	callOpts []grpc.CallOption

	// ctx controls internal remote.Watch requests
	ctx context.Context
	// ctxKey is the key used when looking up this stream's context
	ctxKey string
	cancel context.CancelFunc

	// substreams holds all active watchers on this grpc stream
	substreams map[int64]*watcherStream
	// resuming holds all resuming watchers on this grpc stream
	resuming []*watcherStream

	// reqc sends a watch request from Watch() to the main goroutine
	reqc chan watchStreamRequest
	// respc receives data from the watch client
	respc chan *pb.WatchResponse
	// donec closes to broadcast shutdown
	donec chan struct{}
	// errc transmits errors from grpc Recv to the watch stream reconnect logic
	errc chan error
	// closingc gets the watcherStream of closing watchers
	closingc chan *watcherStream
	// wg is Done when all substream goroutines have exited
	wg sync.WaitGroup

	// resumec closes to signal that all substreams should begin resuming
	resumec chan struct{}
	// closeErr is the error that closed the watch stream
	closeErr error

	lg *zap.Logger
}

// watchStreamRequest is a union of the supported watch request operation types
type watchStreamRequest interface {
	toPB() *pb.WatchRequest
}

// watchRequest is issued by the subscriber to start a new watcher
type watchRequest struct {
	ctx context.Context
	key string
	end string
	rev int64

	// send created notification event if this field is true
	createdNotify bool
	// progressNotify is for progress updates
	progressNotify bool
	// fragmentation should be disabled by default
	// if true, split watch events when total exceeds
	// "--max-request-bytes" flag value + 512-byte
	fragment bool

	// filters is the list of events to filter out
	filters []pb.WatchCreateRequest_FilterType
	// get the previous key-value pair before the event happens
	prevKV bool
	// retc receives a chan WatchResponse once the watcher is established
	retc chan chan WatchResponse
}

// progressRequest is issued by the subscriber to request watch progress
type progressRequest struct{}

// watcherStream represents a registered watcher
type watcherStream struct {
	// initReq is the request that initiated this watcher
	initReq watchRequest

	// outc publishes watch responses to the subscriber
	outc chan WatchResponse
	// recvc buffers watch responses before publishing
	recvc chan *WatchResponse
	// donec closes when the watcherStream goroutine stops.
	donec chan struct{}
	// closing is set to true when the stream should be scheduled to shut down.
	closing bool
	// id is the registered watch id on the grpc stream
	id int64

	// buf holds all events received from etcd but not yet consumed by the client
	buf []*WatchResponse
}

func NewWatcher(c *Client) Watcher {
	return NewWatchFromWatchClient(pb.NewWatchClient(c.conn), c)
}

func NewWatchFromWatchClient(wc pb.WatchClient, c *Client) Watcher {
	w := &watcher{
		remote:  wc,
		streams: make(map[string]*watchGrpcStream),
	}
	if c != nil {
		w.callOpts = c.callOpts
		w.lg = c.lg
	}
	return w
}

// never closes
var valCtxCh = make(chan struct{})
var zeroTime = time.Unix(0, 0)

// valCtx wraps a context so that only its values remain; it is never Done.
type valCtx struct{ context.Context }

func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
func (vc *valCtx) Done() <-chan struct{}       { return valCtxCh }
func (vc *valCtx) Err() error                  { return nil }
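
// A hedged illustration of the valCtx behavior (names are local to this
// file): the wrapper strips cancelation and deadline while keeping the
// context's values, so internal watch retries can outlive the caller's
// deadline yet still carry request values such as outgoing metadata:
//
//	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
//	cancel()
//	vc := &valCtx{ctx}
//	_ = vc.Err()           // nil, even though ctx is already canceled
//	_, ok := vc.Deadline() // ok == false; no deadline
//	// vc.Done() never fires; valCtxCh is never closed
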
func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
	ctx, cancel := context.WithCancel(&valCtx{inctx})
	wgs := &watchGrpcStream{
		owner:      w,
		remote:     w.remote,
		callOpts:   w.callOpts,
		ctx:        ctx,
		ctxKey:     streamKeyFromCtx(inctx),
		cancel:     cancel,
		substreams: make(map[int64]*watcherStream),
		respc:      make(chan *pb.WatchResponse),
		reqc:       make(chan watchStreamRequest),
		donec:      make(chan struct{}),
		errc:       make(chan error, 1),
		closingc:   make(chan *watcherStream),
		resumec:    make(chan struct{}),
		lg:         w.lg,
	}
	go wgs.run()
	return wgs
}

// Watch posts a watch request to run() and waits for a new watcher channel
func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
	ow := opWatch(key, opts...)

	var filters []pb.WatchCreateRequest_FilterType
	if ow.filterPut {
		filters = append(filters, pb.WatchCreateRequest_NOPUT)
	}
	if ow.filterDelete {
		filters = append(filters, pb.WatchCreateRequest_NODELETE)
	}

	wr := &watchRequest{
		ctx:            ctx,
		createdNotify:  ow.createdNotify,
		key:            string(ow.key),
		end:            string(ow.end),
		rev:            ow.rev,
		progressNotify: ow.progressNotify,
		fragment:       ow.fragment,
		filters:        filters,
		prevKV:         ow.prevKV,
		retc:           make(chan chan WatchResponse, 1),
	}

	ok := false
	ctxKey := streamKeyFromCtx(ctx)

	// find or allocate appropriate grpc watch stream
	w.mu.Lock()
	if w.streams == nil {
		// closed
		w.mu.Unlock()
		ch := make(chan WatchResponse)
		close(ch)
		return ch
	}
	wgs := w.streams[ctxKey]
	if wgs == nil {
		wgs = w.newWatcherGrpcStream(ctx)
		w.streams[ctxKey] = wgs
	}
	donec := wgs.donec
	reqc := wgs.reqc
	w.mu.Unlock()

	// couldn't create channel; return closed channel
	closeCh := make(chan WatchResponse, 1)

	// submit request
	select {
	case reqc <- wr:
		ok = true
	case <-wr.ctx.Done():
	case <-donec:
		if wgs.closeErr != nil {
			closeCh <- WatchResponse{Canceled: true, closeErr: wgs.closeErr}
			break
		}
		// retry; may have dropped stream from no ctxs
		return w.Watch(ctx, key, opts...)
	}

	// receive channel
	if ok {
		select {
		case ret := <-wr.retc:
			return ret
		case <-ctx.Done():
		case <-donec:
			if wgs.closeErr != nil {
				closeCh <- WatchResponse{Canceled: true, closeErr: wgs.closeErr}
				break
			}
			// retry; may have dropped stream from no ctxs
			return w.Watch(ctx, key, opts...)
		}
	}

	close(closeCh)
	return closeCh
}

func (w *watcher) Close() (err error) {
	w.mu.Lock()
	streams := w.streams
	w.streams = nil
	w.mu.Unlock()
	for _, wgs := range streams {
		if werr := wgs.close(); werr != nil {
			err = werr
		}
	}
	// Consider context.Canceled as a successful close
	if err == context.Canceled {
		err = nil
	}
	return err
}

// RequestProgress requests a progress notify response be sent in all watch channels.
func (w *watcher) RequestProgress(ctx context.Context) (err error) {
	ctxKey := streamKeyFromCtx(ctx)

	w.mu.Lock()
	if w.streams == nil {
		w.mu.Unlock()
		return fmt.Errorf("no stream found for context")
	}
	wgs := w.streams[ctxKey]
	if wgs == nil {
		wgs = w.newWatcherGrpcStream(ctx)
		w.streams[ctxKey] = wgs
	}
	donec := wgs.donec
	reqc := wgs.reqc
	w.mu.Unlock()

	pr := &progressRequest{}

	select {
	case reqc <- pr:
		return nil
	case <-ctx.Done():
		if err == nil {
			return ctx.Err()
		}
		return err
	case <-donec:
		if wgs.closeErr != nil {
			return wgs.closeErr
		}
		// retry; may have dropped stream from no ctxs
		return w.RequestProgress(ctx)
	}
}

func (w *watchGrpcStream) close() (err error) {
	w.cancel()
	<-w.donec
	select {
	case err = <-w.errc:
	default:
	}
	return toErr(w.ctx, err)
}

func (w *watcher) closeStream(wgs *watchGrpcStream) {
	w.mu.Lock()
	close(wgs.donec)
	wgs.cancel()
	if w.streams != nil {
		delete(w.streams, wgs.ctxKey)
	}
	w.mu.Unlock()
}

func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
	// check watch ID for backward compatibility (<= v3.3)
	if resp.WatchId == -1 || (resp.Canceled && resp.CancelReason != "") {
		w.closeErr = v3rpc.Error(errors.New(resp.CancelReason))
		// failed; no channel
		close(ws.recvc)
		return
	}
	ws.id = resp.WatchId
	w.substreams[ws.id] = ws
}

func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
	select {
	case ws.outc <- *resp:
	case <-ws.initReq.ctx.Done():
	case <-time.After(closeSendErrTimeout):
	}
	close(ws.outc)
}

func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
	// send channel response in case stream was never established
	select {
	case ws.initReq.retc <- ws.outc:
	default:
	}
	// close subscriber's channel
	if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
		go w.sendCloseSubstream(ws, &WatchResponse{Canceled: true, closeErr: w.closeErr})
	} else if ws.outc != nil {
		close(ws.outc)
	}
	if ws.id != -1 {
		delete(w.substreams, ws.id)
		return
	}
	for i := range w.resuming {
		if w.resuming[i] == ws {
			w.resuming[i] = nil
			return
		}
	}
}

// run is the root of the goroutines for managing a watcher client
func (w *watchGrpcStream) run() {
	var wc pb.Watch_WatchClient
	var closeErr error

	// substreams marked to close but goroutine still running; needed for
	// avoiding double-closing recvc on grpc stream teardown
	closing := make(map[*watcherStream]struct{})

	defer func() {
		w.closeErr = closeErr
		// shutdown substreams and resuming substreams
		for _, ws := range w.substreams {
			if _, ok := closing[ws]; !ok {
				close(ws.recvc)
				closing[ws] = struct{}{}
			}
		}
		for _, ws := range w.resuming {
			if _, ok := closing[ws]; ws != nil && !ok {
				close(ws.recvc)
				closing[ws] = struct{}{}
			}
		}
		w.joinSubstreams()
		for range closing {
			w.closeSubstream(<-w.closingc)
		}
		w.wg.Wait()
		w.owner.closeStream(w)
	}()

	// start a stream with the etcd grpc server
	if wc, closeErr = w.newWatchClient(); closeErr != nil {
		return
	}

	cancelSet := make(map[int64]struct{})

	var cur *pb.WatchResponse
	for {
		select {
		// Watch() requested
		case req := <-w.reqc:
			switch wreq := req.(type) {
			case *watchRequest:
				outc := make(chan WatchResponse, 1)
				// TODO: pass custom watch ID?
				ws := &watcherStream{
					initReq: *wreq,
					id:      -1,
					outc:    outc,
					// unbuffered so resumes won't cause repeat events
					recvc: make(chan *WatchResponse),
				}

				ws.donec = make(chan struct{})
				w.wg.Add(1)
				go w.serveSubstream(ws, w.resumec)

				// queue up for watcher creation/resume
				w.resuming = append(w.resuming, ws)
				if len(w.resuming) == 1 {
					// head of resume queue, can register a new watcher
					if err := wc.Send(ws.initReq.toPB()); err != nil {
						if w.lg != nil {
							w.lg.Debug("error when sending request", zap.Error(err))
						}
					}
				}
			case *progressRequest:
				if err := wc.Send(wreq.toPB()); err != nil {
					if w.lg != nil {
						w.lg.Debug("error when sending request", zap.Error(err))
					}
				}
			}

		// new events from the watch client
		case pbresp := <-w.respc:
			if cur == nil || pbresp.Created || pbresp.Canceled {
				cur = pbresp
			} else if cur != nil && cur.WatchId == pbresp.WatchId {
				// merge new events
				cur.Events = append(cur.Events, pbresp.Events...)
				// update "Fragment" field; the last response has "Fragment" == false
				cur.Fragment = pbresp.Fragment
			}

			switch {
			case pbresp.Created:
				// response to head of queue creation
				if ws := w.resuming[0]; ws != nil {
					w.addSubstream(pbresp, ws)
					w.dispatchEvent(pbresp)
					w.resuming[0] = nil
				}

				if ws := w.nextResume(); ws != nil {
					if err := wc.Send(ws.initReq.toPB()); err != nil {
						if w.lg != nil {
							w.lg.Debug("error when sending request", zap.Error(err))
						}
					}
				}

				// reset for next iteration
				cur = nil

			case pbresp.Canceled && pbresp.CompactRevision == 0:
				delete(cancelSet, pbresp.WatchId)
				if ws, ok := w.substreams[pbresp.WatchId]; ok {
					// signal to stream goroutine to update closingc
					close(ws.recvc)
					closing[ws] = struct{}{}
				}

				// reset for next iteration
				cur = nil

			case cur.Fragment:
				// watch response events are still fragmented
				// continue to fetch next fragmented event arrival
				continue

			default:
				// dispatch to appropriate watch stream
				ok := w.dispatchEvent(cur)

				// reset for next iteration
				cur = nil

				if ok {
					break
				}

				// watch response on unexpected watch id; cancel id
				if _, ok := cancelSet[pbresp.WatchId]; ok {
					break
				}

				cancelSet[pbresp.WatchId] = struct{}{}
				cr := &pb.WatchRequest_CancelRequest{
					CancelRequest: &pb.WatchCancelRequest{
						WatchId: pbresp.WatchId,
					},
				}
				req := &pb.WatchRequest{RequestUnion: cr}
				if w.lg != nil {
					w.lg.Debug("sending watch cancel request for failed dispatch", zap.Int64("watch-id", pbresp.WatchId))
				}
				if err := wc.Send(req); err != nil {
					if w.lg != nil {
						w.lg.Debug("failed to send watch cancel request", zap.Int64("watch-id", pbresp.WatchId), zap.Error(err))
					}
				}
			}

		// watch client failed on Recv; spawn another if possible
		case err := <-w.errc:
			if isHaltErr(w.ctx, err) || toErr(w.ctx, err) == v3rpc.ErrNoLeader {
				closeErr = err
				return
			}
			if wc, closeErr = w.newWatchClient(); closeErr != nil {
				return
			}
			if ws := w.nextResume(); ws != nil {
				if err := wc.Send(ws.initReq.toPB()); err != nil {
					if w.lg != nil {
						w.lg.Debug("error when sending request", zap.Error(err))
					}
				}
			}
			cancelSet = make(map[int64]struct{})

		case <-w.ctx.Done():
			return

		case ws := <-w.closingc:
			if ws.id != -1 {
				// client is closing an established watch; close it on the server proactively
				// instead of waiting for the next message to arrive
				cancelSet[ws.id] = struct{}{}
				cr := &pb.WatchRequest_CancelRequest{
					CancelRequest: &pb.WatchCancelRequest{
						WatchId: ws.id,
					},
				}
				req := &pb.WatchRequest{RequestUnion: cr}
				if w.lg != nil {
					w.lg.Debug("sending watch cancel request for closed watcher", zap.Int64("watch-id", ws.id))
				}
				if err := wc.Send(req); err != nil {
					if w.lg != nil {
						w.lg.Debug("failed to send watch cancel request", zap.Int64("watch-id", ws.id), zap.Error(err))
					}
				}
			}
			w.closeSubstream(ws)
			delete(closing, ws)

			// no more watchers on this stream, shutdown
			if len(w.substreams)+len(w.resuming) == 0 {
				return
			}
		}
	}
}

// nextResume chooses the next resuming watcher to register with the grpc stream. Abandoned
// streams are marked as nil in the queue since the head must wait for its inflight registration.
func (w *watchGrpcStream) nextResume() *watcherStream {
	for len(w.resuming) != 0 {
		if w.resuming[0] != nil {
			return w.resuming[0]
		}
		w.resuming = w.resuming[1:]
	}
	return nil
}

// dispatchEvent sends a WatchResponse to the appropriate watcher stream
func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
	events := make([]*Event, len(pbresp.Events))
	for i, ev := range pbresp.Events {
		events[i] = (*Event)(ev)
	}
	// TODO: return watch ID?
	wr := &WatchResponse{
		Header:          *pbresp.Header,
		Events:          events,
		CompactRevision: pbresp.CompactRevision,
		Created:         pbresp.Created,
		Canceled:        pbresp.Canceled,
		cancelReason:    pbresp.CancelReason,
	}

	// watch IDs are zero indexed, so request notify watch responses are assigned a watch ID of -1 to
	// indicate they should be broadcast.
	if wr.IsProgressNotify() && pbresp.WatchId == -1 {
		return w.broadcastResponse(wr)
	}

	return w.unicastResponse(wr, pbresp.WatchId)
}

// broadcastResponse sends a watch response to all watch substreams.
func (w *watchGrpcStream) broadcastResponse(wr *WatchResponse) bool {
	for _, ws := range w.substreams {
		select {
		case ws.recvc <- wr:
		case <-ws.donec:
		}
	}
	return true
}

// unicastResponse sends a watch response to a specific watch substream.
func (w *watchGrpcStream) unicastResponse(wr *WatchResponse, watchId int64) bool {
	ws, ok := w.substreams[watchId]
	if !ok {
		return false
	}
	select {
	case ws.recvc <- wr:
	case <-ws.donec:
		return false
	}
	return true
}

// serveWatchClient forwards messages from the grpc stream to run()
func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
	for {
		resp, err := wc.Recv()
		if err != nil {
			select {
			case w.errc <- err:
			case <-w.donec:
			}
			return
		}
		select {
		case w.respc <- resp:
		case <-w.donec:
			return
		}
	}
}

// serveSubstream forwards watch responses from run() to the subscriber
func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
	if ws.closing {
		panic("created substream goroutine but substream is closing")
	}

	// nextRev is the minimum expected next revision
	nextRev := ws.initReq.rev
	resuming := false
	defer func() {
		if !resuming {
			ws.closing = true
		}
		close(ws.donec)
		if !resuming {
			w.closingc <- ws
		}
		w.wg.Done()
	}()

	emptyWr := &WatchResponse{}
	for {
		curWr := emptyWr
		outc := ws.outc

		if len(ws.buf) > 0 {
			curWr = ws.buf[0]
		} else {
			outc = nil
		}
		select {
		case outc <- *curWr:
			if ws.buf[0].Err() != nil {
				return
			}
			ws.buf[0] = nil
			ws.buf = ws.buf[1:]
		case wr, ok := <-ws.recvc:
			if !ok {
				// shutdown from closeSubstream
				return
			}

			if wr.Created {
				if ws.initReq.retc != nil {
					ws.initReq.retc <- ws.outc
					// to prevent next write from taking the slot in buffered channel
					// and posting duplicate create events
					ws.initReq.retc = nil

					// send first creation event only if requested
					if ws.initReq.createdNotify {
						ws.outc <- *wr
					}
					// once the watch channel is returned, a current revision
					// watch must resume at the store revision. This is necessary
					// for the following case to work as expected:
					//	wch := m1.Watch("a")
					//	m2.Put("a", "b")
					//	<-wch
					// If the revision is only bound on the first observed event,
					// if wch is disconnected before the Put is issued, then reconnects
					// after it is committed, it'll miss the Put.
					if ws.initReq.rev == 0 {
						nextRev = wr.Header.Revision
					}
				}
			} else {
				// current progress of watch; <= store revision
				nextRev = wr.Header.Revision
			}

			if len(wr.Events) > 0 {
				nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
			}
			ws.initReq.rev = nextRev

			// created event is already sent above,
			// watcher should not post duplicate events
			if wr.Created {
				continue
			}

			// TODO pause channel if buffer gets too large
			ws.buf = append(ws.buf, wr)
		case <-w.ctx.Done():
			return
		case <-ws.initReq.ctx.Done():
			return
		case <-resumec:
			resuming = true
			return
		}
	}
	// lazily send cancel message if events on missing id
}

func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
	// mark all substreams as resuming
	close(w.resumec)
	w.resumec = make(chan struct{})
	w.joinSubstreams()
	for _, ws := range w.substreams {
		ws.id = -1
		w.resuming = append(w.resuming, ws)
	}
	// strip out nils, if any
	var resuming []*watcherStream
	for _, ws := range w.resuming {
		if ws != nil {
			resuming = append(resuming, ws)
		}
	}
	w.resuming = resuming
	w.substreams = make(map[int64]*watcherStream)

	// connect to grpc stream while accepting watcher cancelation
	stopc := make(chan struct{})
	donec := w.waitCancelSubstreams(stopc)
	wc, err := w.openWatchClient()
	close(stopc)
	<-donec

	// serve all non-closing streams, even if there's a client error,
	// so that the teardown path can shut down the streams as expected.
	for _, ws := range w.resuming {
		if ws.closing {
			continue
		}
		ws.donec = make(chan struct{})
		w.wg.Add(1)
		go w.serveSubstream(ws, w.resumec)
	}

	if err != nil {
		return nil, v3rpc.Error(err)
	}

	// receive data from new grpc stream
	go w.serveWatchClient(wc)
	return wc, nil
}

func (w *watchGrpcStream) waitCancelSubstreams(stopc <-chan struct{}) <-chan struct{} {
	var wg sync.WaitGroup
	wg.Add(len(w.resuming))
	donec := make(chan struct{})
	for i := range w.resuming {
		go func(ws *watcherStream) {
			defer wg.Done()
			if ws.closing {
				if ws.initReq.ctx.Err() != nil && ws.outc != nil {
					close(ws.outc)
					ws.outc = nil
				}
				return
			}
			select {
			case <-ws.initReq.ctx.Done():
				// closed ws will be removed from resuming
				ws.closing = true
				close(ws.outc)
				ws.outc = nil
				w.wg.Add(1)
				go func() {
					defer w.wg.Done()
					w.closingc <- ws
				}()
			case <-stopc:
			}
		}(w.resuming[i])
	}
	go func() {
		defer close(donec)
		wg.Wait()
	}()
	return donec
}

// joinSubstreams waits for all substream goroutines to complete.
func (w *watchGrpcStream) joinSubstreams() {
	for _, ws := range w.substreams {
		<-ws.donec
	}
	for _, ws := range w.resuming {
		if ws != nil {
			<-ws.donec
		}
	}
}

var maxBackoff = 100 * time.Millisecond

// openWatchClient retries opening a watch client until success or halt.
// manually retry in case "ws==nil && err==nil"
// TODO: remove FailFast=false
func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
	backoff := time.Millisecond
	for {
		select {
		case <-w.ctx.Done():
			if err == nil {
				return nil, w.ctx.Err()
			}
			return nil, err
		default:
		}
		if ws, err = w.remote.Watch(w.ctx, w.callOpts...); ws != nil && err == nil {
			break
		}
		if isHaltErr(w.ctx, err) {
			return nil, v3rpc.Error(err)
		}
		if isUnavailableErr(w.ctx, err) {
			// retry, but backoff
			if backoff < maxBackoff {
				// 25% backoff factor
				backoff = backoff + backoff/4
				if backoff > maxBackoff {
					backoff = maxBackoff
				}
			}
			time.Sleep(backoff)
		}
	}
	return ws, nil
}
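
// A hedged worked example of the retry schedule above (informational, not
// part of the package API): starting at 1ms and growing by 25% on each
// unavailable retry, the sleep sequence is approximately
//
//	1ms, 1.25ms, 1.56ms, 1.95ms, ..., ~95ms, 100ms, 100ms, ...
//
// i.e. it reaches the maxBackoff cap of 100ms after roughly 20 retries
// (1.25^21 ≈ 108) and stays there.
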
// toPB converts an internal watch request structure to its protobuf WatchRequest structure.
func (wr *watchRequest) toPB() *pb.WatchRequest {
	req := &pb.WatchCreateRequest{
		StartRevision:  wr.rev,
		Key:            []byte(wr.key),
		RangeEnd:       []byte(wr.end),
		ProgressNotify: wr.progressNotify,
		Filters:        wr.filters,
		PrevKv:         wr.prevKV,
		Fragment:       wr.fragment,
	}
	cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
	return &pb.WatchRequest{RequestUnion: cr}
}

// toPB converts an internal progress request structure to its protobuf WatchRequest structure.
func (pr *progressRequest) toPB() *pb.WatchRequest {
	req := &pb.WatchProgressRequest{}
	cr := &pb.WatchRequest_ProgressRequest{ProgressRequest: req}
	return &pb.WatchRequest{RequestUnion: cr}
}

func streamKeyFromCtx(ctx context.Context) string {
	if md, ok := metadata.FromOutgoingContext(ctx); ok {
		return fmt.Sprintf("%+v", md)
	}
	return ""
}
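
// A hedged illustration of stream keying (caller-side code, not part of
// this package; "cli" is an assumed *Client): because streamKeyFromCtx
// derives the key from outgoing gRPC metadata, contexts carrying different
// metadata get separate grpc watch streams, while identical (or absent)
// metadata shares a single stream:
//
//	ctxA := metadata.AppendToOutgoingContext(context.Background(), "k", "a")
//	ctxB := metadata.AppendToOutgoingContext(context.Background(), "k", "b")
//	wchA := cli.Watch(ctxA, "foo") // stream keyed by metadata {k: a}
//	wchB := cli.Watch(ctxB, "foo") // separate stream keyed by {k: b}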