idle.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. /*
  2. *
  3. * Copyright 2023 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. // Package idle contains a component for managing idleness (entering and exiting)
  19. // based on RPC activity.
  20. package idle
  21. import (
  22. "fmt"
  23. "math"
  24. "sync"
  25. "sync/atomic"
  26. "time"
  27. )
  28. // For overriding in unit tests.
  29. var timeAfterFunc = func(d time.Duration, f func()) *time.Timer {
  30. return time.AfterFunc(d, f)
  31. }
// Enforcer is the functionality provided by grpc.ClientConn to enter
// and exit from idle mode.
//
// The Manager invokes both methods while holding its internal idle mutex.
type Enforcer interface {
	// ExitIdleMode moves the channel out of idle mode. When it returns a
	// non-nil error, Manager.ExitIdleMode wraps and returns that error and
	// the Manager stays in idle mode.
	ExitIdleMode() error
	// EnterIdleMode moves the channel into idle mode. It reports no result,
	// so the Manager always records the transition as having happened.
	EnterIdleMode()
}
// Manager implements idleness detection and calls the configured Enforcer to
// enter/exit idle mode when appropriate. Must be created by NewManager.
type Manager struct {
	// State accessed atomically.
	//
	// NOTE(review): lastCallEndTime is an int64 used with sync/atomic and is
	// presumably kept as the first field so that it is 64-bit aligned on
	// 32-bit platforms; confirm before reordering these fields.
	lastCallEndTime           int64 // Unix timestamp in nanos; time when the most recent RPC completed.
	activeCallsCount          int32 // Count of active RPCs; -math.MaxInt32 means channel is idle or is trying to get there.
	activeSinceLastTimerCheck int32 // Boolean; True if there was an RPC since the last timer callback.
	closed                    int32 // Boolean; True when the manager is closed.

	// Can be accessed without atomics or mutex since these are set at creation
	// time and read-only after that.
	enforcer Enforcer      // Functionality provided by grpc.ClientConn.
	timeout  time.Duration // Idle timeout; when zero, resetIdleTimerLocked never arms a timer.

	// idleMu is used to guarantee mutual exclusion in two scenarios:
	// - Opposing intentions:
	//   - a: Idle timeout has fired and handleIdleTimeout() is trying to put
	//     the channel in idle mode because the channel has been inactive.
	//   - b: At the same time an RPC is made on the channel, and OnCallBegin()
	//     is trying to prevent the channel from going idle.
	// - Competing intentions:
	//   - The channel is in idle mode and there are multiple RPCs starting at
	//     the same time, all trying to move the channel out of idle. Only one
	//     of them should succeed in doing so, while the other RPCs should
	//     piggyback on the first one and be successfully handled.
	//
	// idleMu also guards actuallyIdle and timer below. In the code visible in
	// this file it is only ever taken with Lock/Unlock, never RLock.
	idleMu       sync.RWMutex
	actuallyIdle bool        // True while the enforcer has been put in idle mode; set by NewManager and tryEnterIdleMode, cleared by ExitIdleMode.
	timer        *time.Timer // Pending idle timer; nil until first armed and again after Close.
}
  65. // NewManager creates a new idleness manager implementation for the
  66. // given idle timeout. It begins in idle mode.
  67. func NewManager(enforcer Enforcer, timeout time.Duration) *Manager {
  68. return &Manager{
  69. enforcer: enforcer,
  70. timeout: timeout,
  71. actuallyIdle: true,
  72. activeCallsCount: -math.MaxInt32,
  73. }
  74. }
  75. // resetIdleTimerLocked resets the idle timer to the given duration. Called
  76. // when exiting idle mode or when the timer fires and we need to reset it.
  77. func (m *Manager) resetIdleTimerLocked(d time.Duration) {
  78. if m.isClosed() || m.timeout == 0 || m.actuallyIdle {
  79. return
  80. }
  81. // It is safe to ignore the return value from Reset() because this method is
  82. // only ever called from the timer callback or when exiting idle mode.
  83. if m.timer != nil {
  84. m.timer.Stop()
  85. }
  86. m.timer = timeAfterFunc(d, m.handleIdleTimeout)
  87. }
// resetIdleTimer acquires idleMu and re-arms the idle timer to fire after d.
// It is the variant of resetIdleTimerLocked for callers that do not already
// hold the lock (the timer callback, handleIdleTimeout).
func (m *Manager) resetIdleTimer(d time.Duration) {
	m.idleMu.Lock()
	defer m.idleMu.Unlock()
	m.resetIdleTimerLocked(d)
}
  93. // handleIdleTimeout is the timer callback that is invoked upon expiry of the
  94. // configured idle timeout. The channel is considered inactive if there are no
  95. // ongoing calls and no RPC activity since the last time the timer fired.
  96. func (m *Manager) handleIdleTimeout() {
  97. if m.isClosed() {
  98. return
  99. }
  100. if atomic.LoadInt32(&m.activeCallsCount) > 0 {
  101. m.resetIdleTimer(m.timeout)
  102. return
  103. }
  104. // There has been activity on the channel since we last got here. Reset the
  105. // timer and return.
  106. if atomic.LoadInt32(&m.activeSinceLastTimerCheck) == 1 {
  107. // Set the timer to fire after a duration of idle timeout, calculated
  108. // from the time the most recent RPC completed.
  109. atomic.StoreInt32(&m.activeSinceLastTimerCheck, 0)
  110. m.resetIdleTimer(time.Duration(atomic.LoadInt64(&m.lastCallEndTime)-time.Now().UnixNano()) + m.timeout)
  111. return
  112. }
  113. // Now that we've checked that there has been no activity, attempt to enter
  114. // idle mode, which is very likely to succeed.
  115. if m.tryEnterIdleMode() {
  116. // Successfully entered idle mode. No timer needed until we exit idle.
  117. return
  118. }
  119. // Failed to enter idle mode due to a concurrent RPC that kept the channel
  120. // active, or because of an error from the channel. Undo the attempt to
  121. // enter idle, and reset the timer to try again later.
  122. m.resetIdleTimer(m.timeout)
  123. }
// tryEnterIdleMode instructs the channel to enter idle mode. But before
// that, it performs a last minute check to ensure that no new RPC has come in,
// making the channel active.
//
// Return value indicates whether or not the channel moved to idle mode. When
// it returns false, activeCallsCount has been restored so that it again
// reflects only the number of active RPCs.
//
// Holds idleMu which ensures mutual exclusion with ExitIdleMode.
func (m *Manager) tryEnterIdleMode() bool {
	// Setting the activeCallsCount to -math.MaxInt32 indicates to OnCallBegin()
	// that the channel is either in idle mode or is trying to get there.
	if !atomic.CompareAndSwapInt32(&m.activeCallsCount, 0, -math.MaxInt32) {
		// This CAS operation can fail if an RPC started after we checked for
		// activity in the timer handler, or one was ongoing from before the
		// last time the timer fired, or if a test is attempting to enter idle
		// mode without checking. In all cases, abort going into idle mode.
		return false
	}
	// N.B. if we fail to enter idle mode after this, we must re-add
	// math.MaxInt32 to m.activeCallsCount.

	m.idleMu.Lock()
	defer m.idleMu.Unlock()

	if atomic.LoadInt32(&m.activeCallsCount) != -math.MaxInt32 {
		// We raced and lost to a new RPC. Very rare, but stop entering idle.
		// Adding (rather than storing) keeps the increments made by RPCs that
		// started in the meantime.
		atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
		return false
	}
	if atomic.LoadInt32(&m.activeSinceLastTimerCheck) == 1 {
		// A very short RPC could have come in (and also finished) after we
		// checked for calls count and activity in handleIdleTimeout(), but
		// before the CAS operation. So, we need to check for activity again.
		atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
		return false
	}

	// No new RPCs have come in since we set the active calls count value to
	// -math.MaxInt32. And since we have the lock, it is safe to enter idle mode
	// unconditionally now.
	m.enforcer.EnterIdleMode()
	m.actuallyIdle = true
	return true
}
// EnterIdleModeForTesting attempts to move the channel into idle mode right
// away, without waiting for the idle timer to fire. The attempt goes through
// tryEnterIdleMode, so it does nothing when the active calls count is not
// zero; the outcome is not reported to the caller.
func (m *Manager) EnterIdleModeForTesting() {
	m.tryEnterIdleMode()
}
  167. // OnCallBegin is invoked at the start of every RPC.
  168. func (m *Manager) OnCallBegin() error {
  169. if m.isClosed() {
  170. return nil
  171. }
  172. if atomic.AddInt32(&m.activeCallsCount, 1) > 0 {
  173. // Channel is not idle now. Set the activity bit and allow the call.
  174. atomic.StoreInt32(&m.activeSinceLastTimerCheck, 1)
  175. return nil
  176. }
  177. // Channel is either in idle mode or is in the process of moving to idle
  178. // mode. Attempt to exit idle mode to allow this RPC.
  179. if err := m.ExitIdleMode(); err != nil {
  180. // Undo the increment to calls count, and return an error causing the
  181. // RPC to fail.
  182. atomic.AddInt32(&m.activeCallsCount, -1)
  183. return err
  184. }
  185. atomic.StoreInt32(&m.activeSinceLastTimerCheck, 1)
  186. return nil
  187. }
  188. // ExitIdleMode instructs m to call the enforcer's ExitIdleMode and update m's
  189. // internal state.
  190. func (m *Manager) ExitIdleMode() error {
  191. // Holds idleMu which ensures mutual exclusion with tryEnterIdleMode.
  192. m.idleMu.Lock()
  193. defer m.idleMu.Unlock()
  194. if m.isClosed() || !m.actuallyIdle {
  195. // This can happen in three scenarios:
  196. // - handleIdleTimeout() set the calls count to -math.MaxInt32 and called
  197. // tryEnterIdleMode(). But before the latter could grab the lock, an RPC
  198. // came in and OnCallBegin() noticed that the calls count is negative.
  199. // - Channel is in idle mode, and multiple new RPCs come in at the same
  200. // time, all of them notice a negative calls count in OnCallBegin and get
  201. // here. The first one to get the lock would got the channel to exit idle.
  202. // - Channel is not in idle mode, and the user calls Connect which calls
  203. // m.ExitIdleMode.
  204. //
  205. // In any case, there is nothing to do here.
  206. return nil
  207. }
  208. if err := m.enforcer.ExitIdleMode(); err != nil {
  209. return fmt.Errorf("failed to exit idle mode: %w", err)
  210. }
  211. // Undo the idle entry process. This also respects any new RPC attempts.
  212. atomic.AddInt32(&m.activeCallsCount, math.MaxInt32)
  213. m.actuallyIdle = false
  214. // Start a new timer to fire after the configured idle timeout.
  215. m.resetIdleTimerLocked(m.timeout)
  216. return nil
  217. }
  218. // OnCallEnd is invoked at the end of every RPC.
  219. func (m *Manager) OnCallEnd() {
  220. if m.isClosed() {
  221. return
  222. }
  223. // Record the time at which the most recent call finished.
  224. atomic.StoreInt64(&m.lastCallEndTime, time.Now().UnixNano())
  225. // Decrement the active calls count. This count can temporarily go negative
  226. // when the timer callback is in the process of moving the channel to idle
  227. // mode, but one or more RPCs come in and complete before the timer callback
  228. // can get done with the process of moving to idle mode.
  229. atomic.AddInt32(&m.activeCallsCount, -1)
  230. }
// isClosed reports whether Close has been called. It reads the closed flag
// atomically, so it can be called with or without idleMu held.
func (m *Manager) isClosed() bool {
	return atomic.LoadInt32(&m.closed) == 1
}
  234. func (m *Manager) Close() {
  235. atomic.StoreInt32(&m.closed, 1)
  236. m.idleMu.Lock()
  237. if m.timer != nil {
  238. m.timer.Stop()
  239. m.timer = nil
  240. }
  241. m.idleMu.Unlock()
  242. }