balancer.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /*
  2. *
  3. * Copyright 2017 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. // Package balancer defines APIs for load balancing in gRPC.
  19. // All APIs in this package are experimental.
  20. package balancer
  21. import (
  22. "context"
  23. "encoding/json"
  24. "errors"
  25. "net"
  26. "strings"
  27. "google.golang.org/grpc/connectivity"
  28. "google.golang.org/grpc/credentials"
  29. "google.golang.org/grpc/internal"
  30. "google.golang.org/grpc/metadata"
  31. "google.golang.org/grpc/resolver"
  32. "google.golang.org/grpc/serviceconfig"
  33. )
  34. var (
  35. // m is a map from name to balancer builder.
  36. m = make(map[string]Builder)
  37. )
  38. // Register registers the balancer builder to the balancer map. b.Name
  39. // (lowercased) will be used as the name registered with this builder. If the
  40. // Builder implements ConfigParser, ParseConfig will be called when new service
  41. // configs are received by the resolver, and the result will be provided to the
  42. // Balancer in UpdateClientConnState.
  43. //
  44. // NOTE: this function must only be called during initialization time (i.e. in
  45. // an init() function), and is not thread-safe. If multiple Balancers are
  46. // registered with the same name, the one registered last will take effect.
  47. func Register(b Builder) {
  48. m[strings.ToLower(b.Name())] = b
  49. }
  50. // unregisterForTesting deletes the balancer with the given name from the
  51. // balancer map.
  52. //
  53. // This function is not thread-safe.
  54. func unregisterForTesting(name string) {
  55. delete(m, name)
  56. }
  57. func init() {
  58. internal.BalancerUnregister = unregisterForTesting
  59. }
  60. // Get returns the resolver builder registered with the given name.
  61. // Note that the compare is done in a case-insensitive fashion.
  62. // If no builder is register with the name, nil will be returned.
  63. func Get(name string) Builder {
  64. if b, ok := m[strings.ToLower(name)]; ok {
  65. return b
  66. }
  67. return nil
  68. }
  69. // SubConn represents a gRPC sub connection.
  70. // Each sub connection contains a list of addresses. gRPC will
  71. // try to connect to them (in sequence), and stop trying the
  72. // remainder once one connection is successful.
  73. //
  74. // The reconnect backoff will be applied on the list, not a single address.
  75. // For example, try_on_all_addresses -> backoff -> try_on_all_addresses.
  76. //
  77. // All SubConns start in IDLE, and will not try to connect. To trigger
  78. // the connecting, Balancers must call Connect.
  79. // When the connection encounters an error, it will reconnect immediately.
  80. // When the connection becomes IDLE, it will not reconnect unless Connect is
  81. // called.
  82. //
  83. // This interface is to be implemented by gRPC. Users should not need a
  84. // brand new implementation of this interface. For the situations like
  85. // testing, the new implementation should embed this interface. This allows
  86. // gRPC to add new methods to this interface.
  87. type SubConn interface {
  88. // UpdateAddresses updates the addresses used in this SubConn.
  89. // gRPC checks if currently-connected address is still in the new list.
  90. // If it's in the list, the connection will be kept.
  91. // If it's not in the list, the connection will gracefully closed, and
  92. // a new connection will be created.
  93. //
  94. // This will trigger a state transition for the SubConn.
  95. UpdateAddresses([]resolver.Address)
  96. // Connect starts the connecting for this SubConn.
  97. Connect()
  98. }
  99. // NewSubConnOptions contains options to create new SubConn.
  100. type NewSubConnOptions struct {
  101. // CredsBundle is the credentials bundle that will be used in the created
  102. // SubConn. If it's nil, the original creds from grpc DialOptions will be
  103. // used.
  104. CredsBundle credentials.Bundle
  105. // HealthCheckEnabled indicates whether health check service should be
  106. // enabled on this SubConn
  107. HealthCheckEnabled bool
  108. }
  109. // State contains the balancer's state relevant to the gRPC ClientConn.
  110. type State struct {
  111. // State contains the connectivity state of the balancer, which is used to
  112. // determine the state of the ClientConn.
  113. ConnectivityState connectivity.State
  114. // Picker is used to choose connections (SubConns) for RPCs.
  115. Picker V2Picker
  116. }
  117. // ClientConn represents a gRPC ClientConn.
  118. //
  119. // This interface is to be implemented by gRPC. Users should not need a
  120. // brand new implementation of this interface. For the situations like
  121. // testing, the new implementation should embed this interface. This allows
  122. // gRPC to add new methods to this interface.
  123. type ClientConn interface {
  124. // NewSubConn is called by balancer to create a new SubConn.
  125. // It doesn't block and wait for the connections to be established.
  126. // Behaviors of the SubConn can be controlled by options.
  127. NewSubConn([]resolver.Address, NewSubConnOptions) (SubConn, error)
  128. // RemoveSubConn removes the SubConn from ClientConn.
  129. // The SubConn will be shutdown.
  130. RemoveSubConn(SubConn)
  131. // UpdateBalancerState is called by balancer to notify gRPC that some internal
  132. // state in balancer has changed.
  133. //
  134. // gRPC will update the connectivity state of the ClientConn, and will call pick
  135. // on the new picker to pick new SubConn.
  136. //
  137. // Deprecated: use UpdateState instead
  138. UpdateBalancerState(s connectivity.State, p Picker)
  139. // UpdateState notifies gRPC that the balancer's internal state has
  140. // changed.
  141. //
  142. // gRPC will update the connectivity state of the ClientConn, and will call pick
  143. // on the new picker to pick new SubConns.
  144. UpdateState(State)
  145. // ResolveNow is called by balancer to notify gRPC to do a name resolving.
  146. ResolveNow(resolver.ResolveNowOptions)
  147. // Target returns the dial target for this ClientConn.
  148. //
  149. // Deprecated: Use the Target field in the BuildOptions instead.
  150. Target() string
  151. }
  152. // BuildOptions contains additional information for Build.
  153. type BuildOptions struct {
  154. // DialCreds is the transport credential the Balancer implementation can
  155. // use to dial to a remote load balancer server. The Balancer implementations
  156. // can ignore this if it does not need to talk to another party securely.
  157. DialCreds credentials.TransportCredentials
  158. // CredsBundle is the credentials bundle that the Balancer can use.
  159. CredsBundle credentials.Bundle
  160. // Dialer is the custom dialer the Balancer implementation can use to dial
  161. // to a remote load balancer server. The Balancer implementations
  162. // can ignore this if it doesn't need to talk to remote balancer.
  163. Dialer func(context.Context, string) (net.Conn, error)
  164. // ChannelzParentID is the entity parent's channelz unique identification number.
  165. ChannelzParentID int64
  166. // Target contains the parsed address info of the dial target. It is the same resolver.Target as
  167. // passed to the resolver.
  168. // See the documentation for the resolver.Target type for details about what it contains.
  169. Target resolver.Target
  170. }
  171. // Builder creates a balancer.
  172. type Builder interface {
  173. // Build creates a new balancer with the ClientConn.
  174. Build(cc ClientConn, opts BuildOptions) Balancer
  175. // Name returns the name of balancers built by this builder.
  176. // It will be used to pick balancers (for example in service config).
  177. Name() string
  178. }
  179. // ConfigParser parses load balancer configs.
  180. type ConfigParser interface {
  181. // ParseConfig parses the JSON load balancer config provided into an
  182. // internal form or returns an error if the config is invalid. For future
  183. // compatibility reasons, unknown fields in the config should be ignored.
  184. ParseConfig(LoadBalancingConfigJSON json.RawMessage) (serviceconfig.LoadBalancingConfig, error)
  185. }
  186. // PickOptions is a type alias of PickInfo for legacy reasons.
  187. //
  188. // Deprecated: use PickInfo instead.
  189. type PickOptions = PickInfo
  190. // PickInfo contains additional information for the Pick operation.
  191. type PickInfo struct {
  192. // FullMethodName is the method name that NewClientStream() is called
  193. // with. The canonical format is /service/Method.
  194. FullMethodName string
  195. // Ctx is the RPC's context, and may contain relevant RPC-level information
  196. // like the outgoing header metadata.
  197. Ctx context.Context
  198. }
  199. // DoneInfo contains additional information for done.
  200. type DoneInfo struct {
  201. // Err is the rpc error the RPC finished with. It could be nil.
  202. Err error
  203. // Trailer contains the metadata from the RPC's trailer, if present.
  204. Trailer metadata.MD
  205. // BytesSent indicates if any bytes have been sent to the server.
  206. BytesSent bool
  207. // BytesReceived indicates if any byte has been received from the server.
  208. BytesReceived bool
  209. // ServerLoad is the load received from server. It's usually sent as part of
  210. // trailing metadata.
  211. //
  212. // The only supported type now is *orca_v1.LoadReport.
  213. ServerLoad interface{}
  214. }
  215. var (
  216. // ErrNoSubConnAvailable indicates no SubConn is available for pick().
  217. // gRPC will block the RPC until a new picker is available via UpdateBalancerState().
  218. ErrNoSubConnAvailable = errors.New("no SubConn is available")
  219. // ErrTransientFailure indicates all SubConns are in TransientFailure.
  220. // WaitForReady RPCs will block, non-WaitForReady RPCs will fail.
  221. ErrTransientFailure = TransientFailureError(errors.New("all SubConns are in TransientFailure"))
  222. )
  223. // Picker is used by gRPC to pick a SubConn to send an RPC.
  224. // Balancer is expected to generate a new picker from its snapshot every time its
  225. // internal state has changed.
  226. //
  227. // The pickers used by gRPC can be updated by ClientConn.UpdateBalancerState().
  228. //
  229. // Deprecated: use V2Picker instead
  230. type Picker interface {
  231. // Pick returns the SubConn to be used to send the RPC.
  232. // The returned SubConn must be one returned by NewSubConn().
  233. //
  234. // This functions is expected to return:
  235. // - a SubConn that is known to be READY;
  236. // - ErrNoSubConnAvailable if no SubConn is available, but progress is being
  237. // made (for example, some SubConn is in CONNECTING mode);
  238. // - other errors if no active connecting is happening (for example, all SubConn
  239. // are in TRANSIENT_FAILURE mode).
  240. //
  241. // If a SubConn is returned:
  242. // - If it is READY, gRPC will send the RPC on it;
  243. // - If it is not ready, or becomes not ready after it's returned, gRPC will
  244. // block until UpdateBalancerState() is called and will call pick on the
  245. // new picker. The done function returned from Pick(), if not nil, will be
  246. // called with nil error, no bytes sent and no bytes received.
  247. //
  248. // If the returned error is not nil:
  249. // - If the error is ErrNoSubConnAvailable, gRPC will block until UpdateBalancerState()
  250. // - If the error is ErrTransientFailure or implements IsTransientFailure()
  251. // bool, returning true:
  252. // - If the RPC is wait-for-ready, gRPC will block until UpdateBalancerState()
  253. // is called to pick again;
  254. // - Otherwise, RPC will fail with unavailable error.
  255. // - Else (error is other non-nil error):
  256. // - The RPC will fail with the error's status code, or Unknown if it is
  257. // not a status error.
  258. //
  259. // The returned done() function will be called once the rpc has finished,
  260. // with the final status of that RPC. If the SubConn returned is not a
  261. // valid SubConn type, done may not be called. done may be nil if balancer
  262. // doesn't care about the RPC status.
  263. Pick(ctx context.Context, info PickInfo) (conn SubConn, done func(DoneInfo), err error)
  264. }
  265. // PickResult contains information related to a connection chosen for an RPC.
  266. type PickResult struct {
  267. // SubConn is the connection to use for this pick, if its state is Ready.
  268. // If the state is not Ready, gRPC will block the RPC until a new Picker is
  269. // provided by the balancer (using ClientConn.UpdateState). The SubConn
  270. // must be one returned by ClientConn.NewSubConn.
  271. SubConn SubConn
  272. // Done is called when the RPC is completed. If the SubConn is not ready,
  273. // this will be called with a nil parameter. If the SubConn is not a valid
  274. // type, Done may not be called. May be nil if the balancer does not wish
  275. // to be notified when the RPC completes.
  276. Done func(DoneInfo)
  277. }
  278. type transientFailureError struct {
  279. error
  280. }
  281. func (e *transientFailureError) IsTransientFailure() bool { return true }
  282. // TransientFailureError wraps err in an error implementing
  283. // IsTransientFailure() bool, returning true.
  284. func TransientFailureError(err error) error {
  285. return &transientFailureError{error: err}
  286. }
  287. // V2Picker is used by gRPC to pick a SubConn to send an RPC.
  288. // Balancer is expected to generate a new picker from its snapshot every time its
  289. // internal state has changed.
  290. //
  291. // The pickers used by gRPC can be updated by ClientConn.UpdateBalancerState().
  292. type V2Picker interface {
  293. // Pick returns the connection to use for this RPC and related information.
  294. //
  295. // Pick should not block. If the balancer needs to do I/O or any blocking
  296. // or time-consuming work to service this call, it should return
  297. // ErrNoSubConnAvailable, and the Pick call will be repeated by gRPC when
  298. // the Picker is updated (using ClientConn.UpdateState).
  299. //
  300. // If an error is returned:
  301. //
  302. // - If the error is ErrNoSubConnAvailable, gRPC will block until a new
  303. // Picker is provided by the balancer (using ClientConn.UpdateState).
  304. //
  305. // - If the error implements IsTransientFailure() bool, returning true,
  306. // wait for ready RPCs will wait, but non-wait for ready RPCs will be
  307. // terminated with this error's Error() string and status code
  308. // Unavailable.
  309. //
  310. // - Any other errors terminate all RPCs with the code and message
  311. // provided. If the error is not a status error, it will be converted by
  312. // gRPC to a status error with code Unknown.
  313. Pick(info PickInfo) (PickResult, error)
  314. }
  315. // Balancer takes input from gRPC, manages SubConns, and collects and aggregates
  316. // the connectivity states.
  317. //
  318. // It also generates and updates the Picker used by gRPC to pick SubConns for RPCs.
  319. //
  320. // HandleSubConnectionStateChange, HandleResolvedAddrs and Close are guaranteed
  321. // to be called synchronously from the same goroutine.
  322. // There's no guarantee on picker.Pick, it may be called anytime.
  323. type Balancer interface {
  324. // HandleSubConnStateChange is called by gRPC when the connectivity state
  325. // of sc has changed.
  326. // Balancer is expected to aggregate all the state of SubConn and report
  327. // that back to gRPC.
  328. // Balancer should also generate and update Pickers when its internal state has
  329. // been changed by the new state.
  330. //
  331. // Deprecated: if V2Balancer is implemented by the Balancer,
  332. // UpdateSubConnState will be called instead.
  333. HandleSubConnStateChange(sc SubConn, state connectivity.State)
  334. // HandleResolvedAddrs is called by gRPC to send updated resolved addresses to
  335. // balancers.
  336. // Balancer can create new SubConn or remove SubConn with the addresses.
  337. // An empty address slice and a non-nil error will be passed if the resolver returns
  338. // non-nil error to gRPC.
  339. //
  340. // Deprecated: if V2Balancer is implemented by the Balancer,
  341. // UpdateClientConnState will be called instead.
  342. HandleResolvedAddrs([]resolver.Address, error)
  343. // Close closes the balancer. The balancer is not required to call
  344. // ClientConn.RemoveSubConn for its existing SubConns.
  345. Close()
  346. }
  347. // SubConnState describes the state of a SubConn.
  348. type SubConnState struct {
  349. // ConnectivityState is the connectivity state of the SubConn.
  350. ConnectivityState connectivity.State
  351. // ConnectionError is set if the ConnectivityState is TransientFailure,
  352. // describing the reason the SubConn failed. Otherwise, it is nil.
  353. ConnectionError error
  354. }
  355. // ClientConnState describes the state of a ClientConn relevant to the
  356. // balancer.
  357. type ClientConnState struct {
  358. ResolverState resolver.State
  359. // The parsed load balancing configuration returned by the builder's
  360. // ParseConfig method, if implemented.
  361. BalancerConfig serviceconfig.LoadBalancingConfig
  362. }
  363. // ErrBadResolverState may be returned by UpdateClientConnState to indicate a
  364. // problem with the provided name resolver data.
  365. var ErrBadResolverState = errors.New("bad resolver state")
  366. // V2Balancer is defined for documentation purposes. If a Balancer also
  367. // implements V2Balancer, its UpdateClientConnState method will be called
  368. // instead of HandleResolvedAddrs and its UpdateSubConnState will be called
  369. // instead of HandleSubConnStateChange.
  370. type V2Balancer interface {
  371. // UpdateClientConnState is called by gRPC when the state of the ClientConn
  372. // changes. If the error returned is ErrBadResolverState, the ClientConn
  373. // will begin calling ResolveNow on the active name resolver with
  374. // exponential backoff until a subsequent call to UpdateClientConnState
  375. // returns a nil error. Any other errors are currently ignored.
  376. UpdateClientConnState(ClientConnState) error
  377. // ResolverError is called by gRPC when the name resolver reports an error.
  378. ResolverError(error)
  379. // UpdateSubConnState is called by gRPC when the state of a SubConn
  380. // changes.
  381. UpdateSubConnState(SubConn, SubConnState)
  382. // Close closes the balancer. The balancer is not required to call
  383. // ClientConn.RemoveSubConn for its existing SubConns.
  384. Close()
  385. }
  386. // ConnectivityStateEvaluator takes the connectivity states of multiple SubConns
  387. // and returns one aggregated connectivity state.
  388. //
  389. // It's not thread safe.
  390. type ConnectivityStateEvaluator struct {
  391. numReady uint64 // Number of addrConns in ready state.
  392. numConnecting uint64 // Number of addrConns in connecting state.
  393. }
  394. // RecordTransition records state change happening in subConn and based on that
  395. // it evaluates what aggregated state should be.
  396. //
  397. // - If at least one SubConn in Ready, the aggregated state is Ready;
  398. // - Else if at least one SubConn in Connecting, the aggregated state is Connecting;
  399. // - Else the aggregated state is TransientFailure.
  400. //
  401. // Idle and Shutdown are not considered.
  402. func (cse *ConnectivityStateEvaluator) RecordTransition(oldState, newState connectivity.State) connectivity.State {
  403. // Update counters.
  404. for idx, state := range []connectivity.State{oldState, newState} {
  405. updateVal := 2*uint64(idx) - 1 // -1 for oldState and +1 for new.
  406. switch state {
  407. case connectivity.Ready:
  408. cse.numReady += updateVal
  409. case connectivity.Connecting:
  410. cse.numConnecting += updateVal
  411. }
  412. }
  413. // Evaluate.
  414. if cse.numReady > 0 {
  415. return connectivity.Ready
  416. }
  417. if cse.numConnecting > 0 {
  418. return connectivity.Connecting
  419. }
  420. return connectivity.TransientFailure
  421. }