rules.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. package goreferrer
  2. import (
  3. "encoding/json"
  4. "io"
  5. "net/url"
  6. "path"
  7. "strings"
  8. )
  9. type DomainRule struct {
  10. Type ReferrerType
  11. Label string
  12. Domain string
  13. Parameters []string
  14. }
  // UaRule supplies referrer details for a user agent pattern. ParseWith uses
  // these values as fallbacks when the request itself does not provide them.
  type UaRule struct {
  	// Url stands in for the referrer URL when none was supplied.
  	// Domain and Tld fill in the corresponding Referrer fields when the
  	// parsed URL leaves them empty.
  	Url, Domain, Tld string
  }
  20. func (u UaRule) RegisteredDomain() string {
  21. if u.Domain == "" || u.Tld == "" {
  22. return ""
  23. }
  24. return u.Domain + "." + u.Tld
  25. }
  26. type RuleSet struct {
  27. DomainRules map[string]DomainRule
  28. UaRules map[string]UaRule
  29. }
  30. func NewRuleSet() RuleSet {
  31. return RuleSet{
  32. DomainRules: make(map[string]DomainRule),
  33. UaRules: make(map[string]UaRule),
  34. }
  35. }
  36. func (r RuleSet) Merge(other RuleSet) {
  37. for k, v := range other.DomainRules {
  38. r.DomainRules[k] = v
  39. }
  40. for k, v := range other.UaRules {
  41. r.UaRules[k] = v
  42. }
  43. }
  44. func (r RuleSet) Parse(URL string) Referrer {
  45. return r.ParseWith(URL, nil, "")
  46. }
  47. func (r RuleSet) ParseWith(URL string, domains []string, agent string) Referrer {
  48. ref := Referrer{
  49. Type: Indirect,
  50. URL: strings.Trim(URL, " \t\r\n"),
  51. }
  52. uaRule := r.getUaRule(agent)
  53. if ref.URL == "" {
  54. ref.URL = uaRule.Url
  55. }
  56. if ref.URL == "" {
  57. ref.Type = Direct
  58. return ref
  59. }
  60. u, ok := parseRichUrl(ref.URL)
  61. if !ok {
  62. ref.Type = Invalid
  63. return ref
  64. }
  65. ref.Subdomain = u.Subdomain
  66. ref.Domain = u.Domain
  67. ref.Tld = u.Tld
  68. ref.Path = cleanPath(u.Path)
  69. if ref.Domain == "" {
  70. ref.Domain = uaRule.Domain
  71. }
  72. if ref.Tld == "" {
  73. ref.Tld = uaRule.Tld
  74. }
  75. for _, domain := range domains {
  76. if u.Host == domain {
  77. ref.Type = Direct
  78. return ref
  79. }
  80. }
  81. variations := []string{
  82. path.Join(u.Host, u.Path),
  83. path.Join(u.RegisteredDomain(), u.Path),
  84. u.Host,
  85. u.RegisteredDomain(),
  86. }
  87. for _, host := range variations {
  88. domainRule, exists := r.DomainRules[host]
  89. if !exists {
  90. continue
  91. }
  92. query := getQuery(u.Query(), domainRule.Parameters)
  93. if query == "" {
  94. values, err := url.ParseQuery(u.Fragment)
  95. if err == nil {
  96. query = getQuery(values, domainRule.Parameters)
  97. }
  98. }
  99. ref.Type = domainRule.Type
  100. ref.Label = domainRule.Label
  101. ref.Query = query
  102. ref.GoogleType = googleSearchType(ref)
  103. return ref
  104. }
  105. ref.Label = strings.Title(u.Domain)
  106. return ref
  107. }
  108. func (r *RuleSet) getUaRule(agent string) UaRule {
  109. for pattern, rule := range r.UaRules {
  110. if strings.Contains(agent, pattern) {
  111. return rule
  112. }
  113. }
  114. return UaRule{}
  115. }
  // getQuery returns the first non-empty value found in values when the names
  // in params are tried in order. It returns the empty string when none of
  // the parameters is present with a non-empty value.
  func getQuery(values url.Values, params []string) string {
  	for i := 0; i < len(params); i++ {
  		if found := values.Get(params[i]); found != "" {
  			return found
  		}
  	}
  	return ""
  }
  125. func googleSearchType(ref Referrer) GoogleSearchType {
  126. if ref.Type != Search || !strings.Contains(ref.Label, "Google") {
  127. return NotGoogleSearch
  128. }
  129. if strings.HasPrefix(ref.Path, "/aclk") || strings.HasPrefix(ref.Path, "/pagead/aclk") {
  130. return Adwords
  131. }
  132. return OrganicSearch
  133. }
  // cleanPath cuts path at its first semicolon, dropping the semicolon and
  // everything after it. A path without a semicolon is returned unchanged.
  func cleanPath(path string) string {
  	semicolon := strings.Index(path, ";")
  	if semicolon < 0 {
  		return path
  	}
  	return path[:semicolon]
  }
  // jsonRule is one labelled entry of the JSON rules document: the domains
  // that belong to the label and the query parameter names to look for.
  type jsonRule struct {
  	Domains, Parameters []string
  }
  144. type jsonRules struct {
  145. Email map[string]jsonRule
  146. Search map[string]jsonRule
  147. Social map[string]jsonRule
  148. }
  149. func LoadJsonDomainRules(reader io.Reader) (map[string]DomainRule, error) {
  150. var decoded jsonRules
  151. if err := json.NewDecoder(reader).Decode(&decoded); err != nil {
  152. return nil, err
  153. }
  154. rules := NewRuleSet()
  155. rules.Merge(extractRules(decoded.Email, Email))
  156. rules.Merge(extractRules(decoded.Search, Search))
  157. rules.Merge(extractRules(decoded.Social, Social))
  158. return rules.DomainRules, nil
  159. }
  160. func extractRules(ruleMap map[string]jsonRule, Type ReferrerType) RuleSet {
  161. rules := NewRuleSet()
  162. for label, jsonRule := range ruleMap {
  163. for _, domain := range jsonRule.Domains {
  164. rules.DomainRules[domain] = DomainRule{
  165. Type: Type,
  166. Label: label,
  167. Domain: domain,
  168. Parameters: jsonRule.Parameters,
  169. }
  170. }
  171. }
  172. return rules
  173. }