123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206 |
- package goreferrer
- import (
- "encoding/json"
- "io"
- "net/url"
- "path"
- "strings"
- )
- type DomainRule struct {
- Type ReferrerType
- Label string
- Domain string
- Parameters []string
- }
// UaRule holds the referrer details associated with a user-agent pattern.
// Its fields act as fallbacks when the referrer URL itself is missing or
// does not yield a domain or TLD.
type UaRule struct {
	// Url is used as the referrer URL when none was supplied; Domain and Tld
	// fill in the corresponding referrer fields when parsing leaves them empty.
	Url, Domain, Tld string
}

// RegisteredDomain returns "<Domain>.<Tld>", or the empty string when either
// part is missing.
func (u UaRule) RegisteredDomain() string {
	if u.Domain != "" && u.Tld != "" {
		return u.Domain + "." + u.Tld
	}
	return ""
}
- type RuleSet struct {
- DomainRules map[string]DomainRule
- UaRules map[string]UaRule
- }
- func NewRuleSet() RuleSet {
- return RuleSet{
- DomainRules: make(map[string]DomainRule),
- UaRules: make(map[string]UaRule),
- }
- }
- func (r RuleSet) Merge(other RuleSet) {
- for k, v := range other.DomainRules {
- r.DomainRules[k] = v
- }
- for k, v := range other.UaRules {
- r.UaRules[k] = v
- }
- }
- func (r RuleSet) Parse(URL string) Referrer {
- return r.ParseWith(URL, nil, "")
- }
- func (r RuleSet) ParseWith(URL string, domains []string, agent string) Referrer {
- ref := Referrer{
- Type: Indirect,
- URL: strings.Trim(URL, " \t\r\n"),
- }
- uaRule := r.getUaRule(agent)
- if ref.URL == "" {
- ref.URL = uaRule.Url
- }
- if ref.URL == "" {
- ref.Type = Direct
- return ref
- }
- u, ok := parseRichUrl(ref.URL)
- if !ok {
- ref.Type = Invalid
- return ref
- }
- ref.Subdomain = u.Subdomain
- ref.Domain = u.Domain
- ref.Tld = u.Tld
- ref.Path = cleanPath(u.Path)
- if ref.Domain == "" {
- ref.Domain = uaRule.Domain
- }
- if ref.Tld == "" {
- ref.Tld = uaRule.Tld
- }
- for _, domain := range domains {
- if u.Host == domain {
- ref.Type = Direct
- return ref
- }
- }
- variations := []string{
- path.Join(u.Host, u.Path),
- path.Join(u.RegisteredDomain(), u.Path),
- u.Host,
- u.RegisteredDomain(),
- }
- for _, host := range variations {
- domainRule, exists := r.DomainRules[host]
- if !exists {
- continue
- }
- query := getQuery(u.Query(), domainRule.Parameters)
- if query == "" {
- values, err := url.ParseQuery(u.Fragment)
- if err == nil {
- query = getQuery(values, domainRule.Parameters)
- }
- }
- ref.Type = domainRule.Type
- ref.Label = domainRule.Label
- ref.Query = query
- ref.GoogleType = googleSearchType(ref)
- return ref
- }
- ref.Label = strings.Title(u.Domain)
- return ref
- }
- func (r *RuleSet) getUaRule(agent string) UaRule {
- for pattern, rule := range r.UaRules {
- if strings.Contains(agent, pattern) {
- return rule
- }
- }
- return UaRule{}
- }
// getQuery returns the first non-empty value found in values under any of the
// names in params, checking the names in the order given. It returns the
// empty string when none of them carries a value.
func getQuery(values url.Values, params []string) string {
	for i := range params {
		if found := values.Get(params[i]); found != "" {
			return found
		}
	}
	return ""
}
- func googleSearchType(ref Referrer) GoogleSearchType {
- if ref.Type != Search || !strings.Contains(ref.Label, "Google") {
- return NotGoogleSearch
- }
- if strings.HasPrefix(ref.Path, "/aclk") || strings.HasPrefix(ref.Path, "/pagead/aclk") {
- return Adwords
- }
- return OrganicSearch
- }
// cleanPath returns path truncated just before its first ';'. A path that
// contains no ';' is returned unchanged.
func cleanPath(path string) string {
	semi := strings.IndexByte(path, ';')
	if semi < 0 {
		return path
	}
	return path[:semi]
}
// jsonRule is the JSON shape of a single labelled rule: the domains it
// applies to and the query parameter names associated with them.
type jsonRule struct {
	Domains, Parameters []string
}
- type jsonRules struct {
- Email map[string]jsonRule
- Search map[string]jsonRule
- Social map[string]jsonRule
- }
- func LoadJsonDomainRules(reader io.Reader) (map[string]DomainRule, error) {
- var decoded jsonRules
- if err := json.NewDecoder(reader).Decode(&decoded); err != nil {
- return nil, err
- }
- rules := NewRuleSet()
- rules.Merge(extractRules(decoded.Email, Email))
- rules.Merge(extractRules(decoded.Search, Search))
- rules.Merge(extractRules(decoded.Social, Social))
- return rules.DomainRules, nil
- }
- func extractRules(ruleMap map[string]jsonRule, Type ReferrerType) RuleSet {
- rules := NewRuleSet()
- for label, jsonRule := range ruleMap {
- for _, domain := range jsonRule.Domains {
- rules.DomainRules[domain] = DomainRule{
- Type: Type,
- Label: label,
- Domain: domain,
- Parameters: jsonRule.Parameters,
- }
- }
- }
- return rules
- }
|