Stripping out select querystring attribute/value pairs so varnish will not vary cache by them

前端 未结 7 1054
名媛妹妹
名媛妹妹 2020-12-29 10:57

My goal is to "whitelist" certain querystring attributes and their values so that Varnish will not vary its cache between URLs that differ only in those attributes.

Example:

Url 1: http:/         


        
7条回答
  •  陌清茗
    陌清茗 (楼主)
    2020-12-29 11:17

    I improved upon runamok's answer a bit by adding support for empty params and by sorting the remaining ones. Here is the full VTC file that I wrote to validate correctness.

    varnishtest "Test for URL normalization - Varnish 4"
    
    # Stub backend: answers every request it receives with the same fixed
    # header and body. The spec is repeated 11 times, one iteration per
    # request that client c1 sends further down in this file.
    server s1 {
      rxreq
      txresp -hdr "Backend: up" -body "Some content"
    } -repeat 11 -start
    
    varnish v1 -vcl+backend {
      import std;
    
      # Normalise req.url so that tracking-only query parameters do not
      # produce separate cache entries for the same content.
      sub vcl_recv {
        # Strip out marketing variables. They are only needed by
        # the javascript running on the page.
        #
        # A match has to start at a parameter boundary (? or &) and has to be
        # followed by & or the end of the URL, so a parameter whose name
        # merely ends in a listed name (movie= versus ie=) is left untouched.
        # The optional value is everything up to the next &, whatever
        # characters it contains (+, ~, :, ...). The leading separator is
        # written back through \1 and the trailing one is only looked at,
        # never consumed, so directly adjacent parameters are each matched.
        # [A-Za-z_] replaces the former [A-z], which also matched the
        # punctuation characters [ \ ] ^ and backtick.
        set req.url = regsuball(req.url, "([?&])(gclid|cx|ie|cof|siteurl|zanpid|origin|utm_[a-z]+|mr:[A-Za-z_]+)(=[^&]*)?(?=&|$)", "\1");
        # Drop a query string that is now empty: a bare '?', '?&', '?&&', ...
        # Anchored on the first '?' so a '?' inside a kept value is not touched.
        set req.url = regsub(req.url, "^([^?]*)\?&*$", "\1");
        # Sort query params. As before, this call is also relied upon to
        # discard the empty parameters (stray '&') left behind by the
        # stripping above.
        set req.url = std.querysort(req.url);
      }
    
      # Expose the normalised URL so the client side of the test can assert on it.
      sub vcl_deliver {
        set resp.http.X-Normalized-URL = req.url;
      }
    } -start
    
    # Client script: sends 11 requests and, for each one, checks the
    # X-Normalized-URL response header that vcl_deliver copies from req.url.
    # "Blacklisted" below means a parameter that vcl_recv is expected to strip.
    client c1 {
      # Basic, no params: the URL must come back unchanged.
      txreq -url "/test/some-url"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # One blacklisted param.
      txreq -url "/test/some-url?utm_campaign=1"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # One blacklisted param, without value.
      txreq -url "/test/some-url?utm_campaign"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # Two blacklisted params.
      txreq -url "/test/some-url?utm_campaign=1&origin=hpg"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # Two blacklisted params, one without value; the other value contains
      # '-' and a percent-encoded space.
      txreq -url "/test/some-url?utm_campaign&origin=123-abc%20"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # Two blacklisted params, both without value (one bare, one with a
      # trailing '=').
      txreq -url "/test/some-url?utm_campaign&origin="
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # Three blacklisted params.
      txreq -url "/test/some-url?utm_campaign=ABC&origin=hpg&siteurl=br2"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # Three blacklisted params, two without value.
      txreq -url "/test/some-url?utm_campaign=1&origin=&siteurl"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url"
    
      # Three blacklisted params; one param to keep, with space encoded as +.
      txreq -url "/test/some-url?qss=hello+one&utm_campaign=some-value&origin=hpg&siteurl=br2"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url?qss=hello+one"
    
      # Three blacklisted params; one param to keep, with space encoded as %20, passed in-between blacklisted ones.
      txreq -url "/test/some-url?utm_campaign=1&qss=hello%20one&origin=hpg&siteurl=br2"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url?qss=hello%20one"
    
      # Three blacklisted params; three params to keep. The kept params are
      # expected back in sorted order (keep1, keep2, qss).
      txreq -url "/test/some-url?utm_campaign=a-value&qss=hello+one&origin=hpg&siteurl=br2&keep2=abc&keep1"
      rxresp
      expect resp.http.X-Normalized-URL == "/test/some-url?keep1&keep2=abc&qss=hello+one"
    } -run
    
    # Sanity check on the Varnish counters: the client_req counter must equal
    # 11, the number of requests client c1 sends above.
    varnish v1 -expect client_req == 11
    

提交回复
热议问题