From 3ec640aebb1bae0400fc8e4c62839bdda70d9131 Mon Sep 17 00:00:00 2001 From: 0x1b5b <174032746+0x1b5b@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:32:43 +0200 Subject: [PATCH 1/2] Remove zstd from the Accept-Encoding HTTP header. --- pywb/rewrite/rewriteinputreq.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pywb/rewrite/rewriteinputreq.py b/pywb/rewrite/rewriteinputreq.py index 6eab1ce08..07abd6516 100644 --- a/pywb/rewrite/rewriteinputreq.py +++ b/pywb/rewrite/rewriteinputreq.py @@ -93,11 +93,13 @@ def get_req_headers(self): if self.splits: value = self.splits.scheme - elif not has_brotli and name == 'HTTP_ACCEPT_ENCODING' and 'br' in value: + elif name == 'HTTP_ACCEPT_ENCODING': # if brotli not available, remove 'br' from accept-encoding to avoid # capture brotli encoded content + # We have to remove zstd from the list of accepted encodings as warcio does not support it. + disallowed_encodings = ('zstd') if has_brotli else ('zstd', 'br') name = 'Accept-Encoding' - value = ','.join([enc for enc in value.split(',') if enc.strip() != 'br']) + value = ','.join([enc for enc in value.split(',') if enc.strip() not in disallowed_encodings]) elif name.startswith('HTTP_'): name = name[5:].title().replace('_', '-') From 2ee4e90988abd2a880d3aab0aef0a01dc8040f65 Mon Sep 17 00:00:00 2001 From: 0x1b5b <174032746+0x1b5b@users.noreply.github.com> Date: Mon, 4 Aug 2025 17:34:20 +0200 Subject: [PATCH 2/2] Fix in response to PR comments. --- pywb/rewrite/rewriteinputreq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pywb/rewrite/rewriteinputreq.py b/pywb/rewrite/rewriteinputreq.py index 07abd6516..6c3ffd021 100644 --- a/pywb/rewrite/rewriteinputreq.py +++ b/pywb/rewrite/rewriteinputreq.py @@ -97,7 +97,7 @@ def get_req_headers(self): # if brotli not available, remove 'br' from accept-encoding to avoid # capture brotli encoded content # We have to remove zstd from the list of accepted encodings as warcio does not support it. - disallowed_encodings = ('zstd') if has_brotli else ('zstd', 'br') + disallowed_encodings = ('zstd',) if has_brotli else ('zstd', 'br') name = 'Accept-Encoding' value = ','.join([enc for enc in value.split(',') if enc.strip() not in disallowed_encodings])