Transproxpy. default tip
author Radek Brich <radek.brich@devl.cz>
Sun, 23 Feb 2014 21:32:04 +0100 (2014-02-23)
changeset 0 cef7ea3df9c2
Transproxpy.
transproxpy.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/transproxpy.py	Sun Feb 23 21:32:04 2014 +0100
@@ -0,0 +1,256 @@
+#! /usr/bin/env python3
+
+"""transproxpy - Simple forwarding HTTP server
+
+Works as transparent proxy - client doesn't need to know about it
+and nothing has to be set on end-user machine.
+
+Transproxpy listens on specified port, resends requests to remote server
+(as found in Host header) and resends responses from the server back to the client.
+
+Should be installed on gateway machine, which routes traffic from clients
+to remote servers. Configure iptables like this:
+
+    # Redirect all requests incoming from interface wlan0 and destined to 10.0.0.5:80
+    iptables -t nat -A PREROUTING -i wlan0 -p tcp -d 10.0.0.5 --dport 80 -j REDIRECT --to-port 8080
+
+Intended usage is to monitor HTTP traffic and manipulate request/response headers and content,
+for debugging purposes and fun.
+
+2014-02-23 Radek Brich
+
+"""
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from http.client import HTTPConnection, HTTPException
+from socketserver import ThreadingMixIn
+import logging
+import gzip
+import re
+
+
+# Settings
+proxy_port = 8080
+log_level = logging.INFO
+
+
+class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
+
+    """HTTP server which handles every client connection in its own thread."""
+
+    # Handler threads block on idle keep-alive connections; make them daemonic
+    # so that they cannot keep the process alive once the main thread exits.
+    daemon_threads = True
+
+
+class DumpHeaders:
+
+    """Format headers for logging, one indented "Name: value" pair per line.
+
+    The text is built in `__str__`, so nothing is formatted unless the log
+    record is actually emitted.
+
+    """
+
+    def __init__(self, headers):
+        self.headers = headers
+
+    def __str__(self):
+        return '\n'.join('    %-19s %s' % (k+':', v) for k, v in self.headers.items())
+
+
+class ProxyRequestHandler(BaseHTTPRequestHandler):
+
+    """Forward GET and POST requests to the server named in the Host header.
+
+    The upstream response is read completely, passed to `manipulate_response`
+    and then sent to the client. The upstream connection is kept open and reused
+    for further requests arriving on the same client connection.
+
+    """
+
+    # Allow persistent connections
+    protocol_version = 'HTTP/1.1'
+
+    def __init__(self, *args, **kwargs):
+        # Everything must be set up before calling the base constructor,
+        # because that one already processes the whole connection.
+        for command in ('GET', 'POST'):
+            setattr(self, 'do_' + command, self._handle_request)
+        self._fconn = None
+        self.log = logging.getLogger(__name__)
+        BaseHTTPRequestHandler.__init__(self, *args, **kwargs)
+
+    def _handle_request(self):
+        """Forward one request, post-process the response and send it to the client."""
+        self.log.info('Orig request: %s %s', self.command, self.path)
+        self.log.debug('Headers:\n%s', DumpHeaders(self.headers))
+        if self._fconn is None and not self.headers.get('host'):
+            # Without Host header there is no way to tell where to forward the request.
+            self.send_error(400, 'Missing Host header')
+            return
+        resp = self._forward_request()
+        self.response_status = resp.status
+        self.response_reason = resp.reason
+        self.response_headers = resp.headers
+        # Without a size argument, read() honours Content-Length, decodes chunked
+        # transfer encoding and otherwise reads until the server closes the connection.
+        self.response_body = resp.read()
+        self.log.info('Orig response: %s %s', self.response_status, self.response_reason)
+        self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
+        self.manipulate_response()
+        self._sync_framing_headers()
+        self.log.info('Proxy response: %s %s', self.response_status, self.response_reason)
+        self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
+        self.send_response(self.response_status, self.response_reason)
+        for name, value in self.response_headers.items():
+            self.send_header(name, value)
+        self.end_headers()
+        self.log.debug('start write')
+        self.wfile.write(self.response_body)
+        self.log.debug('done write')
+
+    def _sync_framing_headers(self):
+        """Make Content-Length and Transfer-Encoding describe `response_body` as it is sent."""
+        headers = self.response_headers
+        if (headers.get('transfer-encoding') or '').lower() == 'chunked':
+            # The body was de-chunked while reading and is sent as one piece.
+            del headers['transfer-encoding']
+        length = str(len(self.response_body))
+        if 'content-length' in headers:
+            headers.replace_header('content-length', length)
+        elif self.response_status >= 200 and self.response_status not in (204, 304):
+            # The client needs the length to find the end of the body on a persistent
+            # connection. Responses which never carry a body are left alone.
+            headers['Content-Length'] = length
+
+    def _forward_request(self):
+        """Send the current request to the target server and return its response.
+
+        A failed attempt is repeated once on a fresh connection.
+
+        """
+        if self.command == 'POST':
+            # Request bodies are supported only when delimited by Content-Length;
+            # a missing header is treated as an empty body.
+            content_len = int(self.headers.get('content-length') or 0)
+            body = self.rfile.read(content_len)
+            self.log.debug('Body: %s', body)
+        else:
+            body = None
+        if not self._fconn:
+            self._connect_target()
+        self.log.info('Proxy request: %s %s', self.command, self.path)
+        try:
+            return self._send_upstream(body)
+        except (HTTPException, ConnectionError):
+            # Try again, in case that server just closed connection.
+            self._fconn.close()
+            self._connect_target()
+            return self._send_upstream(body)
+
+    def _send_upstream(self, body):
+        """Issue the current request with `body` on the upstream connection, return the response."""
+        self._fconn.request(self.command, self.path, body, self.headers)
+        return self._fconn.getresponse()
+
+    def _connect_target(self):
+        """Create the upstream connection to the server named in the Host header."""
+        host = self.headers.get('host')
+        self._fconn = HTTPConnection(host, timeout=30)
+        # Log what HTTPConnection parsed - the Host header may carry an explicit port.
+        self.log.info('Connect to %s:%s', self._fconn.host, self._fconn.port)
+
+    def setup(self):
+        """Log the client address once the connection is set up."""
+        super().setup()
+        self.log.info('Connect from %s:%s', *self.client_address)
+
+    def finish(self):
+        """Close the client connection and the upstream connection."""
+        self.log.info('--- Disconnect ---')
+        try:
+            super().finish()
+        finally:
+            # Close the upstream connection even if closing the client side failed.
+            if self._fconn:
+                self._fconn.close()
+                self._fconn = None
+
+    def handle_one_request(self):
+        """Log a marker, then let the base class process one request."""
+        self.log.info('--- Request ---')
+        super().handle_one_request()
+
+    def log_error(self, fmt, *args):
+        """Route error messages of the base handler to the logger."""
+        self.log.error(fmt, *args)
+
+    def log_message(self, format_, *args):
+        """Suppress the default per-request logging to stderr."""
+        pass
+
+    def _is_gzipped(self):
+        """Tell whether the response declares gzip content encoding."""
+        encoding = self.response_headers.get('content-encoding') or ''
+        return encoding.strip().lower() == 'gzip'
+
+    def _prepare_body(self):
+        """Decompress `response_body` if it is gzipped, so that it can be edited."""
+        if self._is_gzipped():
+            self.response_body = gzip.decompress(self.response_body)
+
+    def _finish_body(self):
+        """Compress `response_body` again if the response is declared as gzipped."""
+        if self._is_gzipped():
+            self.response_body = gzip.compress(self.response_body)
+
+    def manipulate_response(self):
+        """Manipulate response from server.
+
+        Request parameters are available as usual, see BaseHTTPRequestHandler.
+
+        Response from server is available in variables (and can be altered):
+        * self.response_status
+        * self.response_reason
+        * self.response_headers
+        * self.response_body
+
+        Before touching response_body, call `_prepare_body` and when finished call `_finish_body`.
+        These take care of compression. Content-Length header is updated automatically
+        after this method returns.
+
+        """
+        pass
+
+
+class ExampleRequestHandler(ProxyRequestHandler):
+
+    """Example proxying request handler.
+
+    Manipulate content of one JavaScript file, leave rest as is.
+
+    """
+
+    def manipulate_response(self):
+        # Enable web control interface for AV receiver Yamaha HTR-4066
+        # by moving the model name from "low" list a little higher :-)
+        if self.path == '/JavaScripts/scr0.js':
+            self._prepare_body()
+            self.response_body = re.sub(br'^ "HTR-4066",', br'', self.response_body, flags=re.MULTILINE)
+            self.response_body = re.sub(br'^( var g_modelName6xx = \[)', br'\1"HTR-4066",', self.response_body, flags=re.MULTILINE)
+            self._finish_body()
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=log_level,
+        format="%(asctime)s %(threadName)s %(levelname)s: %(message)s", datefmt="%H:%M:%S")
+    transproxpy = ThreadingHTTPServer(('', proxy_port), ExampleRequestHandler)
+    try:
+        transproxpy.serve_forever()
+    except KeyboardInterrupt:
+        # Ctrl-C is the regular way to stop the proxy, not an error.
+        pass
+    finally:
+        transproxpy.server_close()
+