transproxpy.py
author Radek Brich <radek.brich@devl.cz>
Sun, 23 Feb 2014 21:32:04 +0100
changeset 0 cef7ea3df9c2
permissions -rwxr-xr-x
Transproxpy.

#! /usr/bin/env python3

"""transproxpy - Simple forwarding HTTP server

Works as transparent proxy - client doesn't need to know about it
and nothing has to be set on end-user machine.

Transproxpy listens on specified port, resends requests to remote server
(as found in Host header) and resends responses from the server back to the client.

Should be installed on gateway machine, which routes traffic from clients
to remote servers. Configure iptables like this:

    # Redirect all requests incoming from interface wlan0 and destined to 10.0.0.5:80
    iptables -t nat -A PREROUTING -i wlan0 -p tcp -d 10.0.0.5 --dport 80 -j REDIRECT --to-port 8080

Intended usage is to monitor HTTP traffic and manipulate request/response headers and content,
for debugging purposes and fun.

2014-02-23 Radek Brich

"""

from http.server import HTTPServer, BaseHTTPRequestHandler
from http.client import HTTPConnection, HTTPException
from socketserver import ThreadingMixIn
import logging
import gzip
import re


# Settings
log_level = logging.INFO  # verbosity handed to logging.basicConfig at startup
proxy_port = 8080  # TCP port the proxy server listens on


class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):

    """HTTP server which hands every incoming connection to its own thread."""


class DumpHeaders:

    """Lazy pretty-printer for a header mapping.

    Formatting happens only in `__str__`, so passing an instance as a logging
    argument costs nothing unless the record is actually emitted.

    """

    def __init__(self, headers):
        # Any object offering .items() yielding (name, value) pairs will do.
        self.headers = headers

    def __str__(self):
        # One indented line per header; the name (with colon) is left-aligned in 19 columns.
        rendered = []
        for name, value in self.headers.items():
            rendered.append('    %-19s %s' % (name + ':', value))
        return '\n'.join(rendered)


class ProxyRequestHandler(BaseHTTPRequestHandler):

    """Forward GET/POST requests to the server named in the Host header.

    The upstream response is read completely, handed to `manipulate_response`
    (a hook for subclasses) and then relayed to the client. One upstream
    connection is kept per client connection and reused while the Host header
    stays the same.

    """

    # Allow persistent connections
    protocol_version = 'HTTP/1.1'

    def __init__(self, *args, **kwargs):
        # Bound per instance, so a subclass overriding _handle_request is honoured.
        for command in ('GET', 'POST'):
            setattr(self, 'do_' + command, self._handle_request)
        # Must be set before the base __init__, which already processes the connection.
        self._fconn = None
        self._fconn_host = None
        self.log = logging.getLogger(__name__)
        BaseHTTPRequestHandler.__init__(self, *args, **kwargs)

    def _handle_request(self):
        """Proxy one request: forward it, let the hook edit the response, relay it."""
        self.log.info('Orig request: %s %s', self.command, self.path)
        self.log.debug('Headers:\n%s', DumpHeaders(self.headers))
        if not self.headers.get('host'):
            # Without a Host header there is no way to tell where to forward to.
            self.send_error(400, 'Missing Host header')
            return
        try:
            resp = self._forward_request()
            # read() without a size copes with Content-Length, chunked and
            # read-until-close responses alike (and returns the de-chunked body).
            body = resp.read()
        except (HTTPException, OSError) as exc:
            self.log.error('Forwarding to %s failed: %s', self.headers.get('host'), exc)
            # The upstream connection is in an unknown state; start fresh next time.
            self._drop_target()
            self.send_error(502, 'Bad Gateway')
            return
        self.response_status = resp.status
        self.response_reason = resp.reason
        self.response_headers = resp.headers
        self.response_body = body
        self.log.info('Orig response: %s %s', self.response_status, self.response_reason)
        self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
        self.manipulate_response()
        self._sync_framing_headers()
        self.log.info('Proxy response: %s %s', self.response_status, self.response_reason)
        self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
        # send_response() would add its own Server and Date headers on top of the
        # upstream ones; send_response_only() emits just the status line.
        self.send_response_only(self.response_status, self.response_reason)
        for name, value in self.response_headers.items():
            self.send_header(name, value)
        self.end_headers()
        self.log.debug('start write')
        self.wfile.write(self.response_body)
        self.log.debug('done write')

    def _sync_framing_headers(self):
        """Make the framing headers describe `response_body` as it will be sent."""
        headers = self.response_headers
        # The body was already de-chunked by http.client and is sent in one piece.
        # Deleting a header which is not present is a no-op.
        del headers['transfer-encoding']
        length = str(len(self.response_body))
        if 'content-length' in headers:
            headers.replace_header('content-length', length)
        elif not (self.response_status in (204, 304) or 100 <= self.response_status < 200):
            # Bodiless statuses must not gain a Content-Length they did not have.
            headers['Content-Length'] = length

    def _forward_request(self):
        """Send the current request upstream and return the `HTTPResponse`.

        The request is retried once on a fresh connection, because a kept-alive
        upstream connection may have been closed by the server in the meantime.

        """
        if self.command == 'POST':
            # A missing Content-Length is treated as an empty body.
            content_len = int(self.headers.get('content-length') or 0)
            body = self.rfile.read(content_len)
            self.log.debug('Body: %s', body)
        else:
            body = None
        if self._fconn and self._fconn_host != self.headers.get('host'):
            # The client reuses its connection for another host; follow it.
            self._drop_target()
        if not self._fconn:
            self._connect_target()
        self.log.info('Proxy request: %s %s', self.command, self.path)
        try:
            self._fconn.request(self.command, self.path, body, self.headers)
            return self._fconn.getresponse()
        except (HTTPException, ConnectionError):
            # Try again, in case that server just closed connection.
            # (That shows up as BrokenPipeError/ConnectionResetError as well.)
            self._fconn.close()
            self._connect_target()
            self._fconn.request(self.command, self.path, body, self.headers)
            return self._fconn.getresponse()

    def _connect_target(self):
        """Create the upstream connection object for the current Host header."""
        host = self.headers.get('host')
        # HTTPConnection parses an optional ":port" suffix itself (default is 80).
        self._fconn = HTTPConnection(host, timeout=30)
        self._fconn_host = host
        self.log.info('Connect to %s:%s', self._fconn.host, self._fconn.port)

    def _drop_target(self):
        """Close and forget the upstream connection, if there is one."""
        if self._fconn:
            self._fconn.close()
        self._fconn = None
        self._fconn_host = None

    def setup(self):
        super().setup()
        self.log.info('Connect from %s:%s', *self.client_address)

    def finish(self):
        self.log.info('--- Disconnect ---')
        try:
            super().finish()
        finally:
            # Release the upstream socket even when flushing to the client fails.
            self._drop_target()

    def handle_one_request(self):
        self.log.info('--- Request ---')
        super().handle_one_request()

    def log_error(self, fmt, *args):
        # Route the base class' error reports through the logging module.
        self.log.error(fmt, *args)

    def log_message(self, format_, *args):
        # Silence the default per-request access log written by the base class.
        pass

    def _is_gzipped(self):
        """Tell whether the response declares a gzip content coding."""
        encoding = self.response_headers.get('content-encoding', '')
        # Content codings are case-insensitive; "x-gzip" is an alias of "gzip".
        return encoding.strip().lower() in ('gzip', 'x-gzip')

    def _prepare_body(self):
        """Decompress `response_body` in place if it is gzip-encoded."""
        if self._is_gzipped():
            self.response_body = gzip.decompress(self.response_body)

    def _finish_body(self):
        """Re-compress `response_body` in place if the headers declare gzip."""
        if self._is_gzipped():
            self.response_body = gzip.compress(self.response_body)

    def manipulate_response(self):
        """Manipulate response from server.

        Request parameters are available as usual, see BaseHTTPRequestHandler.

        Response from server is available in variables (and can be altered):
        * self.response_status
        * self.response_reason
        * self.response_headers
        * self.response_body

        Before touching response_body, call `_prepare_body` and when finished call `_finish_body`.
        These take care of compression. The Content-Length header is recomputed
        from the final response_body after this method returns.

        """
        pass


class ExampleRequestHandler(ProxyRequestHandler):

    """Sample handler which patches a single JavaScript file.

    Every other response is passed through untouched.

    """

    def manipulate_response(self):
        # Only one script is of interest, anything else goes through as is.
        if self.path != '/JavaScripts/scr0.js':
            return
        # Enable web control interface for AV receiver Yamaha HTR-4066
        # by moving the model name from "low" list a little higher :-)
        self._prepare_body()
        script = self.response_body
        # First drop the model from where it is listed now...
        script = re.sub(br'^ "HTR-4066",', br'', script, flags=re.MULTILINE)
        # ...then put it at the head of the g_modelName6xx array.
        script = re.sub(br'^( var g_modelName6xx = \[)', br'\1"HTR-4066",', script, flags=re.MULTILINE)
        self.response_body = script
        self._finish_body()


if __name__ == "__main__":
    # Each log line carries a timestamp and the thread name, which tells
    # concurrently handled connections apart.
    logging.basicConfig(
        format="%(asctime)s %(threadName)s %(levelname)s: %(message)s",
        datefmt="%H:%M:%S",
        level=log_level,
    )
    # An empty host string makes the server listen on all interfaces.
    listen_address = ('', proxy_port)
    transproxpy = ThreadingHTTPServer(listen_address, ExampleRequestHandler)
    transproxpy.serve_forever()