#! /usr/bin/env python3
#
# Contents of transproxpy.py as added by HG changeset:
#   User:    Radek Brich
#   Date:    1393187524 -3600
#   Node ID: cef7ea3df9c21444c83ad8957905f7557ec12a2a
#   Message: Transproxpy.

"""transproxpy - Simple forwarding HTTP server

Works as transparent proxy - client doesn't need to know about it
and nothing has to be set on end-user machine.

Transproxpy listens on specified port, resends requests to remote server
(as found in Host header) and resends responses from the server back to the client.

Should be installed on gateway machine, which routes traffic from clients
to remote servers. Configure iptables like this:

    # Redirect all requests incoming from interface wlan0 and destined to 10.0.0.5:80
    iptables -t nat -A PREROUTING -i wlan0 -p tcp -d 10.0.0.5 --dport 80 -j REDIRECT --to-port 8080

Intended usage is to monitor HTTP traffic and manipulate request/response headers and content,
for debugging purposes and fun.

2014-02-23 Radek Brich

"""

from http.server import HTTPServer, BaseHTTPRequestHandler
from http.client import HTTPConnection, HTTPException
from socketserver import ThreadingMixIn
import logging
import gzip
import re


# Settings
proxy_port = 8080
log_level = logging.INFO


class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):

    """HTTP server which handles every client connection in its own thread."""


class DumpHeaders:

    """Wrapper which renders a headers mapping as aligned ``Name: value`` lines.

    Formatting happens in `__str__`, so passing an instance as a logging
    argument costs nothing unless the record is actually emitted.

    """

    def __init__(self, headers):
        # Any object with an ``items()`` method yielding (name, value) pairs.
        self.headers = headers

    def __str__(self):
        return '\n'.join(' %-19s %s' % (k+':', v) for k, v in self.headers.items())


class ProxyRequestHandler(BaseHTTPRequestHandler):

    """Forward GET/POST requests to the server named in the Host header.

    The complete upstream response is read into memory, handed to
    `manipulate_response` (a hook for subclasses) and then sent back
    to the client.

    """

    # Allow persistent connections
    protocol_version = 'HTTP/1.1'

    def __init__(self, *args, **kwargs):
        for command in ('GET', 'POST'):
            setattr(self, 'do_' + command, self._handle_request)
        # Both attributes must exist before the base constructor runs,
        # because the base constructor already handles the whole connection.
        self._fconn = None
        self.log = logging.getLogger(__name__)
        BaseHTTPRequestHandler.__init__(self, *args, **kwargs)

    def _handle_request(self):
        """Proxy one request: forward it, let the hook alter the response, reply.

        Replies 400 when the request has no Host header (target is unknown)
        and 502 when the upstream server cannot be reached or read.

        """
        self.log.info('Orig request: %s %s', self.command, self.path)
        self.log.debug('Headers:\n%s', DumpHeaders(self.headers))
        if not self.headers.get('host'):
            # Without Host there is no way to tell where to forward to.
            self.send_error(400, 'Missing Host header')
            return
        try:
            resp = self._forward_request()
            # read() without a size honours Content-Length, chunked coding
            # and close-delimited bodies alike.
            body = resp.read()
        except (HTTPException, OSError) as exc:
            self.log.error('Upstream request failed: %r', exc)
            # State of the upstream connection is unknown now, drop it.
            self._close_target()
            self.send_error(502, 'Upstream request failed')
            return
        self.response_status = resp.status
        self.response_reason = resp.reason
        self.response_headers = resp.headers
        self.response_body = body
        self.log.info('Orig response: %s %s', self.response_status, self.response_reason)
        self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
        self.manipulate_response()
        self._sync_framing_headers()
        self.log.info('Proxy response: %s %s', self.response_status, self.response_reason)
        self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
        self.send_response(self.response_status, self.response_reason)
        for name, value in self.response_headers.items():
            self.send_header(name, value)
        self.end_headers()
        self.log.debug('start write')
        self.wfile.write(self.response_body)
        self.log.debug('done write')

    def _sync_framing_headers(self):
        """Make response headers describe `response_body` as it will be sent.

        The body is sent as one block of known size, so Transfer-Encoding
        is removed (the chunked coding was already decoded while reading)
        and Content-Length is set to the current body size.

        """
        headers = self.response_headers
        length = str(len(self.response_body))
        # Deleting a missing header is a no-op for message objects.
        del headers['Transfer-Encoding']
        if 'Content-Length' in headers:
            # Keeps position and spelling of the original header.
            headers.replace_header('Content-Length', length)
        else:
            headers['Content-Length'] = length

    def _forward_request(self):
        """Send the current request to the target server.

        Returns the `http.client.HTTPResponse` with body not yet read.
        The request is retried once on a fresh connection, because
        a kept-alive upstream connection may have been closed by the server.

        """
        if self.command == 'POST':
            # POST without Content-Length is treated as having empty body.
            content_len = int(self.headers.get('content-length') or 0)
            body = self.rfile.read(content_len)
            self.log.debug('Body: %s', body)
        else:
            body = None
        if self._fconn is None:
            self._connect_target()
        self.log.info('Proxy request: %s %s', self.command, self.path)
        try:
            return self._send_to_target(body)
        except (HTTPException, ConnectionError):
            # Try again, in case that server just closed connection.
            self._fconn.close()
            self._connect_target()
            return self._send_to_target(body)

    def _send_to_target(self, body):
        """Issue the request on the current upstream connection, return the response."""
        self._fconn.request(self.command, self.path, body, self.headers)
        return self._fconn.getresponse()

    def _connect_target(self):
        """Create upstream connection object for the host named in Host header."""
        host = self.headers.get('host')
        self._fconn = HTTPConnection(host, timeout=30)
        # Host header may carry an explicit port, log what was really parsed.
        self.log.info('Connect to %s:%s', self._fconn.host, self._fconn.port)

    def _close_target(self):
        """Close and forget the upstream connection, if there is one."""
        if self._fconn is not None:
            self._fconn.close()
            self._fconn = None

    def setup(self):
        super().setup()
        self.log.info('Connect from %s:%s', *self.client_address)

    def finish(self):
        self.log.info('--- Disconnect ---')
        try:
            super().finish()
        finally:
            # Release upstream connection even if client-side cleanup failed.
            self._close_target()

    def handle_one_request(self):
        self.log.info('--- Request ---')
        super().handle_one_request()

    def log_error(self, fmt, *args):
        # Route errors reported by the base class to our logger.
        self.log.error(fmt, *args)

    def log_message(self, format_, *args):
        # Silence per-request log of the base class, requests are logged above.
        pass

    def _body_is_gzipped(self):
        """Return True if Content-Encoding of the response is gzip (any letter case)."""
        encoding = self.response_headers.get('content-encoding') or ''
        return encoding.strip().lower() == 'gzip'

    def _prepare_body(self):
        """Decompress `response_body` in place if the response is gzip-encoded."""
        if self._body_is_gzipped():
            self.response_body = gzip.decompress(self.response_body)

    def _finish_body(self):
        """Compress `response_body` again if the response is gzip-encoded."""
        if self._body_is_gzipped():
            self.response_body = gzip.compress(self.response_body)

    def manipulate_response(self):
        """Manipulate response from server.

        Request parameters are available as usual, see BaseHTTPRequestHandler.

        Response from server is available in variables (and can be altered):
         * self.response_status
         * self.response_reason
         * self.response_headers
         * self.response_body

        Before touching response_body, call `_prepare_body` and when finished call `_finish_body`.
        These take care of compression. The Content-Length header is recomputed
        from response_body after this method returns.

        """
        pass


class ExampleRequestHandler(ProxyRequestHandler):

    """Example proxying request handler.

    Manipulate content of one JavaScript file, leave rest as is.

    """

    def manipulate_response(self):
        # Enable web control interface for AV receiver Yamaha HTR-4066
        # by moving the model name from "low" list a little higher :-)
        if self.path == '/JavaScripts/scr0.js':
            self._prepare_body()
            # Leading whitespace is matched loosely, so the patterns do not
            # depend on exact indentation of the script.
            self.response_body = re.sub(br'^[ \t]*"HTR-4066",', br'',
                                        self.response_body, flags=re.MULTILINE)
            self.response_body = re.sub(br'^([ \t]*var g_modelName6xx = \[)', br'\1"HTR-4066",',
                                        self.response_body, flags=re.MULTILINE)
            self._finish_body()


if __name__ == "__main__":
    logging.basicConfig(level=log_level,
        format="%(asctime)s %(threadName)s %(levelname)s: %(message)s", datefmt="%H:%M:%S")
    transproxpy = ThreadingHTTPServer(('', proxy_port), ExampleRequestHandler)
    transproxpy.serve_forever()