transproxpy.py
changeset 0 cef7ea3df9c2
equal deleted inserted replaced
-1:000000000000 0:cef7ea3df9c2
       
     1 #! /usr/bin/env python3
       
     2 
       
     3 """transproxpy - Simple forwarding HTTP server
       
     4 
       
     5 Works as transparent proxy - client doesn't need to know about it
       
     6 and nothing has to be set on end-user machine.
       
     7 
       
     8 Transproxpy listens on specified port, resends requests to remote server
       
     9 (as found in Host header) and resends responses from the server back to the client.
       
    10 
       
    11 Should be installed on gateway machine, which routes traffic from clients
       
    12 to remote servers. Configure iptables like this:
       
    13 
       
    14     # Redirect all requests incoming from interface wlan0 and destined to 10.0.0.5:80
       
    15     iptables -t nat -A PREROUTING -i wlan0 -p tcp -d 10.0.0.5 --dport 80 -j REDIRECT --to-port 8080
       
    16 
       
     17 Intended usage is to monitor HTTP traffic and manipulate request/response headers and content,
       
    18 for debugging purposes and fun.
       
    19 
       
    20 2014-02-23 Radek Brich
       
    21 
       
    22 """
       
    23 
       
    24 from http.server import HTTPServer, BaseHTTPRequestHandler
       
    25 from http.client import HTTPConnection, HTTPException
       
    26 from socketserver import ThreadingMixIn
       
    27 import logging
       
    28 import gzip
       
    29 import re
       
    30 
       
    31 
       
# Settings

# TCP port the proxy server binds to (on all interfaces, see the `__main__` block).
# Must match the `--to-port` value of the iptables REDIRECT rule.
proxy_port = 8080
# Threshold passed to logging.basicConfig(). With logging.DEBUG, request/response
# headers and POST bodies are logged as well.
log_level = logging.INFO
       
    35 
       
    36 
       
    37 class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
       
    38 
       
    39     pass
       
    40 
       
    41 
       
    42 class DumpHeaders:
       
    43 
       
    44     def __init__(self, headers):
       
    45         self.headers = headers
       
    46 
       
    47     def __str__(self):
       
    48         return '\n'.join('    %-19s %s' % (k+':', v) for k, v in self.headers.items())
       
    49 
       
    50 
       
    51 class ProxyRequestHandler(BaseHTTPRequestHandler):
       
    52 
       
    53     # Allow persistent connections
       
    54     protocol_version = 'HTTP/1.1'
       
    55 
       
    56     def __init__(self, *args, **kwargs):
       
    57         for command in ('GET', 'POST'):
       
    58             setattr(self, 'do_' + command, self._handle_request)
       
    59         self._fconn = None
       
    60         self.log = logging.getLogger(__name__)
       
    61         BaseHTTPRequestHandler.__init__(self, *args, **kwargs)
       
    62 
       
    63     def _handle_request(self):
       
    64         self.log.info('Orig request: %s %s', self.command, self.path)
       
    65         self.log.debug('Headers:\n%s', DumpHeaders(self.headers))
       
    66         resp = self._forward_request()
       
    67         self.response_status = resp.status
       
    68         self.response_reason = resp.reason
       
    69         self.response_headers = resp.headers
       
    70         self.response_body = resp.read(int(resp.headers.get('content-length')))
       
    71         self.log.info('Orig response: %s %s', self.response_status, self.response_reason)
       
    72         self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
       
    73         self.manipulate_response()
       
    74         self.response_headers.replace_header('content-length', str(len(self.response_body)))
       
    75         self.log.info('Proxy response: %s %s', self.response_status, self.response_reason)
       
    76         self.log.debug('Headers:\n%s', DumpHeaders(self.response_headers))
       
    77         self.send_response(self.response_status, self.response_reason)
       
    78         for name, value in self.response_headers.items():
       
    79             self.send_header(name, value)
       
    80         self.end_headers()
       
    81         self.log.debug('start write')
       
    82         self.wfile.write(self.response_body)
       
    83         self.log.debug('done write')
       
    84 
       
    85     def _forward_request(self):
       
    86         if self.command == 'POST':
       
    87             content_len = int(self.headers.get('content-length'))
       
    88             body = self.rfile.read(content_len)
       
    89             self.log.debug('Body: %s', body)
       
    90         else:
       
    91             body = None
       
    92         if not self._fconn:
       
    93             self._connect_target()
       
    94         self.log.info('Proxy request: %s %s', self.command, self.path)
       
    95         try:
       
    96             self._fconn.request(self.command, self.path, body, self.headers)
       
    97             return self._fconn.getresponse()
       
    98         except HTTPException:
       
    99             # Try again, in case that server just closed connection.
       
   100             self._fconn.close()
       
   101             self._connect_target()
       
   102             self._fconn.request(self.command, self.path, body, self.headers)
       
   103             return self._fconn.getresponse()
       
   104 
       
   105     def _connect_target(self):
       
   106         host = self.headers.get('host')
       
   107         self.log.info('Connect to %s:80', host)
       
   108         self._fconn = HTTPConnection(host, timeout=30)
       
   109 
       
   110     def setup(self):
       
   111         super().setup()
       
   112         self.log.info('Connect from %s:%s', *self.client_address)
       
   113 
       
   114     def finish(self):
       
   115         self.log.info('--- Disconnect ---')
       
   116         super().finish()
       
   117         if self._fconn:
       
   118             self._fconn.close()
       
   119             self._fconn = None
       
   120 
       
   121     def handle_one_request(self):
       
   122         self.log.info('--- Request ---')
       
   123         super().handle_one_request()
       
   124 
       
   125     def log_error(self, fmt, *args):
       
   126         self.log.error(fmt, *args)
       
   127 
       
   128     def log_message(self, format_, *args):
       
   129         pass
       
   130 
       
   131     def _prepare_body(self):
       
   132         if self.response_headers.get('content-encoding') == 'gzip':
       
   133             self.response_body = gzip.decompress(self.response_body)
       
   134 
       
   135     def _finish_body(self):
       
   136         if self.response_headers.get('content-encoding') == 'gzip':
       
   137             self.response_body = gzip.compress(self.response_body)
       
   138 
       
   139     def manipulate_response(self):
       
   140         """Manipulate response from server.
       
   141 
       
   142         Request parameters are available as usual, see BaseHTTPRequestHandler.
       
   143 
       
   144         Response from server is available in variables (and can be altered):
       
   145         * self.response_status
       
   146         * self.response_reason
       
   147         * self.response_headers
       
   148         * self.response_body
       
   149 
       
   150         Before touching response_body, call `_prepare_body` and when finished call `_finish_body`.
       
   151         These take care of compression and content_length header.
       
   152 
       
   153         """
       
   154         pass
       
   155 
       
   156 
       
   157 class ExampleRequestHandler(ProxyRequestHandler):
       
   158 
       
   159     """Example proxying request handler.
       
   160 
       
   161     Manipulate content of one JavaScript file, leave rest as is.
       
   162 
       
   163     """
       
   164 
       
   165     def manipulate_response(self):
       
   166         # Enable web control interface for AV receiver Yamaha HTR-4066
       
   167         # by moving the model name from "low" list a little higher :-)
       
   168         if self.path == '/JavaScripts/scr0.js':
       
   169             self._prepare_body()
       
   170             self.response_body = re.sub(br'^ "HTR-4066",', br'', self.response_body, flags=re.MULTILINE)
       
   171             self.response_body = re.sub(br'^( var g_modelName6xx = \[)', br'\1"HTR-4066",', self.response_body, flags=re.MULTILINE)
       
   172             self._finish_body()
       
   173 
       
   174 
       
   175 if __name__ == "__main__":
       
   176     logging.basicConfig(level=log_level,
       
   177         format="%(asctime)s %(threadName)s %(levelname)s: %(message)s", datefmt="%H:%M:%S")
       
   178     transproxpy = ThreadingHTTPServer(('', proxy_port), ExampleRequestHandler)
       
   179     transproxpy.serve_forever()
       
   180