# http_server_39.py -- initial commit (06b398e)
   1"""HTTP server classes.
   2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for GET and POST requests to CGI scripts.
   6
   7It does, however, optionally implement HTTP/1.1 persistent connections,
   8as of version 0.3.
   9
  10Notes on CGIHTTPRequestHandler
  11------------------------------
  12
  13This class implements GET and POST requests to cgi-bin scripts.
  14
  15If the os.fork() function is not present (e.g. on Windows),
  16subprocess.Popen() is used as a fallback, with slightly altered semantics.
  17
  18In all cases, the implementation is intentionally naive -- all
  19requests are executed synchronously.
  20
  21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
  22-- it may execute arbitrary Python code or external programs.
  23
  24Note that status code 200 is sent prior to execution of a CGI script, so
  25scripts cannot send other status codes such as 302 (redirect).
  26
  27XXX To do:
  28
  29- log requests even later (to capture byte count)
  30- log user-agent header and other interesting goodies
  31- send error log to separate file
  32"""
  33
  34
  35# See also:
  36#
  37# HTTP Working Group                                        T. Berners-Lee
  38# INTERNET-DRAFT                                            R. T. Fielding
  39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
  40# Expires September 8, 1995                                  March 8, 1995
  41#
  42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
  43#
  44# and
  45#
  46# Network Working Group                                      R. Fielding
  47# Request for Comments: 2616                                       et al
  48# Obsoletes: 2068                                              June 1999
  49# Category: Standards Track
  50#
  51# URL: http://www.faqs.org/rfcs/rfc2616.html
  52
  53# Log files
  54# ---------
  55#
  56# Here's a quote from the NCSA httpd docs about log file format.
  57#
  58# | The logfile format is as follows. Each line consists of:
  59# |
  60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
  61# |
  62# |        host: Either the DNS name or the IP number of the remote client
  63# |        rfc931: Any information returned by identd for this person,
  64# |                - otherwise.
  65# |        authuser: If user sent a userid for authentication, the user name,
  66# |                  - otherwise.
  67# |        DD: Day
  68# |        Mon: Month (calendar name)
  69# |        YYYY: Year
  70# |        hh: hour (24-hour format, the machine's timezone)
  71# |        mm: minutes
  72# |        ss: seconds
  73# |        request: The first line of the HTTP request as sent by the client.
  74# |        ddd: the status code returned by the server, - if not available.
  75# |        bbbb: the total number of bytes sent,
  76# |              *not including the HTTP/1.0 header*, - if not available
  77# |
  78# | You can determine the name of the file accessed through request.
  79#
  80# (Actually, the latter is only true if you know the server configuration
  81# at the time the request was made!)
  82
  83__version__ = "0.6"
  84
  85__all__ = [
  86    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
  87    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
  88]
  89
  90import copy
  91import datetime
  92import email.utils
  93import html
  94import http.client
  95import io
  96import mimetypes
  97import os
  98import posixpath
  99import select
 100import shutil
 101import socket # For gethostbyaddr()
 102import socketserver
 103import sys
 104import time
 105import urllib.parse
 106import contextlib
 107from functools import partial
 108
 109from http import HTTPStatus
 110
 111
# Default error message template.
# BaseHTTPRequestHandler.send_error() renders it (through the
# error_message_format class attribute) with a mapping that supplies the
# keys 'code', 'message' and 'explain'; message and explain are
# HTML-escaped before substitution.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Content-Type header value that send_error() sends with the page above
# (through the error_content_type class attribute).
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
 131
 132class HTTPServer(socketserver.TCPServer):
 133
 134    allow_reuse_address = 1    # Seems to make sense in testing environment
 135
 136    def server_bind(self):
 137        """Override server_bind to store the server name."""
 138        socketserver.TCPServer.server_bind(self)
 139        host, port = self.server_address[:2]
 140        self.server_name = socket.getfqdn(host)
 141        self.server_port = port
 142
 143
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Setting read by socketserver.ThreadingMixIn (defined outside this file).
    daemon_threads = True
 146
 147
 148class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
 149
 150    """HTTP request handler base class.
 151
 152    The following explanation of HTTP serves to guide you through the
 153    code as well as to expose any misunderstandings I may have about
 154    HTTP (so you don't need to read the code to figure out I'm wrong
 155    :-).
 156
 157    HTTP (HyperText Transfer Protocol) is an extensible protocol on
 158    top of a reliable stream transport (e.g. TCP/IP).  The protocol
 159    recognizes three parts to a request:
 160
 161    1. One line identifying the request type and path
 162    2. An optional set of RFC-822-style headers
 163    3. An optional data part
 164
 165    The headers and data are separated by a blank line.
 166
 167    The first line of the request has the form
 168
 169    <command> <path> <version>
 170
 171    where <command> is a (case-sensitive) keyword such as GET or POST,
 172    <path> is a string containing path information for the request,
 173    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
 174    <path> is encoded using the URL encoding scheme (using %xx to signify
 175    the ASCII character with hex code xx).
 176
 177    The specification specifies that lines are separated by CRLF but
 178    for compatibility with the widest range of clients recommends
 179    servers also handle LF.  Similarly, whitespace in the request line
 180    is treated sensibly (allowing multiple spaces between components
 181    and allowing trailing whitespace).
 182
 183    Similarly, for output, lines ought to be separated by CRLF pairs
 184    but most clients grok LF characters just fine.
 185
 186    If the first line of the request has the form
 187
 188    <command> <path>
 189
 190    (i.e. <version> is left out) then this is assumed to be an HTTP
 191    0.9 request; this form has no optional headers and data part and
 192    the reply consists of just the data.
 193
 194    The reply form of the HTTP 1.x protocol again has three parts:
 195
 196    1. One line giving the response code
 197    2. An optional set of RFC-822-style headers
 198    3. The data
 199
 200    Again, the headers and data are separated by a blank line.
 201
 202    The response code line has the form
 203
 204    <version> <responsecode> <responsestring>
 205
 206    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
 207    <responsecode> is a 3-digit response code indicating success or
 208    failure of the request, and <responsestring> is an optional
 209    human-readable string explaining what the response code means.
 210
 211    This server parses the request and the headers, and then calls a
 212    function specific to the request type (<command>).  Specifically,
 213    a request SPAM will be handled by a method do_SPAM().  If no
 214    such method exists the server sends an error response to the
 215    client.  If it exists, it is called with no arguments:
 216
 217    do_SPAM()
 218
 219    Note that the request name is case sensitive (i.e. SPAM and spam
 220    are different requests).
 221
 222    The various request details are stored in instance variables:
 223
 224    - client_address is the client IP address in the form (host,
 225    port);
 226
 227    - command, path and version are the broken-down request line;
 228
 229    - headers is an instance of email.message.Message (or a derived
 230    class) containing the header information;
 231
 232    - rfile is a file object open for reading positioned at the
 233    start of the optional input data part;
 234
 235    - wfile is a file object open for writing.
 236
 237    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
 238
 239    The first thing to be written must be the response line.  Then
 240    follow 0 or more header lines, then a blank line, and then the
 241    actual data (if any).  The meaning of the header lines depends on
 242    the command executed by the server; in most cases, when data is
 243    returned, there should be at least one header line of the form
 244
 245    Content-type: <type>/<subtype>
 246
 247    where <type> and <subtype> should be registered MIME types,
 248    e.g. "text/html" or "text/plain".
 249
 250    """
 251
 252    # The Python system version, truncated to its first component.
 253    sys_version = "Python/" + sys.version.split()[0]
 254
 255    # The server software version.  You may want to override this.
 256    # The format is multiple whitespace-separated strings,
 257    # where each string is of the form name[/version].
 258    server_version = "BaseHTTP/" + __version__
 259
 260    error_message_format = DEFAULT_ERROR_MESSAGE
 261    error_content_type = DEFAULT_ERROR_CONTENT_TYPE
 262
 263    # The default request version.  This only affects responses up until
 264    # the point where the request line is parsed, so it mainly decides what
 265    # the client gets back when sending a malformed request line.
 266    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
 267    default_request_version = "HTTP/0.9"
 268
 269    def parse_request(self):
 270        """Parse a request (internal).
 271
 272        The request should be stored in self.raw_requestline; the results
 273        are in self.command, self.path, self.request_version and
 274        self.headers.
 275
 276        Return True for success, False for failure; on failure, any relevant
 277        error response has already been sent back.
 278
 279        """
 280        self.command = None  # set in case of error on the first line
 281        self.request_version = version = self.default_request_version
 282        self.close_connection = True
 283        requestline = str(self.raw_requestline, 'iso-8859-1')
 284        requestline = requestline.rstrip('\r\n')
 285        self.requestline = requestline
 286        words = requestline.split()
 287        if len(words) == 0:
 288            return False
 289
 290        if len(words) >= 3:  # Enough to determine protocol version
 291            version = words[-1]
 292            try:
 293                if not version.startswith('HTTP/'):
 294                    raise ValueError
 295                base_version_number = version.split('/', 1)[1]
 296                version_number = base_version_number.split(".")
 297                # RFC 2145 section 3.1 says there can be only one "." and
 298                #   - major and minor numbers MUST be treated as
 299                #      separate integers;
 300                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
 301                #      turn is lower than HTTP/12.3;
 302                #   - Leading zeros MUST be ignored by recipients.
 303                if len(version_number) != 2:
 304                    raise ValueError
 305                version_number = int(version_number[0]), int(version_number[1])
 306            except (ValueError, IndexError):
 307                self.send_error(
 308                    HTTPStatus.BAD_REQUEST,
 309                    "Bad request version (%r)" % version)
 310                return False
 311            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
 312                self.close_connection = False
 313            if version_number >= (2, 0):
 314                self.send_error(
 315                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
 316                    "Invalid HTTP version (%s)" % base_version_number)
 317                return False
 318            self.request_version = version
 319
 320        if not 2 <= len(words) <= 3:
 321            self.send_error(
 322                HTTPStatus.BAD_REQUEST,
 323                "Bad request syntax (%r)" % requestline)
 324            return False
 325        command, path = words[:2]
 326        if len(words) == 2:
 327            self.close_connection = True
 328            if command != 'GET':
 329                self.send_error(
 330                    HTTPStatus.BAD_REQUEST,
 331                    "Bad HTTP/0.9 request type (%r)" % command)
 332                return False
 333        self.command, self.path = command, path
 334
 335        # Examine the headers and look for a Connection directive.
 336        try:
 337            self.headers = http.client.parse_headers(self.rfile,
 338                                                     _class=self.MessageClass)
 339        except http.client.LineTooLong as err:
 340            self.send_error(
 341                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
 342                "Line too long",
 343                str(err))
 344            return False
 345        except http.client.HTTPException as err:
 346            self.send_error(
 347                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
 348                "Too many headers",
 349                str(err)
 350            )
 351            return False
 352
 353        conntype = self.headers.get('Connection', "")
 354        if conntype.lower() == 'close':
 355            self.close_connection = True
 356        elif (conntype.lower() == 'keep-alive' and
 357              self.protocol_version >= "HTTP/1.1"):
 358            self.close_connection = False
 359        # Examine the headers and look for an Expect directive
 360        expect = self.headers.get('Expect', "")
 361        if (expect.lower() == "100-continue" and
 362                self.protocol_version >= "HTTP/1.1" and
 363                self.request_version >= "HTTP/1.1"):
 364            if not self.handle_expect_100():
 365                return False
 366        return True
 367
 368    def handle_expect_100(self):
 369        """Decide what to do with an "Expect: 100-continue" header.
 370
 371        If the client is expecting a 100 Continue response, we must
 372        respond with either a 100 Continue or a final response before
 373        waiting for the request body. The default is to always respond
 374        with a 100 Continue. You can behave differently (for example,
 375        reject unauthorized requests) by overriding this method.
 376
 377        This method should either return True (possibly after sending
 378        a 100 Continue response) or send an error response and return
 379        False.
 380
 381        """
 382        self.send_response_only(HTTPStatus.CONTINUE)
 383        self.end_headers()
 384        return True
 385
 386    def handle_one_request(self):
 387        """Handle a single HTTP request.
 388
 389        You normally don't need to override this method; see the class
 390        __doc__ string for information on how to handle specific HTTP
 391        commands such as GET and POST.
 392
 393        """
 394        try:
 395            self.raw_requestline = self.rfile.readline(65537)
 396            if len(self.raw_requestline) > 65536:
 397                self.requestline = ''
 398                self.request_version = ''
 399                self.command = ''
 400                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
 401                return
 402            if not self.raw_requestline:
 403                self.close_connection = True
 404                return
 405            if not self.parse_request():
 406                # An error code has been sent, just exit
 407                return
 408            mname = 'do_' + self.command
 409            if not hasattr(self, mname):
 410                self.send_error(
 411                    HTTPStatus.NOT_IMPLEMENTED,
 412                    "Unsupported method (%r)" % self.command)
 413                return
 414            method = getattr(self, mname)
 415            method()
 416            self.wfile.flush() #actually send the response if not already done.
 417        except socket.timeout as e:
 418            #a read or a write timed out.  Discard this connection
 419            self.log_error("Request timed out: %r", e)
 420            self.close_connection = True
 421            return
 422
 423    def handle(self):
 424        """Handle multiple requests if necessary."""
 425        self.close_connection = True
 426
 427        self.handle_one_request()
 428        while not self.close_connection:
 429            self.handle_one_request()
 430
 431    def send_error(self, code, message=None, explain=None):
 432        """Send and log an error reply.
 433
 434        Arguments are
 435        * code:    an HTTP error code
 436                   3 digits
 437        * message: a simple optional 1 line reason phrase.
 438                   *( HTAB / SP / VCHAR / %x80-FF )
 439                   defaults to short entry matching the response code
 440        * explain: a detailed message defaults to the long entry
 441                   matching the response code.
 442
 443        This sends an error response (so it must be called before any
 444        output has been generated), logs the error, and finally sends
 445        a piece of HTML explaining the error to the user.
 446
 447        """
 448
 449        try:
 450            shortmsg, longmsg = self.responses[code]
 451        except KeyError:
 452            shortmsg, longmsg = '???', '???'
 453        if message is None:
 454            message = shortmsg
 455        if explain is None:
 456            explain = longmsg
 457        self.log_error("code %d, message %s", code, message)
 458        self.send_response(code, message)
 459        self.send_header('Connection', 'close')
 460
 461        # Message body is omitted for cases described in:
 462        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
 463        #  - RFC7231: 6.3.6. 205(Reset Content)
 464        body = None
 465        if (code >= 200 and
 466            code not in (HTTPStatus.NO_CONTENT,
 467                         HTTPStatus.RESET_CONTENT,
 468                         HTTPStatus.NOT_MODIFIED)):
 469            # HTML encode to prevent Cross Site Scripting attacks
 470            # (see bug #1100201)
 471            content = (self.error_message_format % {
 472                'code': code,
 473                'message': html.escape(message, quote=False),
 474                'explain': html.escape(explain, quote=False)
 475            })
 476            body = content.encode('UTF-8', 'replace')
 477            self.send_header("Content-Type", self.error_content_type)
 478            self.send_header('Content-Length', str(len(body)))
 479        self.end_headers()
 480
 481        if self.command != 'HEAD' and body:
 482            self.wfile.write(body)
 483
 484    def send_response(self, code, message=None):
 485        """Add the response header to the headers buffer and log the
 486        response code.
 487
 488        Also send two standard headers with the server software
 489        version and the current date.
 490
 491        """
 492        self.log_request(code)
 493        self.send_response_only(code, message)
 494        self.send_header('Server', self.version_string())
 495        self.send_header('Date', self.date_time_string())
 496
 497    def send_response_only(self, code, message=None):
 498        """Send the response header only."""
 499        if self.request_version != 'HTTP/0.9':
 500            if message is None:
 501                if code in self.responses:
 502                    message = self.responses[code][0]
 503                else:
 504                    message = ''
 505            if not hasattr(self, '_headers_buffer'):
 506                self._headers_buffer = []
 507            self._headers_buffer.append(("%s %d %s\r\n" %
 508                    (self.protocol_version, code, message)).encode(
 509                        'latin-1', 'strict'))
 510
 511    def send_header(self, keyword, value):
 512        """Send a MIME header to the headers buffer."""
 513        if self.request_version != 'HTTP/0.9':
 514            if not hasattr(self, '_headers_buffer'):
 515                self._headers_buffer = []
 516            self._headers_buffer.append(
 517                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
 518
 519        if keyword.lower() == 'connection':
 520            if value.lower() == 'close':
 521                self.close_connection = True
 522            elif value.lower() == 'keep-alive':
 523                self.close_connection = False
 524
 525    def end_headers(self):
 526        """Send the blank line ending the MIME headers."""
 527        if self.request_version != 'HTTP/0.9':
 528            self._headers_buffer.append(b"\r\n")
 529            self.flush_headers()
 530
 531    def flush_headers(self):
 532        if hasattr(self, '_headers_buffer'):
 533            self.wfile.write(b"".join(self._headers_buffer))
 534            self._headers_buffer = []
 535
 536    def log_request(self, code='-', size='-'):
 537        """Log an accepted request.
 538
 539        This is called by send_response().
 540
 541        """
 542        if isinstance(code, HTTPStatus):
 543            code = code.value
 544        self.log_message('"%s" %s %s',
 545                         self.requestline, str(code), str(size))
 546
 547    def log_error(self, format, *args):
 548        """Log an error.
 549
 550        This is called when a request cannot be fulfilled.  By
 551        default it passes the message on to log_message().
 552
 553        Arguments are the same as for log_message().
 554
 555        XXX This should go to the separate error log.
 556
 557        """
 558
 559        self.log_message(format, *args)
 560
 561    def log_message(self, format, *args):
 562        """Log an arbitrary message.
 563
 564        This is used by all other logging functions.  Override
 565        it if you have specific logging wishes.
 566
 567        The first argument, FORMAT, is a format string for the
 568        message to be logged.  If the format string contains
 569        any % escapes requiring parameters, they should be
 570        specified as subsequent arguments (it's just like
 571        printf!).
 572
 573        The client ip and current date/time are prefixed to
 574        every message.
 575
 576        """
 577
 578        sys.stderr.write("%s - - [%s] %s\n" %
 579                         (self.address_string(),
 580                          self.log_date_time_string(),
 581                          format%args))
 582
 583    def version_string(self):
 584        """Return the server software version string."""
 585        return self.server_version + ' ' + self.sys_version
 586
 587    def date_time_string(self, timestamp=None):
 588        """Return the current date and time formatted for a message header."""
 589        if timestamp is None:
 590            timestamp = time.time()
 591        return email.utils.formatdate(timestamp, usegmt=True)
 592
 593    def log_date_time_string(self):
 594        """Return the current time formatted for logging."""
 595        now = time.time()
 596        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
 597        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
 598                day, self.monthname[month], year, hh, mm, ss)
 599        return s
 600
    # Abbreviated English day names, Monday first.
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated English month names indexed by month number; the None
    # at index 0 keeps the list 1-based.  Used by log_date_time_string().
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
 606
 607    def address_string(self):
 608        """Return the client address."""
 609
 610        return self.client_address[0]
 611
    # Essentially static class variables

    # The version of the HTTP protocol we support; it is written at the
    # start of every status line and compared against "HTTP/1.1" in
    # parse_request().
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers (passed to
    # http.client.parse_headers() in parse_request()).
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility:
    # maps every HTTPStatus member to a (phrase, description) pair.
    # send_error() and send_response_only() look up their default texts
    # here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
 626
 627
 628class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
 629
 630    """Simple HTTP request handler with GET and HEAD commands.
 631
 632    This serves files from the current directory and any of its
 633    subdirectories.  The MIME type for files is determined by
 634    calling the .guess_type() method.
 635
 636    The GET and HEAD requests are identical except that the HEAD
 637    request omits the actual contents of the file.
 638
 639    """
 640
    # Server software name reported by this handler.
    server_version = "SimpleHTTP/" + __version__

    # File-name suffix -> MIME type table.  Both names are bound to the
    # same dict object.
    # NOTE(review): presumably consulted by guess_type(), which is outside
    # the visible part of this class -- confirm.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }
 648
 649    def __init__(self, *args, directory=None, **kwargs):
 650        if directory is None:
 651            directory = os.getcwd()
 652        self.directory = os.fspath(directory)
 653        super().__init__(*args, **kwargs)
 654
 655    def do_GET(self):
 656        """Serve a GET request."""
 657        f = self.send_head()
 658        if f:
 659            try:
 660                self.copyfile(f, self.wfile)
 661            finally:
 662                f.close()
 663
 664    def do_HEAD(self):
 665        """Serve a HEAD request."""
 666        f = self.send_head()
 667        if f:
 668            f.close()
 669
 670    def send_head(self):
 671        """Common code for GET and HEAD commands.
 672
 673        This sends the response code and MIME headers.
 674
 675        Return value is either a file object (which has to be copied
 676        to the outputfile by the caller unless the command was HEAD,
 677        and must be closed by the caller under all circumstances), or
 678        None, in which case the caller has nothing further to do.
 679
 680        """
 681        path = self.translate_path(self.path)
 682        f = None
 683        if os.path.isdir(path):
 684            parts = urllib.parse.urlsplit(self.path)
 685            if not parts.path.endswith('/'):
 686                # redirect browser - doing basically what apache does
 687                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
 688                new_parts = (parts[0], parts[1], parts[2] + '/',
 689                             parts[3], parts[4])
 690                new_url = urllib.parse.urlunsplit(new_parts)
 691                self.send_header("Location", new_url)
 692                self.end_headers()
 693                return None
 694            for index in "index.html", "index.htm":
 695                index = os.path.join(path, index)
 696                if os.path.exists(index):
 697                    path = index
 698                    break
 699            else:
 700                return self.list_directory(path)
 701        ctype = self.guess_type(path)
 702        # check for trailing "/" which should return 404. See Issue17324
 703        # The test for this was added in test_httpserver.py
 704        # However, some OS platforms accept a trailingSlash as a filename
 705        # See discussion on python-dev and Issue34711 regarding
 706        # parseing and rejection of filenames with a trailing slash
 707        if path.endswith("/"):
 708            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
 709            return None
 710        try:
 711            f = open(path, 'rb')
 712        except OSError:
 713            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
 714            return None
 715
 716        try:
 717            fs = os.fstat(f.fileno())
 718            # Use browser cache if possible
 719            if ("If-Modified-Since" in self.headers
 720                    and "If-None-Match" not in self.headers):
 721                # compare If-Modified-Since and time of last file modification
 722                try:
 723                    ims = email.utils.parsedate_to_datetime(
 724                        self.headers["If-Modified-Since"])
 725                except (TypeError, IndexError, OverflowError, ValueError):
 726                    # ignore ill-formed values
 727                    pass
 728                else:
 729                    if ims.tzinfo is None:
 730                        # obsolete format with no timezone, cf.
 731                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
 732                        ims = ims.replace(tzinfo=datetime.timezone.utc)
 733                    if ims.tzinfo is datetime.timezone.utc:
 734                        # compare to UTC datetime of last modification
 735                        last_modif = datetime.datetime.fromtimestamp(
 736                            fs.st_mtime, datetime.timezone.utc)
 737                        # remove microseconds, like in If-Modified-Since
 738                        last_modif = last_modif.replace(microsecond=0)
 739
 740                        if last_modif <= ims:
 741                            self.send_response(HTTPStatus.NOT_MODIFIED)
 742                            self.end_headers()
 743                            f.close()
 744                            return None
 745
 746            self.send_response(HTTPStatus.OK)
 747            self.send_header("Content-type", ctype)
 748            self.send_header("Content-Length", str(fs[6]))
 749            self.send_header("Last-Modified",
 750                self.date_time_string(fs.st_mtime))
 751            self.end_headers()
 752            return f
 753        except:
 754            f.close()
 755            raise
 756
 757    def list_directory(self, path):
 758        """Helper to produce a directory listing (absent index.html).
 759
 760        Return value is either a file object, or None (indicating an
 761        error).  In either case, the headers are sent, making the
 762        interface the same as for send_head().
 763
 764        """
 765        try:
 766            list = os.listdir(path)
 767        except OSError:
 768            self.send_error(
 769                HTTPStatus.NOT_FOUND,
 770                "No permission to list directory")
 771            return None
 772        list.sort(key=lambda a: a.lower())
 773        r = []
 774        try:
 775            displaypath = urllib.parse.unquote(self.path,
 776                                               errors='surrogatepass')
 777        except UnicodeDecodeError:
 778            displaypath = urllib.parse.unquote(path)
 779        displaypath = html.escape(displaypath, quote=False)
 780        enc = sys.getfilesystemencoding()
 781        title = 'Directory listing for %s' % displaypath
 782        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
 783                 '"http://www.w3.org/TR/html4/strict.dtd">')
 784        r.append('<html>\n<head>')
 785        r.append('<meta http-equiv="Content-Type" '
 786                 'content="text/html; charset=%s">' % enc)
 787        r.append('<title>%s</title>\n</head>' % title)
 788        r.append('<body>\n<h1>%s</h1>' % title)
 789        r.append('<hr>\n<ul>')
 790        for name in list:
 791            fullname = os.path.join(path, name)
 792            displayname = linkname = name
 793            # Append / for directories or @ for symbolic links
 794            if os.path.isdir(fullname):
 795                displayname = name + "/"
 796                linkname = name + "/"
 797            if os.path.islink(fullname):
 798                displayname = name + "@"
 799                # Note: a link to a directory displays with @ and links with /
 800            r.append('<li><a href="%s">%s</a></li>'
 801                    % (urllib.parse.quote(linkname,
 802                                          errors='surrogatepass'),
 803                       html.escape(displayname, quote=False)))
 804        r.append('</ul>\n<hr>\n</body>\n</html>\n')
 805        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
 806        f = io.BytesIO()
 807        f.write(encoded)
 808        f.seek(0)
 809        self.send_response(HTTPStatus.OK)
 810        self.send_header("Content-type", "text/html; charset=%s" % enc)
 811        self.send_header("Content-Length", str(len(encoded)))
 812        self.end_headers()
 813        return f
 814
 815    def translate_path(self, path):
 816        """Translate a /-separated PATH to the local filename syntax.
 817
 818        Components that mean special things to the local file system
 819        (e.g. drive or directory names) are ignored.  (XXX They should
 820        probably be diagnosed.)
 821
 822        """
 823        # abandon query parameters
 824        path = path.split('?',1)[0]
 825        path = path.split('#',1)[0]
 826        # Don't forget explicit trailing slash when normalizing. Issue17324
 827        trailing_slash = path.rstrip().endswith('/')
 828        try:
 829            path = urllib.parse.unquote(path, errors='surrogatepass')
 830        except UnicodeDecodeError:
 831            path = urllib.parse.unquote(path)
 832        path = posixpath.normpath(path)
 833        words = path.split('/')
 834        words = filter(None, words)
 835        path = self.directory
 836        for word in words:
 837            if os.path.dirname(word) or word in (os.curdir, os.pardir):
 838                # Ignore components that are not a simple file/directory name
 839                continue
 840            path = os.path.join(path, word)
 841        if trailing_slash:
 842            path += '/'
 843        return path
 844
 845    def copyfile(self, source, outputfile):
 846        """Copy all data between two file objects.
 847
 848        The SOURCE argument is a file object open for reading
 849        (or anything with a read() method) and the DESTINATION
 850        argument is a file object open for writing (or
 851        anything with a write() method).
 852
 853        The only reason for overriding this would be to change
 854        the block size or perhaps to replace newlines by CRLF
 855        -- note however that this the default server uses this
 856        to copy binary data as well.
 857
 858        """
 859        shutil.copyfileobj(source, outputfile)
 860
 861    def guess_type(self, path):
 862        """Guess the type of a file.
 863
 864        Argument is a PATH (a filename).
 865
 866        Return value is a string of the form type/subtype,
 867        usable for a MIME Content-type header.
 868
 869        The default implementation looks the file's extension
 870        up in the table self.extensions_map, using application/octet-stream
 871        as a default; however it would be permissible (if
 872        slow) to look inside the data to make a better guess.
 873
 874        """
 875        base, ext = posixpath.splitext(path)
 876        if ext in self.extensions_map:
 877            return self.extensions_map[ext]
 878        ext = ext.lower()
 879        if ext in self.extensions_map:
 880            return self.extensions_map[ext]
 881        guess, _ = mimetypes.guess_type(path)
 882        if guess:
 883            return guess
 884        return 'application/octet-stream'
 885
 886
 887# Utilities for CGIHTTPRequestHandler
 888
 889def _url_collapse_path(path):
 890    """
 891    Given a URL path, remove extra '/'s and '.' path elements and collapse
 892    any '..' references and returns a collapsed path.
 893
 894    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
 895    The utility of this function is limited to is_cgi method and helps
 896    preventing some security attacks.
 897
 898    Returns: The reconstituted URL, which will always start with a '/'.
 899
 900    Raises: IndexError if too many '..' occur within the path.
 901
 902    """
 903    # Query component should not be involved.
 904    path, _, query = path.partition('?')
 905    path = urllib.parse.unquote(path)
 906
 907    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
 908    # path semantics rather than local operating system semantics.
 909    path_parts = path.split('/')
 910    head_parts = []
 911    for part in path_parts[:-1]:
 912        if part == '..':
 913            head_parts.pop() # IndexError if more '..' than prior parts
 914        elif part and part != '.':
 915            head_parts.append( part )
 916    if path_parts:
 917        tail_part = path_parts.pop()
 918        if tail_part:
 919            if tail_part == '..':
 920                head_parts.pop()
 921                tail_part = ''
 922            elif tail_part == '.':
 923                tail_part = ''
 924    else:
 925        tail_part = ''
 926
 927    if query:
 928        tail_part = '?'.join((tail_part, query))
 929
 930    splitpath = ('/' + '/'.join(head_parts), tail_part)
 931    collapsed_path = "/".join(splitpath)
 932
 933    return collapsed_path
 934
 935
 936
 937nobody = None
 938
 939def nobody_uid():
 940    """Internal routine to get nobody's uid"""
 941    global nobody
 942    if nobody:
 943        return nobody
 944    try:
 945        import pwd
 946    except ImportError:
 947        return -1
 948    try:
 949        nobody = pwd.getpwnam('nobody')[2]
 950    except KeyError:
 951        nobody = 1 + max(x[2] for x in pwd.getpwall())
 952    return nobody
 953
 954
 955def executable(path):
 956    """Test for executable file."""
 957    return os.access(path, os.X_OK)
 958
 959
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    A request is treated as a CGI request when is_cgi() accepts
    self.path; the script is then located and executed by run_cgi().

    """

    # Determine platform specifics
    # (run_cgi() forks when os.fork exists, otherwise uses subprocess)
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 (Not Implemented) error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        For a CGI request the script is run and the result of run_cgi()
        is returned (run_cgi() has no return value, so this is None);
        otherwise the request is delegated to
        SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether a prefix of the
        normalized url path, ending just before a '/', is one of the
        strings in self.cgi_directories.

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try each prefix that ends right before a '/', shortest first
        # (the search starts at index 1, so the prefix is never empty).
        dir_sep = collapsed_path.find('/', 1)
        while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories:
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            # head is the matched CGI directory, tail everything after the '/'
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False


    # URL path prefixes that is_cgi() recognizes as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The decision is based only on the file extension
        (.py or .pyw, compared case-insensitively).
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple stored in self.cgi_info by is_cgi().
        The script is located below the CGI directory, checked for
        existence / file type / executability (an error response is
        sent and the method returns if a check fails), the CGI
        environment is built from the request, a 200 status line is
        sent, and the script is run -- via fork()/execve() when
        have_fork is true, via subprocess.Popen() otherwise.

        Always returns None.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend dir over leading components of rest for as long as they
        # name existing local directories (scripts in subdirectories).
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        # scriptname is the URL of the script, scriptfile its local path.
        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # With fork the script is exec'ed directly and so must be
        # executable; without fork only non-Python scripts are checked
        # (Python scripts are then started through the interpreter below).
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        # Build the script's environment on a copy of os.environ.
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        # rest now holds only the extra path that followed the script name.
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            # Only a "<scheme> <credentials>" header is taken into account.
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: REMOTE_USER is left unset.
                        pass
                    else:
                        # REMOTE_USER is set only for exactly "user:password"
                        # (a password containing ':' yields more fields).
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            # No header sent: use the message object's default content type.
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # All Accept headers are merged into one comma-separated value.
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        # Empty Cookie headers are dropped, the others joined with ', '.
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs.  end_headers()
        # is not called here -- presumably the script's own output
        # supplies the remaining headers and the blank line.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed as a command-line argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                # Block until the child has finished (requests are
                # handled synchronously).
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                # Dropping privileges is best effort: failure is ignored.
                try:
                    os.setuid(nobody)
                except OSError:
                    pass
                # Wire the connection to the script's stdin and stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Anything that went wrong before/while exec'ing: report
                # it and terminate the child without returning to the
                # server code.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): unlike the fork branch this uses the raw query,
            # not decoded_query -- confirm whether that is intended.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            # A missing or malformed Content-Length counts as "no body".
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            # Only a POST body is read and handed to the script's stdin.
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            # (assumes the unbuffered rfile exposes the socket as _sock
            # -- TODO confirm)
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            # Runs the script to completion; its whole output is buffered
            # and then relayed to the client, stderr goes to the error log.
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1221
1222
def _get_best_family(*address):
    """Return (family, sockaddr) of the first getaddrinfo() match.

    *address* is forwarded to socket.getaddrinfo() (typically host and
    port); the lookup is restricted to passive stream sockets.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    first_match = next(iter(candidates))
    family, _socktype, _proto, _canonname, sockaddr = first_match
    return family, sockaddr
1231
1232
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Run an HTTP server with the given request handler class.

    The server listens on port 8000 (or the port argument) of the
    address selected by bind, and serves until interrupted from the
    keyboard, at which point the process exits with status 0.

    Note that address_family is set on ServerClass and
    protocol_version on HandlerClass, i.e. on the objects passed in.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound (relevant e.g. for port 0).
        host, port = httpd.socket.getsockname()[:2]
        # IPv6 literals need brackets inside a URL.
        url_host = host if ':' not in host else f'[{host}]'
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1256
if __name__ == '__main__':
    # Command-line entry point: parse the options and start a server.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                        '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Hand the real handler class to test(): attributes it sets (such as
    # protocol_version) would be lost on a functools.partial wrapper.
    if args.cgi:
        handler_class = CGIHTTPRequestHandler
    else:
        handler_class = SimpleHTTPRequestHandler

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Bind --directory here so that it applies to every handler
            # class, including the CGI handler.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )