-"""HTTP server classes.
-
-Note: BaseHTTPRequestHandler doesn't implement any HTTP request method; see
-SimpleHTTPRequestHandler for simple implementations of GET and HEAD, and
-CGIHTTPRequestHandler for CGI scripts (the only handler that accepts POST).
-
-It does, however, optionally implement HTTP/1.1 persistent connections,
-as of version 0.3.
-
-Notes on CGIHTTPRequestHandler
-------------------------------
-
-This class implements GET and POST requests to cgi-bin scripts.
-
-If the os.fork() function is not present (e.g. on Windows),
-subprocess.Popen() is used as a fallback, with slightly altered semantics.
-
-In all cases, the implementation is intentionally naive -- all
-requests are executed synchronously.
-
-SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
--- it may execute arbitrary Python code or external programs.
-
-Note that status code 200 is sent prior to execution of a CGI script, so
-scripts cannot send other status codes such as 302 (redirect).
-
-XXX To do:
-
-- log requests even later (to capture byte count)
-- log user-agent header and other interesting goodies
-- send error log to separate file
-"""
-
-
-# See also:
-#
-# HTTP Working Group T. Berners-Lee
-# INTERNET-DRAFT R. T. Fielding
-# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
-# Expires September 8, 1995 March 8, 1995
-#
-# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
-#
-# and
-#
-# Network Working Group R. Fielding
-# Request for Comments: 2616 et al
-# Obsoletes: 2068 June 1999
-# Category: Standards Track
-#
-# URL: http://www.faqs.org/rfcs/rfc2616.html
-
-# Log files
-# ---------
-#
-# Here's a quote from the NCSA httpd docs about log file format.
-#
-# | The logfile format is as follows. Each line consists of:
-# |
-# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
-# |
-# | host: Either the DNS name or the IP number of the remote client
-# | rfc931: Any information returned by identd for this person,
-# | - otherwise.
-# | authuser: If user sent a userid for authentication, the user name,
-# | - otherwise.
-# | DD: Day
-# | Mon: Month (calendar name)
-# | YYYY: Year
-# | hh: hour (24-hour format, the machine's timezone)
-# | mm: minutes
-# | ss: seconds
-# | request: The first line of the HTTP request as sent by the client.
-# | ddd: the status code returned by the server, - if not available.
-# | bbbb: the total number of bytes sent,
-# | *not including the HTTP/1.0 header*, - if not available
-# |
-# | You can determine the name of the file accessed through request.
-#
-# (Actually, the latter is only true if you know the server configuration
-# at the time the request was made!)
-
# Module version; the request handlers embed it in their server_version
# strings ("BaseHTTP/<version>", "SimpleHTTP/<version>").
-__version__ = "0.6"
-
# Public names of this module (what a star-import exposes).
-__all__ = [
- "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
- "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
-]
-
-import copy
-import datetime
-import email.utils
-import html
-import http.client
-import io
-import mimetypes
-import os
-import posixpath
-import select
-import shutil
-import socket # For gethostbyaddr()
-import socketserver
-import sys
-import time
-import urllib.parse
-import contextlib
-from functools import partial
-
-from http import HTTPStatus
-
-
-# Default error message template
# BaseHTTPRequestHandler uses this as its error_message_format; send_error()
# renders it with %-formatting from a mapping with the keys 'code',
# 'message' and 'explain' (message and explain are HTML-escaped first).
-DEFAULT_ERROR_MESSAGE = """\
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
- <head>
- <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
- <title>Error response</title>
- </head>
- <body>
- <h1>Error response</h1>
- <p>Error code: %(code)d</p>
- <p>Message: %(message)s.</p>
- <p>Error code explanation: %(code)s - %(explain)s.</p>
- </body>
-</html>
-"""
-
# Value of the Content-Type header that send_error() sends with the page above.
-DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
-
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    # Seems to make sense in a testing environment: the listening address
    # may be reused immediately after a restart.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the listening socket, then store server_name and server_port.

        server_name is the fully qualified domain name for the bound host;
        server_port is the bound port number.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_port = bound_port
        self.server_name = socket.getfqdn(bound_host)
-
-
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Request threads are created as daemon threads.
    daemon_threads = True
-
-
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    # Translation table applied by log_message(): C0/C1 control characters
    # and DEL become visible \xNN escapes and a backslash is doubled, so
    # request data cannot forge extra log lines or emit terminal escapes.
    _control_char_table = str.maketrans(
        {c: r'\x%02x' % c for c in [*range(0x20), *range(0x7f, 0xa0)]})
    _control_char_table[ord('\\')] = r'\\'

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if not words:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                # int() alone would accept signs, spaces and underscores
                # (e.g. "HTTP/1.-1"); require plain, reasonably short digits.
                if any(not part.isdigit() for part in version_number):
                    raise ValueError("non digit in http version")
                if any(len(part) > 10 for part in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # A path starting with '//' is read by urlsplit() (and by browsers
        # following a Location header built from it) as a network location,
        # which would allow open redirects.  Reduce it to a single '/'.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body.  The default is to always respond
        with a 100 Continue.  You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                # End of stream: the client closed the connection.
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # Actually send the response if not already done.
            self.wfile.flush()
        except socket.timeout as e:
            # A read or a write timed out.  Discard this connection.
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
                code not in (HTTPStatus.NO_CONTENT,
                             HTTPStatus.RESET_CONTENT,
                             HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only."""
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer."""
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        # A Connection header we emit also decides whether handle() keeps
        # serving requests on this connection.
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # The buffer is created lazily by the send_* methods; create it
            # here too so calling end_headers() first does not raise.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header lines to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Control characters in the formatted message are written as
        \\xNN escapes (and backslashes are doubled), because the message
        usually contains text chosen by the client.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the 1-based month number, hence the leading None.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
-
-
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the directory given at construction time
    (the current directory by default) and any of its subdirectories.
    The MIME type for files is determined by calling the .guess_type()
    method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__
    # Extensions looked up by guess_type() before falling back to mimetypes.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }

    def __init__(self, *args, directory=None, **kwargs):
        """Store the directory to serve, then run the base initializer.

        directory may be any path-like object; None means os.getcwd().
        It is assigned before the base __init__ runs so that it is already
        set while that initializer executes.
        """
        if directory is None:
            directory = os.getcwd()
        self.directory = os.fspath(directory)
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                # The redirect has no body; say so explicitly so that
                # clients on a persistent connection do not wait for one.
                self.send_header("Content-Length", "0")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                # Only a regular file can act as the index page; a directory
                # that merely has that name must not hide the listing.
                if os.path.isfile(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        # check for trailing "/" which should return 404. See Issue17324
        # The test for this was added in test_httpserver.py
        # However, some OS platforms accept a trailing slash as a filename
        # See discussion on python-dev and Issue34711 regarding
        # parsing and rejection of filenames with a trailing slash
        if path.endswith("/"):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            fs = os.fstat(f.fileno())
            # Use browser cache if possible
            if ("If-Modified-Since" in self.headers
                    and "If-None-Match" not in self.headers):
                # compare If-Modified-Since and time of last file modification
                try:
                    ims = email.utils.parsedate_to_datetime(
                        self.headers["If-Modified-Since"])
                except (TypeError, IndexError, OverflowError, ValueError):
                    # ignore ill-formed values
                    pass
                else:
                    if ims.tzinfo is None:
                        # obsolete format with no timezone, cf.
                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                        ims = ims.replace(tzinfo=datetime.timezone.utc)
                    if ims.tzinfo is datetime.timezone.utc:
                        # compare to UTC datetime of last modification
                        last_modif = datetime.datetime.fromtimestamp(
                            fs.st_mtime, datetime.timezone.utc)
                        # remove microseconds, like in If-Modified-Since
                        last_modif = last_modif.replace(microsecond=0)

                        if last_modif <= ims:
                            self.send_response(HTTPStatus.NOT_MODIFIED)
                            self.end_headers()
                            f.close()
                            return None

            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except:
            # Deliberately bare: whatever went wrong while sending headers,
            # the file must not leak; the exception is re-raised unchanged.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        # The title shows the requested URL path only: no fragment, no query
        # string, and never the local filesystem path.
        displaypath = self.path.split('#', 1)[0]
        displaypath = displaypath.split('?', 1)[0]
        try:
            displaypath = urllib.parse.unquote(displaypath,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            displaypath = urllib.parse.unquote(displaypath)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib.parse.quote(linkname,
                                           errors='surrogatepass'),
                        html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is always located below self.directory.

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = self.directory
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map (first as written, then
        lower-cased), then asks the mimetypes module, and uses
        application/octet-stream as a last resort; however it would be
        permissible (if slow) to look inside the data to make a better
        guess.

        """
        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        guess, _ = mimetypes.guess_type(path)
        if guess:
            return guess
        return 'application/octet-stream'
-
-
-# Utilities for CGIHTTPRequestHandler
-
-def _url_collapse_path(path):
- """
- Given a URL path, remove extra '/'s and '.' path elements and collapse
- any '..' references and returns a collapsed path.
-
- Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
- The utility of this function is limited to is_cgi method and helps
- preventing some security attacks.
-
- Returns: The reconstituted URL, which will always start with a '/'.
-
- Raises: IndexError if too many '..' occur within the path.
-
- """
- # Query component should not be involved.
- path, _, query = path.partition('?')
- path = urllib.parse.unquote(path)
-
- # Similar to os.path.split(os.path.normpath(path)) but specific to URL
- # path semantics rather than local operating system semantics.
- path_parts = path.split('/')
- head_parts = []
- for part in path_parts[:-1]:
- if part == '..':
- head_parts.pop() # IndexError if more '..' than prior parts
- elif part and part != '.':
- head_parts.append( part )
- if path_parts:
- tail_part = path_parts.pop()
- if tail_part:
- if tail_part == '..':
- head_parts.pop()
- tail_part = ''
- elif tail_part == '.':
- tail_part = ''
- else:
- tail_part = ''
-
- if query:
- tail_part = '?'.join((tail_part, query))
-
- splitpath = ('/' + '/'.join(head_parts), tail_part)
- collapsed_path = "/".join(splitpath)
-
- return collapsed_path
-
-
-
# Cached uid of the 'nobody' user; filled in by nobody_uid() on first use.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one is set, -1 when the pwd module cannot
    be imported, and otherwise the uid of the 'nobody' account -- or, if no
    such account exists, one more than the largest uid in the password
    database.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
-
-
def executable(path):
    """Return whether os.access() grants execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
-
-
-class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
-
- """Complete HTTP server with GET, HEAD and POST commands.
-
- GET and HEAD also support running CGI scripts.
-
- The POST command is *only* implemented for CGI scripts.
-
- """
-
- # Determine platform specifics
- have_fork = hasattr(os, 'fork')
-
- # Make rfile unbuffered -- we need to read one line and then pass
- # the rest to a subprocess, so we can't use buffered input.
- rbufsize = 0
-
- def do_POST(self):
- """Serve a POST request.
-
- This is only implemented for CGI scripts.
-
- """
-
- if self.is_cgi():
- self.run_cgi()
- else:
- self.send_error(
- HTTPStatus.NOT_IMPLEMENTED,
- "Can only POST to CGI scripts")
-
- def send_head(self):
- """Version of send_head that support CGI scripts"""
- if self.is_cgi():
- return self.run_cgi()
- else:
- return SimpleHTTPRequestHandler.send_head(self)
-
- def is_cgi(self):
- """Test whether self.path corresponds to a CGI script.
-
- Returns True and updates the cgi_info attribute to the tuple
- (dir, rest) if self.path requires running a CGI script.
- Returns False otherwise.
-
- If any exception is raised, the caller should assume that
- self.path was rejected as invalid and act accordingly.
-
- The default implementation tests whether the normalized url
- path begins with one of the strings in self.cgi_directories
- (and the next character is a '/' or the end of the string).
-
- """
- collapsed_path = _url_collapse_path(self.path)
- dir_sep = collapsed_path.find('/', 1)
- while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories:
- dir_sep = collapsed_path.find('/', dir_sep+1)
- if dir_sep > 0:
- head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
- self.cgi_info = head, tail
- return True
- return False
-
-
- cgi_directories = ['/cgi-bin', '/htbin']
-
- def is_executable(self, path):
- """Test whether argument path is an executable file."""
- return executable(path)
-
- def is_python(self, path):
- """Test whether argument path is a Python script."""
- head, tail = os.path.splitext(path)
- return tail.lower() in (".py", ".pyw")
-
def run_cgi(self):
    """Execute a CGI script.

    Starts from self.cgi_info (dir, rest).  The directory part is
    extended over further path components for as long as they translate
    to existing directories; the remainder is split into the script
    name, extra path (PATH_INFO) and query string.  A 404 or 403 error
    is sent if the script is missing, is not a plain file, or is not
    executable.  Otherwise the CGI environment is built, a 200 status
    line is sent, and the script is run: through os.fork()/os.execve()
    when self.have_fork is true, else through subprocess.Popen().
    """
    dir, rest = self.cgi_info
    path = dir + '/' + rest
    # Walk deeper while the next path component is an existing directory.
    i = path.find('/', len(dir)+1)
    while i >= 0:
        nextdir = path[:i]
        nextrest = path[i+1:]

        scriptdir = self.translate_path(nextdir)
        if os.path.isdir(scriptdir):
            dir, rest = nextdir, nextrest
            i = path.find('/', len(dir)+1)
        else:
            break

    # find an explicit query string, if present.
    rest, _, query = rest.partition('?')

    # dissect the part after the directory name into a script name &
    # a possible additional path, to be stored in PATH_INFO.
    i = rest.find('/')
    if i >= 0:
        script, rest = rest[:i], rest[i:]
    else:
        script, rest = rest, ''

    scriptname = dir + '/' + script
    scriptfile = self.translate_path(scriptname)
    if not os.path.exists(scriptfile):
        self.send_error(
            HTTPStatus.NOT_FOUND,
            "No such CGI script (%r)" % scriptname)
        return
    if not os.path.isfile(scriptfile):
        self.send_error(
            HTTPStatus.FORBIDDEN,
            "CGI script is not a plain file (%r)" % scriptname)
        return
    ispy = self.is_python(scriptname)
    # Without fork, Python scripts are started through the interpreter
    # (see the subprocess branch below), so they skip this check.
    if self.have_fork or not ispy:
        if not self.is_executable(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not executable (%r)" % scriptname)
            return

    # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
    # XXX Much of the following could be prepared ahead of time!
    env = copy.deepcopy(os.environ)
    env['SERVER_SOFTWARE'] = self.version_string()
    env['SERVER_NAME'] = self.server.server_name
    env['GATEWAY_INTERFACE'] = 'CGI/1.1'
    env['SERVER_PROTOCOL'] = self.protocol_version
    env['SERVER_PORT'] = str(self.server.server_port)
    env['REQUEST_METHOD'] = self.command
    uqrest = urllib.parse.unquote(rest)
    env['PATH_INFO'] = uqrest
    env['PATH_TRANSLATED'] = self.translate_path(uqrest)
    env['SCRIPT_NAME'] = scriptname
    if query:
        env['QUERY_STRING'] = query
    env['REMOTE_ADDR'] = self.client_address[0]
    authorization = self.headers.get("authorization")
    if authorization:
        # Expect "<scheme> <credentials>"; anything else is ignored.
        authorization = authorization.split()
        if len(authorization) == 2:
            import base64, binascii
            env['AUTH_TYPE'] = authorization[0]
            if authorization[0].lower() == "basic":
                try:
                    authorization = authorization[1].encode('ascii')
                    authorization = base64.decodebytes(authorization).\
                                    decode('ascii')
                except (binascii.Error, UnicodeError):
                    # Undecodable credentials: leave REMOTE_USER unset.
                    pass
                else:
                    authorization = authorization.split(':')
                    if len(authorization) == 2:
                        env['REMOTE_USER'] = authorization[0]
    # XXX REMOTE_IDENT
    if self.headers.get('content-type') is None:
        env['CONTENT_TYPE'] = self.headers.get_content_type()
    else:
        env['CONTENT_TYPE'] = self.headers['content-type']
    length = self.headers.get('content-length')
    if length:
        env['CONTENT_LENGTH'] = length
    referer = self.headers.get('referer')
    if referer:
        env['HTTP_REFERER'] = referer
    accept = self.headers.get_all('accept', ())
    env['HTTP_ACCEPT'] = ','.join(accept)
    ua = self.headers.get('user-agent')
    if ua:
        env['HTTP_USER_AGENT'] = ua
    co = filter(None, self.headers.get_all('cookie', []))
    cookie_str = ', '.join(co)
    if cookie_str:
        env['HTTP_COOKIE'] = cookie_str
    # XXX Other HTTP_* headers
    # Since we're setting the env in the parent, provide empty
    # values to override previously set values
    for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
              'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
        env.setdefault(k, "")

    # The status line goes out before the script runs, so the script
    # cannot choose a different status code.
    self.send_response(HTTPStatus.OK, "Script output follows")
    self.flush_headers()

    decoded_query = query.replace('+', ' ')

    if self.have_fork:
        # Unix -- fork as we should
        args = [script]
        # A query without '=' is passed as a command-line argument.
        if '=' not in decoded_query:
            args.append(decoded_query)
        nobody = nobody_uid()
        self.wfile.flush() # Always flush before forking
        pid = os.fork()
        if pid != 0:
            # Parent
            pid, sts = os.waitpid(pid, 0)
            # throw away additional data [see bug #427345]
            while select.select([self.rfile], [], [], 0)[0]:
                if not self.rfile.read(1):
                    break
            exitcode = os.waitstatus_to_exitcode(sts)
            if exitcode:
                self.log_error(f"CGI script exit code {exitcode}")
            return
        # Child
        try:
            try:
                os.setuid(nobody)
            except OSError:
                # Best effort: continue under the current uid.
                pass
            # Wire the request/response streams to the script's
            # stdin (fd 0) and stdout (fd 1) before exec.
            os.dup2(self.rfile.fileno(), 0)
            os.dup2(self.wfile.fileno(), 1)
            os.execve(scriptfile, args, env)
        except:
            # Any failure in the child is reported and the child exits
            # immediately instead of returning into the server loop.
            self.server.handle_error(self.request, self.client_address)
            os._exit(127)

    else:
        # Non-Unix -- use subprocess
        import subprocess
        cmdline = [scriptfile]
        if self.is_python(scriptfile):
            interp = sys.executable
            if interp.lower().endswith("w.exe"):
                # On Windows, use python.exe, not pythonw.exe
                interp = interp[:-5] + interp[-4:]
            cmdline = [interp, '-u'] + cmdline
        # NOTE(review): this branch tests and appends the raw query,
        # whereas the fork branch uses decoded_query -- confirm intent.
        if '=' not in query:
            cmdline.append(query)
        self.log_message("command: %s", subprocess.list2cmdline(cmdline))
        try:
            nbytes = int(length)
        except (TypeError, ValueError):
            nbytes = 0
        p = subprocess.Popen(cmdline,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             env = env
                             )
        # Only a POST body of declared length is forwarded to the script.
        if self.command.lower() == "post" and nbytes > 0:
            data = self.rfile.read(nbytes)
        else:
            data = None
        # throw away additional data [see bug #427345]
        while select.select([self.rfile._sock], [], [], 0)[0]:
            if not self.rfile._sock.recv(1):
                break
        stdout, stderr = p.communicate(data)
        self.wfile.write(stdout)
        if stderr:
            self.log_error('%s', stderr)
        p.stderr.close()
        p.stdout.close()
        status = p.returncode
        if status:
            self.log_error("CGI script exit status %#x", status)
        else:
            self.log_message("CGI script exited OK")
-
-
def _get_best_family(*address):
    """Return (family, sockaddr) from the first getaddrinfo() result.

    *address* is forwarded positionally to socket.getaddrinfo()
    (callers in this file pass a host and a port); the lookup is
    restricted to passive stream sockets.
    """
    infos = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    # Only family and sockaddr are used; the other fields get
    # underscore names so the builtin ``type`` is not shadowed.
    family, _socktype, _proto, _canonname, sockaddr = next(iter(infos))
    return family, sockaddr
-
-
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Serve HTTP with the given handler class until interrupted.

    The server listens on *bind* (default: whatever _get_best_family()
    picks) and on port 8000 unless *port* says otherwise.  Ctrl-C
    prints a message and exits the process with status 0.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        host, port = httpd.socket.getsockname()[:2]
        # IPv6 literals need brackets inside a URL.
        if ':' in host:
            url_host = f'[{host}]'
        else:
            url_host = host
        print(
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
-
if __name__ == '__main__':
    import argparse

    # Command line: optional flags plus an optional positional port.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                             '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    # The CGI handler is used as-is; the plain file handler is bound
    # to the requested directory.
    handler_class = (
        CGIHTTPRequestHandler
        if args.cgi
        else partial(SimpleHTTPRequestHandler, directory=args.directory)
    )

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )
+"""HTTP server classes.\r
+\r
+Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see\r
+SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,\r
+and CGIHTTPRequestHandler for CGI scripts.\r
+\r
+It does, however, optionally implement HTTP/1.1 persistent connections,\r
+as of version 0.3.\r
+\r
+Notes on CGIHTTPRequestHandler\r
+------------------------------\r
+\r
+This class implements GET and POST requests to cgi-bin scripts.\r
+\r
+If the os.fork() function is not present (e.g. on Windows),\r
+subprocess.Popen() is used as a fallback, with slightly altered semantics.\r
+\r
+In all cases, the implementation is intentionally naive -- all\r
+requests are executed synchronously.\r
+\r
+SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL\r
+-- it may execute arbitrary Python code or external programs.\r
+\r
+Note that status code 200 is sent prior to execution of a CGI script, so\r
+scripts cannot send other status codes such as 302 (redirect).\r
+\r
+XXX To do:\r
+\r
+- log requests even later (to capture byte count)\r
+- log user-agent header and other interesting goodies\r
+- send error log to separate file\r
+"""\r
+\r
+\r
+# See also:\r
+#\r
+# HTTP Working Group T. Berners-Lee\r
+# INTERNET-DRAFT R. T. Fielding\r
+# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen\r
+# Expires September 8, 1995 March 8, 1995\r
+#\r
+# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt\r
+#\r
+# and\r
+#\r
+# Network Working Group R. Fielding\r
+# Request for Comments: 2616 et al\r
+# Obsoletes: 2068 June 1999\r
+# Category: Standards Track\r
+#\r
+# URL: http://www.faqs.org/rfcs/rfc2616.html\r
+\r
+# Log files\r
+# ---------\r
+#\r
+# Here's a quote from the NCSA httpd docs about log file format.\r
+#\r
+# | The logfile format is as follows. Each line consists of:\r
+# |\r
+# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb\r
+# |\r
+# | host: Either the DNS name or the IP number of the remote client\r
+# | rfc931: Any information returned by identd for this person,\r
+# | - otherwise.\r
+# | authuser: If user sent a userid for authentication, the user name,\r
+# | - otherwise.\r
+# | DD: Day\r
+# | Mon: Month (calendar name)\r
+# | YYYY: Year\r
+# | hh: hour (24-hour format, the machine's timezone)\r
+# | mm: minutes\r
+# | ss: seconds\r
+# | request: The first line of the HTTP request as sent by the client.\r
+# | ddd: the status code returned by the server, - if not available.\r
+# | bbbb: the total number of bytes sent,\r
+# | *not including the HTTP/1.0 header*, - if not available\r
+# |\r
+# | You can determine the name of the file accessed through request.\r
+#\r
+# (Actually, the latter is only true if you know the server configuration\r
+# at the time the request was made!)\r
+\r
+__version__ = "0.6"\r
+\r
+__all__ = [\r
+ "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",\r
+ "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",\r
+]\r
+\r
+import copy\r
+import datetime\r
+import email.utils\r
+import html\r
+import http.client\r
+import io\r
+import mimetypes\r
+import os\r
+import posixpath\r
+import select\r
+import shutil\r
+import socket # For gethostbyaddr()\r
+import socketserver\r
+import sys\r
+import time\r
+import urllib.parse\r
+import contextlib\r
+from functools import partial\r
+\r
+from http import HTTPStatus\r
+\r
+\r
+# Default error message template\r
+DEFAULT_ERROR_MESSAGE = """\\r
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\r
+ "http://www.w3.org/TR/html4/strict.dtd">\r
+<html>\r
+ <head>\r
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8">\r
+ <title>Error response</title>\r
+ </head>\r
+ <body>\r
+ <h1>Error response</h1>\r
+ <p>Error code: %(code)d</p>\r
+ <p>Message: %(message)s.</p>\r
+ <p>Error code explanation: %(code)s - %(explain)s.</p>\r
+ </body>\r
+</html>\r
+"""\r
+\r
+DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"\r
+\r
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its host name and port once bound."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind via TCPServer, then store server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
+\r
+\r
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Read by ThreadingMixIn; presumably marks handler threads as daemon
    # threads -- see the socketserver documentation.
    daemon_threads = True
+\r
+\r
+class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):\r
+\r
+ """HTTP request handler base class.\r
+\r
+ The following explanation of HTTP serves to guide you through the\r
+ code as well as to expose any misunderstandings I may have about\r
+ HTTP (so you don't need to read the code to figure out I'm wrong\r
+ :-).\r
+\r
+ HTTP (HyperText Transfer Protocol) is an extensible protocol on\r
+ top of a reliable stream transport (e.g. TCP/IP). The protocol\r
+ recognizes three parts to a request:\r
+\r
+ 1. One line identifying the request type and path\r
+ 2. An optional set of RFC-822-style headers\r
+ 3. An optional data part\r
+\r
+ The headers and data are separated by a blank line.\r
+\r
+ The first line of the request has the form\r
+\r
+ <command> <path> <version>\r
+\r
+ where <command> is a (case-sensitive) keyword such as GET or POST,\r
+ <path> is a string containing path information for the request,\r
+ and <version> should be the string "HTTP/1.0" or "HTTP/1.1".\r
+ <path> is encoded using the URL encoding scheme (using %xx to signify\r
+ the ASCII character with hex code xx).\r
+\r
+ The specification specifies that lines are separated by CRLF but\r
+ for compatibility with the widest range of clients recommends\r
+ servers also handle LF. Similarly, whitespace in the request line\r
+ is treated sensibly (allowing multiple spaces between components\r
+ and allowing trailing whitespace).\r
+\r
+ Similarly, for output, lines ought to be separated by CRLF pairs\r
+ but most clients grok LF characters just fine.\r
+\r
+ If the first line of the request has the form\r
+\r
+ <command> <path>\r
+\r
+ (i.e. <version> is left out) then this is assumed to be an HTTP\r
+ 0.9 request; this form has no optional headers and data part and\r
+ the reply consists of just the data.\r
+\r
+ The reply form of the HTTP 1.x protocol again has three parts:\r
+\r
+ 1. One line giving the response code\r
+ 2. An optional set of RFC-822-style headers\r
+ 3. The data\r
+\r
+ Again, the headers and data are separated by a blank line.\r
+\r
+ The response code line has the form\r
+\r
+ <version> <responsecode> <responsestring>\r
+\r
+ where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),\r
+ <responsecode> is a 3-digit response code indicating success or\r
+ failure of the request, and <responsestring> is an optional\r
+ human-readable string explaining what the response code means.\r
+\r
+ This server parses the request and the headers, and then calls a\r
+ function specific to the request type (<command>). Specifically,\r
+ a request SPAM will be handled by a method do_SPAM(). If no\r
+ such method exists the server sends an error response to the\r
+ client. If it exists, it is called with no arguments:\r
+\r
+ do_SPAM()\r
+\r
+ Note that the request name is case sensitive (i.e. SPAM and spam\r
+ are different requests).\r
+\r
+ The various request details are stored in instance variables:\r
+\r
+ - client_address is the client IP address in the form (host,\r
+ port);\r
+\r
+ - command, path and version are the broken-down request line;\r
+\r
+ - headers is an instance of email.message.Message (or a derived\r
+ class) containing the header information;\r
+\r
+ - rfile is a file object open for reading positioned at the\r
+ start of the optional input data part;\r
+\r
+ - wfile is a file object open for writing.\r
+\r
+ IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!\r
+\r
+ The first thing to be written must be the response line. Then\r
+ follow 0 or more header lines, then a blank line, and then the\r
+ actual data (if any). The meaning of the header lines depends on\r
+ the command executed by the server; in most cases, when data is\r
+ returned, there should be at least one header line of the form\r
+\r
+ Content-type: <type>/<subtype>\r
+\r
+ where <type> and <subtype> should be registered MIME types,\r
+ e.g. "text/html" or "text/plain".\r
+\r
+ """\r
+\r
+ # The Python system version, truncated to its first component.\r
+ sys_version = "Python/" + sys.version.split()[0]\r
+\r
+ # The server software version. You may want to override this.\r
+ # The format is multiple whitespace-separated strings,\r
+ # where each string is of the form name[/version].\r
+ server_version = "BaseHTTP/" + __version__\r
+\r
+ error_message_format = DEFAULT_ERROR_MESSAGE\r
+ error_content_type = DEFAULT_ERROR_CONTENT_TYPE\r
+\r
+ # The default request version. This only affects responses up until\r
+ # the point where the request line is parsed, so it mainly decides what\r
+ # the client gets back when sending a malformed request line.\r
+ # Most web servers default to HTTP 0.9, i.e. don't send a status line.\r
+ default_request_version = "HTTP/0.9"\r
+\r
def parse_request(self):
    """Parse a request (internal).

    The request should be stored in self.raw_requestline; the results
    are in self.command, self.path, self.request_version and
    self.headers.

    Also sets self.requestline and self.close_connection, and calls
    handle_expect_100() for "Expect: 100-continue" when both sides
    speak HTTP/1.1 or later.

    Return True for success, False for failure; on failure, any relevant
    error response has already been sent back.

    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    self.close_connection = True
    requestline = str(self.raw_requestline, 'iso-8859-1')
    requestline = requestline.rstrip('\r\n')
    self.requestline = requestline
    words = requestline.split()
    # An empty request line fails without sending any error response.
    if len(words) == 0:
        return False

    if len(words) >= 3:  # Enough to determine protocol version
        version = words[-1]
        try:
            if not version.startswith('HTTP/'):
                raise ValueError
            base_version_number = version.split('/', 1)[1]
            version_number = base_version_number.split(".")
            # RFC 2145 section 3.1 says there can be only one "." and
            #   - major and minor numbers MUST be treated as
            #      separate integers;
            #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
            #      turn is lower than HTTP/12.3;
            #   - Leading zeros MUST be ignored by recipients.
            if len(version_number) != 2:
                raise ValueError
            version_number = int(version_number[0]), int(version_number[1])
        except (ValueError, IndexError):
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request version (%r)" % version)
            return False
        # Keep the connection open only when both the client and this
        # handler's protocol_version are at least HTTP/1.1.
        if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
            self.close_connection = False
        if version_number >= (2, 0):
            self.send_error(
                HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                "Invalid HTTP version (%s)" % base_version_number)
            return False
        self.request_version = version

    if not 2 <= len(words) <= 3:
        self.send_error(
            HTTPStatus.BAD_REQUEST,
            "Bad request syntax (%r)" % requestline)
        return False
    command, path = words[:2]
    # Two words means no version was given; only GET is accepted then.
    if len(words) == 2:
        self.close_connection = True
        if command != 'GET':
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad HTTP/0.9 request type (%r)" % command)
            return False
    self.command, self.path = command, path

    # Examine the headers and look for a Connection directive.
    try:
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)
    except http.client.LineTooLong as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Line too long",
            str(err))
        return False
    except http.client.HTTPException as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Too many headers",
            str(err)
        )
        return False

    conntype = self.headers.get('Connection', "")
    if conntype.lower() == 'close':
        self.close_connection = True
    elif (conntype.lower() == 'keep-alive' and
          self.protocol_version >= "HTTP/1.1"):
        self.close_connection = False
    # Examine the headers and look for an Expect directive
    expect = self.headers.get('Expect', "")
    if (expect.lower() == "100-continue" and
            self.protocol_version >= "HTTP/1.1" and
            self.request_version >= "HTTP/1.1"):
        if not self.handle_expect_100():
            return False
    return True
+\r
def handle_expect_100(self):
    """Answer an "Expect: 100-continue" request header.

    A client that sent this header waits for either a 100 Continue
    or a final response before transmitting the request body.  This
    default always sends 100 Continue.  Override it to behave
    differently, e.g. to reject unauthorized requests.

    Implementations return True (possibly after sending a 100
    Continue response), or send an error response and return False.

    """
    status = HTTPStatus.CONTINUE
    self.send_response_only(status)
    self.end_headers()
    return True
+\r
def handle_one_request(self):
    """Read, parse and dispatch one HTTP request.

    Subclasses normally leave this alone and implement do_<COMMAND>
    methods instead; see the class docstring for how commands such
    as GET and POST are dispatched.

    """
    try:
        self.raw_requestline = self.rfile.readline(65537)
        raw = self.raw_requestline
        if len(raw) > 65536:
            # Request line too long: reply 414 with blank request fields.
            self.requestline = self.request_version = self.command = ''
            self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
            return
        if not raw:
            # Nothing was read: mark the connection for closing.
            self.close_connection = True
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        handler_name = 'do_' + self.command
        if hasattr(self, handler_name):
            getattr(self, handler_name)()
            # actually send the response if not already done.
            self.wfile.flush()
            return
        self.send_error(
            HTTPStatus.NOT_IMPLEMENTED,
            "Unsupported method (%r)" % self.command)
    except socket.timeout as e:
        # a read or a write timed out.  Discard this connection
        self.log_error("Request timed out: %r", e)
        self.close_connection = True
        return
+\r
def handle(self):
    """Serve requests on this connection until close_connection is set."""
    self.close_connection = True

    # At least one request is always handled; further ones only while
    # the previous request left the connection open.
    while True:
        self.handle_one_request()
        if self.close_connection:
            break
+\r
def send_error(self, code, message=None, explain=None):
    """Send and log an error reply.

    Arguments are
    * code:    an HTTP error code
               3 digits
    * message: a simple optional 1 line reason phrase.
               *( HTAB / SP / VCHAR / %x80-FF )
               defaults to short entry matching the response code
    * explain: a detailed message defaults to the long entry
               matching the response code.

    The error is logged, the status line and headers are sent (so no
    output may have been generated before the call), and an HTML page
    describing the error follows unless the status forbids a body or
    the request was a HEAD.

    """
    try:
        short_text, long_text = self.responses[code]
    except KeyError:
        short_text = long_text = '???'
    if message is None:
        message = short_text
    if explain is None:
        explain = long_text
    self.log_error("code %d, message %s", code, message)
    self.send_response(code, message)
    self.send_header('Connection', 'close')

    # Message body is omitted for cases described in:
    #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
    #  - RFC7231: 6.3.6. 205(Reset Content)
    bodyless = (HTTPStatus.NO_CONTENT,
                HTTPStatus.RESET_CONTENT,
                HTTPStatus.NOT_MODIFIED)
    body = None
    if code >= 200 and code not in bodyless:
        # HTML encode to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        fields = {
            'code': code,
            'message': html.escape(message, quote=False),
            'explain': html.escape(explain, quote=False),
        }
        content = self.error_message_format % fields
        body = content.encode('UTF-8', 'replace')
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Content-Length', str(len(body)))
    self.end_headers()

    if self.command != 'HEAD' and body:
        self.wfile.write(body)
+\r
def send_response(self, code, message=None):
    """Log the response code and buffer the status line.

    The Server and Date headers are buffered as well, carrying the
    server software version and the current date.

    """
    self.log_request(code)
    self.send_response_only(code, message)
    server_value = self.version_string()
    self.send_header('Server', server_value)
    date_value = self.date_time_string()
    self.send_header('Date', date_value)
+\r
def send_response_only(self, code, message=None):
    """Buffer the status line only (nothing for HTTP/0.9 requests)."""
    if self.request_version == 'HTTP/0.9':
        return
    if message is None:
        # Fall back to the standard phrase, or an empty one if unknown.
        message = self.responses[code][0] if code in self.responses else ''
    if not hasattr(self, '_headers_buffer'):
        self._headers_buffer = []
    status_line = "%s %d %s\r\n" % (self.protocol_version, code, message)
    self._headers_buffer.append(status_line.encode('latin-1', 'strict'))
+\r
def send_header(self, keyword, value):
    """Buffer one header line and track Connection close/keep-alive."""
    if self.request_version != 'HTTP/0.9':
        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []
        header_line = "%s: %s\r\n" % (keyword, value)
        self._headers_buffer.append(header_line.encode('latin-1', 'strict'))

    if keyword.lower() != 'connection':
        return
    # A Connection header sent by the server decides whether the
    # connection stays open after this response.
    token = value.lower()
    if token == 'close':
        self.close_connection = True
    elif token == 'keep-alive':
        self.close_connection = False
+\r
def end_headers(self):
    """Send the blank line ending the MIME headers.

    For HTTP/0.9 requests no blank line is buffered.  In every case
    flush_headers() is called afterwards.
    """
    if self.request_version != 'HTTP/0.9':
        # Create the buffer on demand, as send_response_only() and
        # send_header() do, so calling this before any header has been
        # buffered does not raise AttributeError.
        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []
        self._headers_buffer.append(b"\r\n")
    self.flush_headers()
+\r
def flush_headers(self):
    """Write any buffered header bytes to wfile and empty the buffer."""
    if not hasattr(self, '_headers_buffer'):
        return
    pending = b"".join(self._headers_buffer)
    self.wfile.write(pending)
    self._headers_buffer = []
+\r
def log_request(self, code='-', size='-'):
    """Log an accepted request.

    send_response() calls this with the response code.

    """
    # Log the numeric value rather than the enum member.
    status = code.value if isinstance(code, HTTPStatus) else code
    self.log_message('"%s" %s %s',
                     self.requestline, str(status), str(size))
+\r
def log_error(self, format, *args):
    """Log an error.

    Called when a request cannot be fulfilled.  The default simply
    forwards to log_message(), whose arguments it shares.

    XXX This should go to the separate error log.

    """
    forwarded = args
    self.log_message(format, *forwarded)
+\r
def log_message(self, format, *args):
    """Log an arbitrary message.

    All the other logging methods end up here; override this one to
    change where or how messages are logged.

    FORMAT is a %-style format string and the remaining arguments
    are the values it consumes (it's just like printf!).

    Every message is written to sys.stderr, prefixed with the
    client address and the current date/time.

    """
    client = self.address_string()
    stamp = self.log_date_time_string()
    text = format % args
    sys.stderr.write("%s - - [%s] %s\n" % (client, stamp, text))
+\r
def version_string(self):
    """Return server_version and sys_version joined by one space."""
    software = self.server_version
    runtime = self.sys_version
    return software + ' ' + runtime
+\r
def date_time_string(self, timestamp=None):
    """Format *timestamp* (default: now) as a GMT date for a header."""
    when = time.time() if timestamp is None else timestamp
    return email.utils.formatdate(when, usegmt=True)
+\r
def log_date_time_string(self):
    """Return the current local time formatted for logging."""
    local = time.localtime(time.time())
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
        local.tm_hour, local.tm_min, local.tm_sec)
+\r
+ weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']\r
+\r
+ monthname = [None,\r
+ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',\r
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']\r
+\r
def address_string(self):
    """Return the first element of client_address (the client host)."""
    host = self.client_address[0]
    return host
+\r
+ # Essentially static class variables\r
+\r
+ # The version of the HTTP protocol we support.\r
+ # Set this to HTTP/1.1 to enable automatic keepalive\r
+ protocol_version = "HTTP/1.0"\r
+\r
+ # MessageClass used to parse headers\r
+ MessageClass = http.client.HTTPMessage\r
+\r
+ # hack to maintain backwards compatibility\r
+ responses = {\r
+ v: (v.phrase, v.description)\r
+ for v in HTTPStatus.__members__.values()\r
+ }\r
+\r
+\r
+class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):\r
+\r
+ """Simple HTTP request handler with GET and HEAD commands.\r
+\r
+ This serves files from the current directory and any of its\r
+ subdirectories. The MIME type for files is determined by\r
+ calling the .guess_type() method.\r
+\r
+ The GET and HEAD requests are identical except that the HEAD\r
+ request omits the actual contents of the file.\r
+\r
+ """\r
+\r
+ server_version = "SimpleHTTP/" + __version__\r
+ extensions_map = _encodings_map_default = {\r
+ '.gz': 'application/gzip',\r
+ '.Z': 'application/octet-stream',\r
+ '.bz2': 'application/x-bzip2',\r
+ '.xz': 'application/x-xz',\r
+ }\r
+\r
def __init__(self, *args, directory=None, **kwargs):
    """Store the directory to serve, then initialise the base handler.

    *directory* defaults to the current working directory and is
    stored on the instance, as a filesystem path string, before the
    base class is initialised with the remaining arguments.
    """
    chosen = os.getcwd() if directory is None else directory
    self.directory = os.fspath(chosen)
    super().__init__(*args, **kwargs)
+\r
def do_GET(self):
    """Serve a GET request: send the head, then copy the body out."""
    source = self.send_head()
    if not source:
        return
    try:
        self.copyfile(source, self.wfile)
    finally:
        source.close()
+\r
def do_HEAD(self):
    """Serve a HEAD request: send the head and discard the body source."""
    source = self.send_head()
    if not source:
        return
    source.close()
+\r
def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers.

    Directories are redirected to a trailing-slash URL, served from
    index.html / index.htm when present, or listed via
    list_directory().  A 304 is sent instead of the file when the
    request's If-Modified-Since is not older than the file's mtime.

    Return value is either a file object (which has to be copied
    to the outputfile by the caller unless the command was HEAD,
    and must be closed by the caller under all circumstances), or
    None, in which case the caller has nothing further to do.

    """
    path = self.translate_path(self.path)
    f = None
    if os.path.isdir(path):
        parts = urllib.parse.urlsplit(self.path)
        if not parts.path.endswith('/'):
            # redirect browser - doing basically what apache does
            self.send_response(HTTPStatus.MOVED_PERMANENTLY)
            new_parts = (parts[0], parts[1], parts[2] + '/',
                         parts[3], parts[4])
            new_url = urllib.parse.urlunsplit(new_parts)
            self.send_header("Location", new_url)
            self.end_headers()
            return None
        for index in "index.html", "index.htm":
            index = os.path.join(path, index)
            if os.path.exists(index):
                path = index
                break
        else:
            # No index file found: produce a directory listing instead.
            return self.list_directory(path)
    ctype = self.guess_type(path)
    # check for trailing "/" which should return 404. See Issue17324
    # The test for this was added in test_httpserver.py
    # However, some OS platforms accept a trailingSlash as a filename
    # See discussion on python-dev and Issue34711 regarding
    # parsing and rejection of filenames with a trailing slash
    if path.endswith("/"):
        self.send_error(HTTPStatus.NOT_FOUND, "File not found")
        return None
    try:
        f = open(path, 'rb')
    except OSError:
        self.send_error(HTTPStatus.NOT_FOUND, "File not found")
        return None

    try:
        fs = os.fstat(f.fileno())
        # Use browser cache if possible
        if ("If-Modified-Since" in self.headers
                and "If-None-Match" not in self.headers):
            # compare If-Modified-Since and time of last file modification
            try:
                ims = email.utils.parsedate_to_datetime(
                    self.headers["If-Modified-Since"])
            except (TypeError, IndexError, OverflowError, ValueError):
                # ignore ill-formed values
                pass
            else:
                if ims.tzinfo is None:
                    # obsolete format with no timezone, cf.
                    # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                    ims = ims.replace(tzinfo=datetime.timezone.utc)
                if ims.tzinfo is datetime.timezone.utc:
                    # compare to UTC datetime of last modification
                    last_modif = datetime.datetime.fromtimestamp(
                        fs.st_mtime, datetime.timezone.utc)
                    # remove microseconds, like in If-Modified-Since
                    last_modif = last_modif.replace(microsecond=0)

                    if last_modif <= ims:
                        self.send_response(HTTPStatus.NOT_MODIFIED)
                        self.end_headers()
                        f.close()
                        return None

        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", ctype)
        # Index 6 of the stat result is st_size.
        self.send_header("Content-Length", str(fs[6]))
        self.send_header("Last-Modified",
            self.date_time_string(fs.st_mtime))
        self.end_headers()
        return f
    except:
        # Do not leak the open file if sending the headers fails;
        # the exception is re-raised unchanged.
        f.close()
        raise
+\r
def list_directory(self, path):
    """Helper to produce a directory listing (absent index.html).

    *path* is the local directory to list.  The page is HTML encoded
    with the filesystem encoding; entries are sorted case-insensitively,
    directories are shown with a trailing "/" and symbolic links with a
    trailing "@".

    Return value is either a file object, or None (indicating an
    error).  In either case, the headers are sent, making the
    interface the same as for send_head().

    """
    try:
        entries = os.listdir(path)
    except OSError:
        self.send_error(
            HTTPStatus.NOT_FOUND,
            "No permission to list directory")
        return None
    entries.sort(key=lambda a: a.lower())
    r = []
    try:
        displaypath = urllib.parse.unquote(self.path,
                                           errors='surrogatepass')
    except UnicodeDecodeError:
        displaypath = urllib.parse.unquote(path)
    # Escape everything that ends up inside the generated HTML.
    displaypath = html.escape(displaypath, quote=False)
    enc = sys.getfilesystemencoding()
    title = 'Directory listing for %s' % displaypath
    r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
             '"http://www.w3.org/TR/html4/strict.dtd">')
    r.append('<html>\n<head>')
    r.append('<meta http-equiv="Content-Type" '
             'content="text/html; charset=%s">' % enc)
    r.append('<title>%s</title>\n</head>' % title)
    r.append('<body>\n<h1>%s</h1>' % title)
    r.append('<hr>\n<ul>')
    for name in entries:
        fullname = os.path.join(path, name)
        displayname = linkname = name
        # Append / for directories or @ for symbolic links
        if os.path.isdir(fullname):
            displayname = name + "/"
            linkname = name + "/"
        if os.path.islink(fullname):
            displayname = name + "@"
            # Note: a link to a directory displays with @ and links with /
        r.append('<li><a href="%s">%s</a></li>'
                 % (urllib.parse.quote(linkname,
                                       errors='surrogatepass'),
                    html.escape(displayname, quote=False)))
    r.append('</ul>\n<hr>\n</body>\n</html>\n')
    encoded = '\n'.join(r).encode(enc, 'surrogateescape')
    # BytesIO(initial_bytes) starts positioned at offset 0.
    f = io.BytesIO(encoded)
    self.send_response(HTTPStatus.OK)
    self.send_header("Content-type", "text/html; charset=%s" % enc)
    self.send_header("Content-Length", str(len(encoded)))
    self.end_headers()
    return f
+\r
def translate_path(self, path):
    """Map a /-separated URL PATH onto a local filename.

    The query string and fragment are dropped, percent-escapes are
    decoded and the result is normalized.  Each remaining component is
    then joined onto self.directory; components that are not a plain
    name for the local file system (they contain a directory part, or
    are the current/parent directory marker) are silently skipped.
    (XXX They should probably be diagnosed.)

    An explicit trailing slash in the URL is preserved in the result.

    """
    # Strip "?query" first, then "#fragment".
    url_path = path.split('?', 1)[0].split('#', 1)[0]
    # Normalization below discards a trailing slash, so remember it now.
    # See Issue17324.
    keep_slash = url_path.rstrip().endswith('/')
    try:
        url_path = urllib.parse.unquote(url_path, errors='surrogatepass')
    except UnicodeDecodeError:
        url_path = urllib.parse.unquote(url_path)
    url_path = posixpath.normpath(url_path)

    local = self.directory
    for component in url_path.split('/'):
        if not component:
            # Empty piece produced by a leading or doubled slash.
            continue
        if os.path.dirname(component) or component in (os.curdir, os.pardir):
            # Not a simple file/directory name: ignore it.
            continue
        local = os.path.join(local, component)

    if keep_slash:
        local += '/'
    return local
+\r
def copyfile(self, source, outputfile):
    """Copy all data from one file object to another.

    SOURCE is a file object open for reading (or anything with a
    read() method); OUTPUTFILE is a file object open for writing (or
    anything with a write() method).

    The only reason to override this would be to change the block
    size or perhaps to replace newlines by CRLF -- note, however, that
    the default server uses this to copy binary data as well.

    """
    shutil.copyfileobj(fsrc=source, fdst=outputfile)
+\r
def guess_type(self, path):
    """Return the MIME type to use for the file named PATH.

    The result is a string of the form type/subtype, suitable for a
    Content-type header.

    The file's extension is looked up in self.extensions_map, first
    exactly as written and then lower-cased.  If neither is present the
    mimetypes module is consulted, and 'application/octet-stream' is
    the final fallback.  (It would be permissible, if slow, to look
    inside the data to make a better guess.)

    """
    _root, suffix = posixpath.splitext(path)
    # Exact spelling wins over the lower-cased one.
    for candidate in (suffix, suffix.lower()):
        if candidate in self.extensions_map:
            return self.extensions_map[candidate]
    guessed = mimetypes.guess_type(path)[0]
    return guessed if guessed else 'application/octet-stream'
+\r
+\r
+# Utilities for CGIHTTPRequestHandler\r
+\r
+def _url_collapse_path(path):\r
+ """\r
+ Given a URL path, remove extra '/'s and '.' path elements and collapse\r
+ any '..' references and returns a collapsed path.\r
+\r
+ Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.\r
+ The utility of this function is limited to is_cgi method and helps\r
+ preventing some security attacks.\r
+\r
+ Returns: The reconstituted URL, which will always start with a '/'.\r
+\r
+ Raises: IndexError if too many '..' occur within the path.\r
+\r
+ """\r
+ # Query component should not be involved.\r
+ path, _, query = path.partition('?')\r
+ path = urllib.parse.unquote(path)\r
+\r
+ # Similar to os.path.split(os.path.normpath(path)) but specific to URL\r
+ # path semantics rather than local operating system semantics.\r
+ path_parts = path.split('/')\r
+ head_parts = []\r
+ for part in path_parts[:-1]:\r
+ if part == '..':\r
+ head_parts.pop() # IndexError if more '..' than prior parts\r
+ elif part and part != '.':\r
+ head_parts.append( part )\r
+ if path_parts:\r
+ tail_part = path_parts.pop()\r
+ if tail_part:\r
+ if tail_part == '..':\r
+ head_parts.pop()\r
+ tail_part = ''\r
+ elif tail_part == '.':\r
+ tail_part = ''\r
+ else:\r
+ tail_part = ''\r
+\r
+ if query:\r
+ tail_part = '?'.join((tail_part, query))\r
+\r
+ splitpath = ('/' + '/'.join(head_parts), tail_part)\r
+ collapsed_path = "/".join(splitpath)\r
+\r
+ return collapsed_path\r
+\r
+\r
+\r
# Cached uid used by nobody_uid(); None means "not looked up yet".
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the uid of the 'nobody' account.  If there is no such
    account, one more than the largest uid in the password database is
    used instead.  The result is cached in the module-level ``nobody``.

    Returns -1 (without caching) when the pwd module is unavailable.
    """
    global nobody
    # Compare against None rather than testing truthiness, so that a
    # cached uid of 0 is honoured instead of being looked up again.
    if nobody is not None:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        nobody = 1 + max(x[2] for x in pwd.getpwall())
    return nobody
+\r
+\r
def executable(path):
    """Return whether os.access() reports PATH as executable (X_OK)."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
+\r
+\r
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    A request is treated as a CGI request when its collapsed URL path
    starts with one of the entries of ``cgi_directories`` (see is_cgi()).

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with 501 Not Implemented.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else is
        delegated to SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try successively longer directory prefixes, each ending just
        # before a '/', until one is a registered CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False

    # URL path prefixes whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info, as set by is_cgi(), to locate the script,
        builds the CGI environment from the request, sends the
        "200 Script output follows" status line and then runs the
        script: via fork/execve where os.fork exists, otherwise via
        subprocess.Popen.  Always returns None.

        An error response is sent instead (and nothing is run) when the
        script does not exist (404), is not a plain file (403) or is
        not executable (403).
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend the directory part for as long as the next path
        # component is an existing directory on disk.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are started through the
        # interpreter (see below), so they need no execute permission.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        credentials = base64.decodebytes(
                            authorization[1].encode('ascii')).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        # Only the first ':' separates the user-id from
                        # the password (RFC 7617); the password itself
                        # may contain further colons.
                        user, sep, _password = credentials.partition(':')
                        if sep:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the
        # script's own output follows it directly.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    # Best effort: keep the current uid if it cannot
                    # be changed.
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the forked child must
                # never return into the server loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE: unlike the fork branch above, the raw query (with
            # '+' not replaced by spaces) is tested and passed here.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            # Only a POST with a positive Content-Length forwards a
            # body to the script.
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
+\r
+\r
+def _get_best_family(*address):\r
+ infos = socket.getaddrinfo(\r
+ *address,\r
+ type=socket.SOCK_STREAM,\r
+ flags=socket.AI_PASSIVE,\r
+ )\r
+ family, type, proto, canonname, sockaddr = next(iter(infos))\r
+ return family, sockaddr\r
+\r
+\r
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Run an HTTP server with the given request handler class.

    The server listens on port 8000 (or the port argument) at the
    address chosen by _get_best_family(bind, port), and serves until
    interrupted from the keyboard, at which point the process exits
    with status 0.

    """
    family, sockaddr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(sockaddr, HandlerClass) as httpd:
        # Report the address the socket is actually bound to.
        bound_host, bound_port = httpd.socket.getsockname()[:2]
        if ':' in bound_host:
            # A host containing ':' is bracketed inside the URL.
            url_host = f'[{bound_host}]'
        else:
            url_host = bound_host
        print(
            f"Serving HTTP on {bound_host} port {bound_port} "
            f"(http://{url_host}:{bound_port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
+\r
# Command-line entry point: parse the options and start a server via test().
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                             '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # Pass the real handler class (not a functools.partial wrapper) so
    # that test() sets protocol_version on the class itself.  The
    # directory is supplied per request in finish_request() below, which
    # makes --directory apply to the CGI handler as well.
    if args.cgi:
        handler_class = CGIHTTPRequestHandler
    else:
        handler_class = SimpleHTTPRequestHandler

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Instantiate the handler with the directory chosen on the
            # command line.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )