1"""HTTP server classes. 2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see 4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, 5and CGIHTTPRequestHandler for CGI scripts. 6 7It does, however, optionally implement HTTP/1.1 persistent connections, 8as of version 0.3. 9 10Notes on CGIHTTPRequestHandler 11------------------------------ 12 13This class implements GET and POST requests to cgi-bin scripts. 14 15If the os.fork() function is not present (e.g. on Windows), 16subprocess.Popen() is used as a fallback, with slightly altered semantics. 17 18In all cases, the implementation is intentionally naive -- all 19requests are executed synchronously. 20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 22-- it may execute arbitrary Python code or external programs. 23 24Note that status code 200 is sent prior to execution of a CGI script, so 25scripts cannot send other status codes such as 302 (redirect). 26 27XXX To do: 28 29- log requests even later (to capture byte count) 30- log user-agent header and other interesting goodies 31- send error log to separate file 32""" 33 34 35# See also: 36# 37# HTTP Working Group T. Berners-Lee 38# INTERNET-DRAFT R. T. Fielding 39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 40# Expires September 8, 1995 March 8, 1995 41# 42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 43# 44# and 45# 46# Network Working Group R. Fielding 47# Request for Comments: 2616 et al 48# Obsoletes: 2068 June 1999 49# Category: Standards Track 50# 51# URL: http://www.faqs.org/rfcs/rfc2616.html 52 53# Log files 54# --------- 55# 56# Here's a quote from the NCSA httpd docs about log file format. 57# 58# | The logfile format is as follows. Each line consists of: 59# | 60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 61# | 62# | host: Either the DNS name or the IP number of the remote client 63# | rfc931: Any information returned by identd for this person, 64# | - otherwise. 65# | authuser: If user sent a userid for authentication, the user name, 66# | - otherwise. 67# | DD: Day 68# | Mon: Month (calendar name) 69# | YYYY: Year 70# | hh: hour (24-hour format, the machine's timezone) 71# | mm: minutes 72# | ss: seconds 73# | request: The first line of the HTTP request as sent by the client. 74# | ddd: the status code returned by the server, - if not available. 75# | bbbb: the total number of bytes sent, 76# | *not including the HTTP/1.0 header*, - if not available 77# | 78# | You can determine the name of the file accessed through request. 79# 80# (Actually, the latter is only true if you know the server configuration 81# at the time the request was made!) 82 83__version__ ="0.6" 84 85__all__ = [ 86"HTTPServer","ThreadingHTTPServer","BaseHTTPRequestHandler", 87"SimpleHTTPRequestHandler","CGIHTTPRequestHandler", 88] 89 90import copy 91import datetime 92import email.utils 93import html 94import http.client 95import io 96import mimetypes 97import os 98import posixpath 99import select 100import shutil 101import socket # For gethostbyaddr() 102import socketserver 103import sys 104import time 105import urllib.parse 106import contextlib 107from functools import partial 108 109from http import HTTPStatus 110 111 112# Default error message template 113DEFAULT_ERROR_MESSAGE ="""\ 114<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 115 "http://www.w3.org/TR/html4/strict.dtd"> 116<html> 117 <head> 118 <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> 119 <title>Error response</title> 120 </head> 121 <body> 122 <h1>Error response</h1> 123 <p>Error code:%(code)d</p> 124 <p>Message:%(message)s.</p> 125 <p>Error code explanation:%(code)s-%(explain)s.</p> 126 </body> 127</html> 128""" 129 130DEFAULT_ERROR_CONTENT_TYPE ="text/html;charset=utf-8" 131 132classHTTPServer(socketserver.TCPServer): 133 134 allow_reuse_address =1# Seems to make sense in testing environment 135 136defserver_bind(self): 137"""Override server_bind to store the server name.""" 138 socketserver.TCPServer.server_bind(self) 139 host, port = self.server_address[:2] 140 self.server_name = socket.getfqdn(host) 141 self.server_port = port 142 143 144classThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer): 145 daemon_threads =True 146 147 148classBaseHTTPRequestHandler(socketserver.StreamRequestHandler): 149 150"""HTTP request handler base class. 151 152 The following explanation of HTTP serves to guide you through the 153 code as well as to expose any misunderstandings I may have about 154 HTTP (so you don't need to read the code to figure out I'm wrong 155 :-). 156 157 HTTP (HyperText Transfer Protocol) is an extensible protocol on 158 top of a reliable stream transport (e.g. TCP/IP). The protocol 159 recognizes three parts to a request: 160 161 1. One line identifying the request type and path 162 2. An optional set of RFC-822-style headers 163 3. An optional data part 164 165 The headers and data are separated by a blank line. 166 167 The first line of the request has the form 168 169 <command> <path> <version> 170 171 where <command> is a (case-sensitive) keyword such as GET or POST, 172 <path> is a string containing path information for the request, 173 and <version> should be the string "HTTP/1.0" or "HTTP/1.1". 174 <path> is encoded using the URL encoding scheme (using%xxto signify 175 the ASCII character with hex code xx). 176 177 The specification specifies that lines are separated by CRLF but 178 for compatibility with the widest range of clients recommends 179 servers also handle LF. Similarly, whitespace in the request line 180 is treated sensibly (allowing multiple spaces between components 181 and allowing trailing whitespace). 182 183 Similarly, for output, lines ought to be separated by CRLF pairs 184 but most clients grok LF characters just fine. 185 186 If the first line of the request has the form 187 188 <command> <path> 189 190 (i.e. <version> is left out) then this is assumed to be an HTTP 191 0.9 request; this form has no optional headers and data part and 192 the reply consists of just the data. 193 194 The reply form of the HTTP 1.x protocol again has three parts: 195 196 1. One line giving the response code 197 2. An optional set of RFC-822-style headers 198 3. The data 199 200 Again, the headers and data are separated by a blank line. 201 202 The response code line has the form 203 204 <version> <responsecode> <responsestring> 205 206 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 207 <responsecode> is a 3-digit response code indicating success or 208 failure of the request, and <responsestring> is an optional 209 human-readable string explaining what the response code means. 210 211 This server parses the request and the headers, and then calls a 212 function specific to the request type (<command>). Specifically, 213 a request SPAM will be handled by a method do_SPAM(). If no 214 such method exists the server sends an error response to the 215 client. If it exists, it is called with no arguments: 216 217 do_SPAM() 218 219 Note that the request name is case sensitive (i.e. SPAM and spam 220 are different requests). 221 222 The various request details are stored in instance variables: 223 224 - client_address is the client IP address in the form (host, 225 port); 226 227 - command, path and version are the broken-down request line; 228 229 - headers is an instance of email.message.Message (or a derived 230 class) containing the header information; 231 232 - rfile is a file object open for reading positioned at the 233 start of the optional input data part; 234 235 - wfile is a file object open for writing. 236 237 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! 238 239 The first thing to be written must be the response line. Then 240 follow 0 or more header lines, then a blank line, and then the 241 actual data (if any). The meaning of the header lines depends on 242 the command executed by the server; in most cases, when data is 243 returned, there should be at least one header line of the form 244 245 Content-type: <type>/<subtype> 246 247 where <type> and <subtype> should be registered MIME types, 248 e.g. "text/html" or "text/plain". 249 250 """ 251 252# The Python system version, truncated to its first component. 253 sys_version ="Python/"+ sys.version.split()[0] 254 255# The server software version. You may want to override this. 256# The format is multiple whitespace-separated strings, 257# where each string is of the form name[/version]. 258 server_version ="BaseHTTP/"+ __version__ 259 260 error_message_format = DEFAULT_ERROR_MESSAGE 261 error_content_type = DEFAULT_ERROR_CONTENT_TYPE 262 263# The default request version. This only affects responses up until 264# the point where the request line is parsed, so it mainly decides what 265# the client gets back when sending a malformed request line. 266# Most web servers default to HTTP 0.9, i.e. don't send a status line. 267 default_request_version ="HTTP/0.9" 268 269defparse_request(self): 270"""Parse a request (internal). 271 272 The request should be stored in self.raw_requestline; the results 273 are in self.command, self.path, self.request_version and 274 self.headers. 275 276 Return True for success, False for failure; on failure, any relevant 277 error response has already been sent back. 278 279 """ 280 self.command =None# set in case of error on the first line 281 self.request_version = version = self.default_request_version 282 self.close_connection =True 283 requestline =str(self.raw_requestline,'iso-8859-1') 284 requestline = requestline.rstrip('\r\n') 285 self.requestline = requestline 286 words = requestline.split() 287iflen(words) ==0: 288return False 289 290iflen(words) >=3:# Enough to determine protocol version 291 version = words[-1] 292try: 293if not version.startswith('HTTP/'): 294raiseValueError 295 base_version_number = version.split('/',1)[1] 296 version_number = base_version_number.split(".") 297# RFC 2145 section 3.1 says there can be only one "." and 298# - major and minor numbers MUST be treated as 299# separate integers; 300# - HTTP/2.4 is a lower version than HTTP/2.13, which in 301# turn is lower than HTTP/12.3; 302# - Leading zeros MUST be ignored by recipients. 303iflen(version_number) !=2: 304raiseValueError 305 version_number =int(version_number[0]),int(version_number[1]) 306except(ValueError,IndexError): 307 self.send_error( 308 HTTPStatus.BAD_REQUEST, 309"Bad request version (%r)"% version) 310return False 311if version_number >= (1,1)and self.protocol_version >="HTTP/1.1": 312 self.close_connection =False 313if version_number >= (2,0): 314 self.send_error( 315 HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, 316"Invalid HTTP version (%s)"% base_version_number) 317return False 318 self.request_version = version 319 320if not2<=len(words) <=3: 321 self.send_error( 322 HTTPStatus.BAD_REQUEST, 323"Bad request syntax (%r)"% requestline) 324return False 325 command, path = words[:2] 326iflen(words) ==2: 327 self.close_connection =True 328if command !='GET': 329 self.send_error( 330 HTTPStatus.BAD_REQUEST, 331"Bad HTTP/0.9 request type (%r)"% command) 332return False 333 self.command, self.path = command, path 334 335# Examine the headers and look for a Connection directive. 336try: 337 self.headers = http.client.parse_headers(self.rfile, 338 _class=self.MessageClass) 339except http.client.LineTooLong as err: 340 self.send_error( 341 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 342"Line too long", 343str(err)) 344return False 345except http.client.HTTPException as err: 346 self.send_error( 347 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 348"Too many headers", 349str(err) 350) 351return False 352 353 conntype = self.headers.get('Connection',"") 354if conntype.lower() =='close': 355 self.close_connection =True 356elif(conntype.lower() =='keep-alive'and 357 self.protocol_version >="HTTP/1.1"): 358 self.close_connection =False 359# Examine the headers and look for an Expect directive 360 expect = self.headers.get('Expect',"") 361if(expect.lower() =="100-continue"and 362 self.protocol_version >="HTTP/1.1"and 363 self.request_version >="HTTP/1.1"): 364if not self.handle_expect_100(): 365return False 366return True 367 368defhandle_expect_100(self): 369"""Decide what to do with an "Expect: 100-continue" header. 370 371 If the client is expecting a 100 Continue response, we must 372 respond with either a 100 Continue or a final response before 373 waiting for the request body. The default is to always respond 374 with a 100 Continue. You can behave differently (for example, 375 reject unauthorized requests) by overriding this method. 376 377 This method should either return True (possibly after sending 378 a 100 Continue response) or send an error response and return 379 False. 380 381 """ 382 self.send_response_only(HTTPStatus.CONTINUE) 383 self.end_headers() 384return True 385 386defhandle_one_request(self): 387"""Handle a single HTTP request. 388 389 You normally don't need to override this method; see the class 390 __doc__ string for information on how to handle specific HTTP 391 commands such as GET and POST. 392 393 """ 394try: 395 self.raw_requestline = self.rfile.readline(65537) 396iflen(self.raw_requestline) >65536: 397 self.requestline ='' 398 self.request_version ='' 399 self.command ='' 400 self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) 401return 402if not self.raw_requestline: 403 self.close_connection =True 404return 405if not self.parse_request(): 406# An error code has been sent, just exit 407return 408 mname ='do_'+ self.command 409if nothasattr(self, mname): 410 self.send_error( 411 HTTPStatus.NOT_IMPLEMENTED, 412"Unsupported method (%r)"% self.command) 413return 414 method =getattr(self, mname) 415method() 416 self.wfile.flush()#actually send the response if not already done. 417except socket.timeout as e: 418#a read or a write timed out. Discard this connection 419 self.log_error("Request timed out:%r", e) 420 self.close_connection =True 421return 422 423defhandle(self): 424"""Handle multiple requests if necessary.""" 425 self.close_connection =True 426 427 self.handle_one_request() 428while not self.close_connection: 429 self.handle_one_request() 430 431defsend_error(self, code, message=None, explain=None): 432"""Send and log an error reply. 433 434 Arguments are 435 * code: an HTTP error code 436 3 digits 437 * message: a simple optional 1 line reason phrase. 438 *( HTAB / SP / VCHAR /%x80-FF ) 439 defaults to short entry matching the response code 440 * explain: a detailed message defaults to the long entry 441 matching the response code. 442 443 This sends an error response (so it must be called before any 444 output has been generated), logs the error, and finally sends 445 a piece of HTML explaining the error to the user. 446 447 """ 448 449try: 450 shortmsg, longmsg = self.responses[code] 451exceptKeyError: 452 shortmsg, longmsg ='???','???' 453if message is None: 454 message = shortmsg 455if explain is None: 456 explain = longmsg 457 self.log_error("code%d, message%s", code, message) 458 self.send_response(code, message) 459 self.send_header('Connection','close') 460 461# Message body is omitted for cases described in: 462# - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) 463# - RFC7231: 6.3.6. 205(Reset Content) 464 body =None 465if(code >=200and 466 code not in(HTTPStatus.NO_CONTENT, 467 HTTPStatus.RESET_CONTENT, 468 HTTPStatus.NOT_MODIFIED)): 469# HTML encode to prevent Cross Site Scripting attacks 470# (see bug #1100201) 471 content = (self.error_message_format % { 472'code': code, 473'message': html.escape(message, quote=False), 474'explain': html.escape(explain, quote=False) 475}) 476 body = content.encode('UTF-8','replace') 477 self.send_header("Content-Type", self.error_content_type) 478 self.send_header('Content-Length',str(len(body))) 479 self.end_headers() 480 481if self.command !='HEAD'and body: 482 self.wfile.write(body) 483 484defsend_response(self, code, message=None): 485"""Add the response header to the headers buffer and log the 486 response code. 487 488 Also send two standard headers with the server software 489 version and the current date. 490 491 """ 492 self.log_request(code) 493 self.send_response_only(code, message) 494 self.send_header('Server', self.version_string()) 495 self.send_header('Date', self.date_time_string()) 496 497defsend_response_only(self, code, message=None): 498"""Send the response header only.""" 499if self.request_version !='HTTP/0.9': 500if message is None: 501if code in self.responses: 502 message = self.responses[code][0] 503else: 504 message ='' 505if nothasattr(self,'_headers_buffer'): 506 self._headers_buffer = [] 507 self._headers_buffer.append(("%s %d %s\r\n"% 508(self.protocol_version, code, message)).encode( 509'latin-1','strict')) 510 511defsend_header(self, keyword, value): 512"""Send a MIME header to the headers buffer.""" 513if self.request_version !='HTTP/0.9': 514if nothasattr(self,'_headers_buffer'): 515 self._headers_buffer = [] 516 self._headers_buffer.append( 517("%s:%s\r\n"% (keyword, value)).encode('latin-1','strict')) 518 519if keyword.lower() =='connection': 520if value.lower() =='close': 521 self.close_connection =True 522elif value.lower() =='keep-alive': 523 self.close_connection =False 524 525defend_headers(self): 526"""Send the blank line ending the MIME headers.""" 527if self.request_version !='HTTP/0.9': 528 self._headers_buffer.append(b"\r\n") 529 self.flush_headers() 530 531defflush_headers(self): 532ifhasattr(self,'_headers_buffer'): 533 self.wfile.write(b"".join(self._headers_buffer)) 534 self._headers_buffer = [] 535 536deflog_request(self, code='-', size='-'): 537"""Log an accepted request. 538 539 This is called by send_response(). 540 541 """ 542ifisinstance(code, HTTPStatus): 543 code = code.value 544 self.log_message('"%s"%s %s', 545 self.requestline,str(code),str(size)) 546 547deflog_error(self, format, *args): 548"""Log an error. 549 550 This is called when a request cannot be fulfilled. By 551 default it passes the message on to log_message(). 552 553 Arguments are the same as for log_message(). 554 555 XXX This should go to the separate error log. 556 557 """ 558 559 self.log_message(format, *args) 560 561deflog_message(self, format, *args): 562"""Log an arbitrary message. 563 564 This is used by all other logging functions. Override 565 it if you have specific logging wishes. 566 567 The first argument, FORMAT, is a format string for the 568 message to be logged. If the format string contains 569 any % escapes requiring parameters, they should be 570 specified as subsequent arguments (it's just like 571 printf!). 572 573 The client ip and current date/time are prefixed to 574 every message. 575 576 """ 577 578 sys.stderr.write("%s- - [%s]%s\n"% 579(self.address_string(), 580 self.log_date_time_string(), 581 format%args)) 582 583defversion_string(self): 584"""Return the server software version string.""" 585return self.server_version +' '+ self.sys_version 586 587defdate_time_string(self, timestamp=None): 588"""Return the current date and time formatted for a message header.""" 589if timestamp is None: 590 timestamp = time.time() 591return email.utils.formatdate(timestamp, usegmt=True) 592 593deflog_date_time_string(self): 594"""Return the current time formatted for logging.""" 595 now = time.time() 596 year, month, day, hh, mm, ss, x, y, z = time.localtime(now) 597 s ="%02d/%3s/%04d%02d:%02d:%02d"% ( 598 day, self.monthname[month], year, hh, mm, ss) 599return s 600 601 weekdayname = ['Mon','Tue','Wed','Thu','Fri','Sat','Sun'] 602 603 monthname = [None, 604'Jan','Feb','Mar','Apr','May','Jun', 605'Jul','Aug','Sep','Oct','Nov','Dec'] 606 607defaddress_string(self): 608"""Return the client address.""" 609 610return self.client_address[0] 611 612# Essentially static class variables 613 614# The version of the HTTP protocol we support. 615# Set this to HTTP/1.1 to enable automatic keepalive 616 protocol_version ="HTTP/1.0" 617 618# MessageClass used to parse headers 619 MessageClass = http.client.HTTPMessage 620 621# hack to maintain backwards compatibility 622 responses = { 623 v: (v.phrase, v.description) 624for v in HTTPStatus.__members__.values() 625} 626 627 628classSimpleHTTPRequestHandler(BaseHTTPRequestHandler): 629 630"""Simple HTTP request handler with GET and HEAD commands. 631 632 This serves files from the current directory and any of its 633 subdirectories. The MIME type for files is determined by 634 calling the .guess_type() method. 635 636 The GET and HEAD requests are identical except that the HEAD 637 request omits the actual contents of the file. 638 639 """ 640 641 server_version ="SimpleHTTP/"+ __version__ 642 extensions_map = _encodings_map_default = { 643'.gz':'application/gzip', 644'.Z':'application/octet-stream', 645'.bz2':'application/x-bzip2', 646'.xz':'application/x-xz', 647} 648 649def__init__(self, *args, directory=None, **kwargs): 650if directory is None: 651 directory = os.getcwd() 652 self.directory = os.fspath(directory) 653super().__init__(*args, **kwargs) 654 655defdo_GET(self): 656"""Serve a GET request.""" 657 f = self.send_head() 658if f: 659try: 660 self.copyfile(f, self.wfile) 661finally: 662 f.close() 663 664defdo_HEAD(self): 665"""Serve a HEAD request.""" 666 f = self.send_head() 667if f: 668 f.close() 669 670defsend_head(self): 671"""Common code for GET and HEAD commands. 672 673 This sends the response code and MIME headers. 674 675 Return value is either a file object (which has to be copied 676 to the outputfile by the caller unless the command was HEAD, 677 and must be closed by the caller under all circumstances), or 678 None, in which case the caller has nothing further to do. 679 680 """ 681 path = self.translate_path(self.path) 682 f =None 683if os.path.isdir(path): 684 parts = urllib.parse.urlsplit(self.path) 685if not parts.path.endswith('/'): 686# redirect browser - doing basically what apache does 687 self.send_response(HTTPStatus.MOVED_PERMANENTLY) 688 new_parts = (parts[0], parts[1], parts[2] +'/', 689 parts[3], parts[4]) 690 new_url = urllib.parse.urlunsplit(new_parts) 691 self.send_header("Location", new_url) 692 self.end_headers() 693return None 694for index in"index.html","index.htm": 695 index = os.path.join(path, index) 696if os.path.exists(index): 697 path = index 698break 699else: 700return self.list_directory(path) 701 ctype = self.guess_type(path) 702# check for trailing "/" which should return 404. See Issue17324 703# The test for this was added in test_httpserver.py 704# However, some OS platforms accept a trailingSlash as a filename 705# See discussion on python-dev and Issue34711 regarding 706# parseing and rejection of filenames with a trailing slash 707if path.endswith("/"): 708 self.send_error(HTTPStatus.NOT_FOUND,"File not found") 709return None 710try: 711 f =open(path,'rb') 712exceptOSError: 713 self.send_error(HTTPStatus.NOT_FOUND,"File not found") 714return None 715 716try: 717 fs = os.fstat(f.fileno()) 718# Use browser cache if possible 719if("If-Modified-Since"in self.headers 720and"If-None-Match"not in self.headers): 721# compare If-Modified-Since and time of last file modification 722try: 723 ims = email.utils.parsedate_to_datetime( 724 self.headers["If-Modified-Since"]) 725except(TypeError,IndexError,OverflowError,ValueError): 726# ignore ill-formed values 727pass 728else: 729if ims.tzinfo is None: 730# obsolete format with no timezone, cf. 731# https://tools.ietf.org/html/rfc7231#section-7.1.1.1 732 ims = ims.replace(tzinfo=datetime.timezone.utc) 733if ims.tzinfo is datetime.timezone.utc: 734# compare to UTC datetime of last modification 735 last_modif = datetime.datetime.fromtimestamp( 736 fs.st_mtime, datetime.timezone.utc) 737# remove microseconds, like in If-Modified-Since 738 last_modif = last_modif.replace(microsecond=0) 739 740if last_modif <= ims: 741 self.send_response(HTTPStatus.NOT_MODIFIED) 742 self.end_headers() 743 f.close() 744return None 745 746 self.send_response(HTTPStatus.OK) 747 self.send_header("Content-type", ctype) 748 self.send_header("Content-Length",str(fs[6])) 749 self.send_header("Last-Modified", 750 self.date_time_string(fs.st_mtime)) 751 self.end_headers() 752return f 753except: 754 f.close() 755raise 756 757deflist_directory(self, path): 758"""Helper to produce a directory listing (absent index.html). 759 760 Return value is either a file object, or None (indicating an 761 error). In either case, the headers are sent, making the 762 interface the same as for send_head(). 763 764 """ 765try: 766list= os.listdir(path) 767exceptOSError: 768 self.send_error( 769 HTTPStatus.NOT_FOUND, 770"No permission to list directory") 771return None 772list.sort(key=lambda a: a.lower()) 773 r = [] 774try: 775 displaypath = urllib.parse.unquote(self.path, 776 errors='surrogatepass') 777exceptUnicodeDecodeError: 778 displaypath = urllib.parse.unquote(path) 779 displaypath = html.escape(displaypath, quote=False) 780 enc = sys.getfilesystemencoding() 781 title ='Directory listing for%s'% displaypath 782 r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' 783'"http://www.w3.org/TR/html4/strict.dtd">') 784 r.append('<html>\n<head>') 785 r.append('<meta http-equiv="Content-Type" ' 786'content="text/html; charset=%s">'% enc) 787 r.append('<title>%s</title>\n</head>'% title) 788 r.append('<body>\n<h1>%s</h1>'% title) 789 r.append('<hr>\n<ul>') 790for name inlist: 791 fullname = os.path.join(path, name) 792 displayname = linkname = name 793# Append / for directories or @ for symbolic links 794if os.path.isdir(fullname): 795 displayname = name +"/" 796 linkname = name +"/" 797if os.path.islink(fullname): 798 displayname = name +"@" 799# Note: a link to a directory displays with @ and links with / 800 r.append('<li><a href="%s">%s</a></li>' 801% (urllib.parse.quote(linkname, 802 errors='surrogatepass'), 803 html.escape(displayname, quote=False))) 804 r.append('</ul>\n<hr>\n</body>\n</html>\n') 805 encoded ='\n'.join(r).encode(enc,'surrogateescape') 806 f = io.BytesIO() 807 f.write(encoded) 808 f.seek(0) 809 self.send_response(HTTPStatus.OK) 810 self.send_header("Content-type","text/html; charset=%s"% enc) 811 self.send_header("Content-Length",str(len(encoded))) 812 self.end_headers() 813return f 814 815deftranslate_path(self, path): 816"""Translate a /-separated PATH to the local filename syntax. 817 818 Components that mean special things to the local file system 819 (e.g. drive or directory names) are ignored. (XXX They should 820 probably be diagnosed.) 821 822 """ 823# abandon query parameters 824 path = path.split('?',1)[0] 825 path = path.split('#',1)[0] 826# Don't forget explicit trailing slash when normalizing. Issue17324 827 trailing_slash = path.rstrip().endswith('/') 828try: 829 path = urllib.parse.unquote(path, errors='surrogatepass') 830exceptUnicodeDecodeError: 831 path = urllib.parse.unquote(path) 832 path = posixpath.normpath(path) 833 words = path.split('/') 834 words =filter(None, words) 835 path = self.directory 836for word in words: 837if os.path.dirname(word)or word in(os.curdir, os.pardir): 838# Ignore components that are not a simple file/directory name 839continue 840 path = os.path.join(path, word) 841if trailing_slash: 842 path +='/' 843return path 844 845defcopyfile(self, source, outputfile): 846"""Copy all data between two file objects. 847 848 The SOURCE argument is a file object open for reading 849 (or anything with a read() method) and the DESTINATION 850 argument is a file object open for writing (or 851 anything with a write() method). 852 853 The only reason for overriding this would be to change 854 the block size or perhaps to replace newlines by CRLF 855 -- note however that this the default server uses this 856 to copy binary data as well. 857 858 """ 859 shutil.copyfileobj(source, outputfile) 860 861defguess_type(self, path): 862"""Guess the type of a file. 863 864 Argument is a PATH (a filename). 865 866 Return value is a string of the form type/subtype, 867 usable for a MIME Content-type header. 868 869 The default implementation looks the file's extension 870 up in the table self.extensions_map, using application/octet-stream 871 as a default; however it would be permissible (if 872 slow) to look inside the data to make a better guess. 873 874 """ 875 base, ext = posixpath.splitext(path) 876if ext in self.extensions_map: 877return self.extensions_map[ext] 878 ext = ext.lower() 879if ext in self.extensions_map: 880return self.extensions_map[ext] 881 guess, _ = mimetypes.guess_type(path) 882if guess: 883return guess 884return'application/octet-stream' 885 886 887# Utilities for CGIHTTPRequestHandler 888 889def_url_collapse_path(path): 890""" 891 Given a URL path, remove extra '/'s and '.' path elements and collapse 892 any '..' references and returns a collapsed path. 893 894 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 895 The utility of this function is limited to is_cgi method and helps 896 preventing some security attacks. 897 898 Returns: The reconstituted URL, which will always start with a '/'. 899 900 Raises: IndexError if too many '..' occur within the path. 901 902 """ 903# Query component should not be involved. 904 path, _, query = path.partition('?') 905 path = urllib.parse.unquote(path) 906 907# Similar to os.path.split(os.path.normpath(path)) but specific to URL 908# path semantics rather than local operating system semantics. 909 path_parts = path.split('/') 910 head_parts = [] 911for part in path_parts[:-1]: 912if part =='..': 913 head_parts.pop()# IndexError if more '..' than prior parts 914elif part and part !='.': 915 head_parts.append( part ) 916if path_parts: 917 tail_part = path_parts.pop() 918if tail_part: 919if tail_part =='..': 920 head_parts.pop() 921 tail_part ='' 922elif tail_part =='.': 923 tail_part ='' 924else: 925 tail_part ='' 926 927if query: 928 tail_part ='?'.join((tail_part, query)) 929 930 splitpath = ('/'+'/'.join(head_parts), tail_part) 931 collapsed_path ="/".join(splitpath) 932 933return collapsed_path 934 935 936 937nobody =None 938 939defnobody_uid(): 940"""Internal routine to get nobody's uid""" 941global nobody 942if nobody: 943return nobody 944try: 945import pwd 946exceptImportError: 947return-1 948try: 949 nobody = pwd.getpwnam('nobody')[2] 950exceptKeyError: 951 nobody =1+max(x[2]for x in pwd.getpwall()) 952return nobody 953 954 955defexecutable(path): 956"""Test for executable file.""" 957return os.access(path, os.X_OK) 958 959 960classCGIHTTPRequestHandler(SimpleHTTPRequestHandler): 961 962"""Complete HTTP server with GET, HEAD and POST commands. 963 964 GET and HEAD also support running CGI scripts. 965 966 The POST command is *only* implemented for CGI scripts. 967 968 """ 969 970# Determine platform specifics 971 have_fork =hasattr(os,'fork') 972 973# Make rfile unbuffered -- we need to read one line and then pass 974# the rest to a subprocess, so we can't use buffered input. 975 rbufsize =0 976 977defdo_POST(self): 978"""Serve a POST request. 979 980 This is only implemented for CGI scripts. 981 982 """ 983 984if self.is_cgi(): 985 self.run_cgi() 986else: 987 self.send_error( 988 HTTPStatus.NOT_IMPLEMENTED, 989"Can only POST to CGI scripts") 990 991defsend_head(self): 992"""Version of send_head that support CGI scripts""" 993if self.is_cgi(): 994return self.run_cgi() 995else: 996return SimpleHTTPRequestHandler.send_head(self) 997 998defis_cgi(self): 999"""Test whether self.path corresponds to a CGI script.10001001 Returns True and updates the cgi_info attribute to the tuple1002 (dir, rest) if self.path requires running a CGI script.1003 Returns False otherwise.10041005 If any exception is raised, the caller should assume that1006 self.path was rejected as invalid and act accordingly.10071008 The default implementation tests whether the normalized url1009 path begins with one of the strings in self.cgi_directories1010 (and the next character is a '/' or the end of the string).10111012 """1013 collapsed_path =_url_collapse_path(self.path)1014 dir_sep = collapsed_path.find('/',1)1015while dir_sep >0and not collapsed_path[:dir_sep]in self.cgi_directories:1016 dir_sep = collapsed_path.find('/', dir_sep+1)1017if dir_sep >0:1018 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]1019 self.cgi_info = head, tail1020return True1021return False102210231024 cgi_directories = ['/cgi-bin','/htbin']10251026defis_executable(self, path):1027"""Test whether argument path is an executable file."""1028returnexecutable(path)10291030defis_python(self, path):1031"""Test whether argument path is a Python script."""1032 head, tail = os.path.splitext(path)1033return tail.lower()in(".py",".pyw")10341035defrun_cgi(self):1036"""Execute a CGI script."""1037dir, rest = self.cgi_info1038 path =dir+'/'+ rest1039 i = path.find('/',len(dir)+1)1040while i >=0:1041 nextdir = path[:i]1042 nextrest = path[i+1:]10431044 scriptdir = self.translate_path(nextdir)1045if os.path.isdir(scriptdir):1046dir, rest = nextdir, nextrest1047 i = path.find('/',len(dir)+1)1048else:1049break10501051# find an explicit query string, if present.1052 rest, _, query = rest.partition('?')10531054# dissect the part after the directory name into a script name &1055# a possible additional path, to be stored in PATH_INFO.1056 i = rest.find('/')1057if i >=0:1058 script, rest = rest[:i], rest[i:]1059else:1060 script, rest = rest,''10611062 scriptname =dir+'/'+ script1063 scriptfile = self.translate_path(scriptname)1064if not os.path.exists(scriptfile):1065 self.send_error(1066 HTTPStatus.NOT_FOUND,1067"No such CGI script (%r)"% scriptname)1068return1069if not os.path.isfile(scriptfile):1070 self.send_error(1071 HTTPStatus.FORBIDDEN,1072"CGI script is not a plain file (%r)"% scriptname)1073return1074 ispy = self.is_python(scriptname)1075if self.have_fork or not ispy:1076if not self.is_executable(scriptfile):1077 self.send_error(1078 HTTPStatus.FORBIDDEN,1079"CGI script is not executable (%r)"% scriptname)1080return10811082# Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html1083# XXX Much of the following could be prepared ahead of time!1084 env = copy.deepcopy(os.environ)1085 env['SERVER_SOFTWARE'] = self.version_string()1086 env['SERVER_NAME'] = self.server.server_name1087 env['GATEWAY_INTERFACE'] ='CGI/1.1'1088 env['SERVER_PROTOCOL'] = self.protocol_version1089 env['SERVER_PORT'] =str(self.server.server_port)1090 env['REQUEST_METHOD'] = self.command1091 uqrest = urllib.parse.unquote(rest)1092 env['PATH_INFO'] = uqrest1093 env['PATH_TRANSLATED'] = self.translate_path(uqrest)1094 env['SCRIPT_NAME'] = scriptname1095if query:1096 env['QUERY_STRING'] = query1097 env['REMOTE_ADDR'] = self.client_address[0]1098 authorization = self.headers.get("authorization")1099if authorization:1100 authorization = authorization.split()1101iflen(authorization) ==2:1102import base64, binascii1103 env['AUTH_TYPE'] = authorization[0]1104if authorization[0].lower() =="basic":1105try:1106 authorization = authorization[1].encode('ascii')1107 authorization = base64.decodebytes(authorization).\1108decode('ascii')1109except(binascii.Error,UnicodeError):1110pass1111else:1112 authorization = authorization.split(':')1113iflen(authorization) ==2:1114 env['REMOTE_USER'] = authorization[0]1115# XXX REMOTE_IDENT1116if self.headers.get('content-type')is None:1117 env['CONTENT_TYPE'] = self.headers.get_content_type()1118else:1119 env['CONTENT_TYPE'] = self.headers['content-type']1120 length = self.headers.get('content-length')1121if length:1122 env['CONTENT_LENGTH'] = length1123 referer = self.headers.get('referer')1124if referer:1125 env['HTTP_REFERER'] = referer1126 accept = self.headers.get_all('accept', ())1127 env['HTTP_ACCEPT'] =','.join(accept)1128 ua = self.headers.get('user-agent')1129if ua:1130 env['HTTP_USER_AGENT'] = ua1131 co =filter(None, self.headers.get_all('cookie', []))1132 cookie_str =', '.join(co)1133if cookie_str:1134 env['HTTP_COOKIE'] = cookie_str1135# XXX Other HTTP_* headers1136# Since we're setting the env in the parent, provide empty1137# values to override previously set values1138for k in('QUERY_STRING','REMOTE_HOST','CONTENT_LENGTH',1139'HTTP_USER_AGENT','HTTP_COOKIE','HTTP_REFERER'):1140 env.setdefault(k,"")11411142 self.send_response(HTTPStatus.OK,"Script output follows")1143 self.flush_headers()11441145 decoded_query = query.replace('+',' ')11461147if self.have_fork:1148# Unix -- fork as we should1149 args = [script]1150if'='not in decoded_query:1151 args.append(decoded_query)1152 nobody =nobody_uid()1153 self.wfile.flush()# Always flush before forking1154 pid = os.fork()1155if pid !=0:1156# Parent1157 pid, sts = os.waitpid(pid,0)1158# throw away additional data [see bug #427345]1159while select.select([self.rfile], [], [],0)[0]:1160if not self.rfile.read(1):1161break1162 exitcode = os.waitstatus_to_exitcode(sts)1163if exitcode:1164 self.log_error(f"CGI script exit code{exitcode}")1165return1166# Child1167try:1168try:1169 os.setuid(nobody)1170exceptOSError:1171pass1172 os.dup2(self.rfile.fileno(),0)1173 os.dup2(self.wfile.fileno(),1)1174 os.execve(scriptfile, args, env)1175except:1176 self.server.handle_error(self.request, self.client_address)1177 os._exit(127)11781179else:1180# Non-Unix -- use subprocess1181import subprocess1182 cmdline = [scriptfile]1183if self.is_python(scriptfile):1184 interp = sys.executable1185if interp.lower().endswith("w.exe"):1186# On Windows, use python.exe, not pythonw.exe1187 interp = interp[:-5] + interp[-4:]1188 cmdline = [interp,'-u'] + cmdline1189if'='not in query:1190 cmdline.append(query)1191 self.log_message("command:%s", subprocess.list2cmdline(cmdline))1192try:1193 nbytes =int(length)1194except(TypeError,ValueError):1195 nbytes =01196 p = subprocess.Popen(cmdline,1197 stdin=subprocess.PIPE,1198 stdout=subprocess.PIPE,1199 stderr=subprocess.PIPE,1200 env = env1201)1202if self.command.lower() =="post"and nbytes >0:1203 data = self.rfile.read(nbytes)1204else:1205 data =None1206# throw away additional data [see bug #427345]1207while select.select([self.rfile._sock], [], [],0)[0]:1208if not self.rfile._sock.recv(1):1209break1210 stdout, stderr = p.communicate(data)1211 self.wfile.write(stdout)1212if stderr:1213 self.log_error('%s', stderr)1214 p.stderr.close()1215 p.stdout.close()1216 status = p.returncode1217if status:1218 self.log_error("CGI script exit status%#x", status)1219else:1220 self.log_message("CGI script exited OK")122112221223def_get_best_family(*address):1224 infos = socket.getaddrinfo(1225*address,1226type=socket.SOCK_STREAM,1227 flags=socket.AI_PASSIVE,1228)1229 family,type, proto, canonname, sockaddr =next(iter(infos))1230return family, sockaddr123112321233deftest(HandlerClass=BaseHTTPRequestHandler,1234 ServerClass=ThreadingHTTPServer,1235 protocol="HTTP/1.0", port=8000, bind=None):1236"""Test the HTTP request handler class.12371238 This runs an HTTP server on port 8000 (or the port argument).12391240 """1241 ServerClass.address_family, addr =_get_best_family(bind, port)12421243 HandlerClass.protocol_version = protocol1244withServerClass(addr, HandlerClass)as httpd:1245 host, port = httpd.socket.getsockname()[:2]1246 url_host = f'[{host}]'if':'in host else host1247print(1248 f"Serving HTTP on{host}port{port}"1249 f"(http://{url_host}:{port}/) ..."1250)1251try:1252 httpd.serve_forever()1253exceptKeyboardInterrupt:1254print("\nKeyboard interrupt received, exiting.")1255 sys.exit(0)12561257if __name__ =='__main__':1258import argparse12591260 parser = argparse.ArgumentParser()1261 parser.add_argument('--cgi', action='store_true',1262help='Run as CGI Server')1263 parser.add_argument('--bind','-b', metavar='ADDRESS',1264help='Specify alternate bind address '1265'[default: all interfaces]')1266 parser.add_argument('--directory','-d', default=os.getcwd(),1267help='Specify alternative directory '1268'[default:current directory]')1269 parser.add_argument('port', action='store',1270 default=8000,type=int,1271 nargs='?',1272help='Specify alternate port [default: 8000]')1273 args = parser.parse_args()1274if args.cgi:1275 handler_class = CGIHTTPRequestHandler1276else:1277 handler_class =partial(SimpleHTTPRequestHandler,1278 directory=args.directory)12791280# ensure dual-stack is not disabled; ref #389071281classDualStackServer(ThreadingHTTPServer):1282defserver_bind(self):1283# suppress exception when protocol is IPv41284with contextlib.suppress(Exception):1285 self.socket.setsockopt(1286 socket.IPPROTO_IPV6, socket.IPV6_V6ONLY,0)1287returnsuper().server_bind()12881289test(1290 HandlerClass=handler_class,1291 ServerClass=DualStackServer,1292 port=args.port,1293 bind=args.bind,1294)