r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
68

69
from array import array
70
import os
Jeremy Hylton's avatar
Jeremy Hylton committed
71
import socket
72
from sys import py3kwarning
73
from urlparse import urlsplit
Bill Janssen's avatar
Bill Janssen committed
74
import warnings
75 76 77 78
with warnings.catch_warnings():
    if py3kwarning:
        warnings.filterwarnings("ignore", ".*mimetools has been removed",
                                DeprecationWarning)
79
    import mimetools
80

81
try:
82
    from cStringIO import StringIO
83
except ImportError:
84
    from StringIO import StringIO
85

86
# Names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
92

93
# default port numbers; HTTPConnection.default_port is HTTP_PORT
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder stored in HTTPResponse attributes until the response has
# been parsed
_UNKNOWN = 'UNKNOWN'

# connection states (see the state table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
# status codes
# Integer constants for HTTP status codes, grouped by class (the first
# digit of the code).

# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
# Mapping status codes to their standard reason phrases.
#
# Every status-code constant defined by this module has an entry here, so
# that responses[SOME_CONSTANT] never raises KeyError.  306 has no constant
# but is kept because it is a reserved code.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

212 213 214
# maximal amount of data to read at one time in _safe_read
# (upper bound on the size passed to a single fp.read() call there)
MAXAMOUNT = 1048576

# maximal line length when calling readline().
# Callers request _MAXLINE + 1 bytes and raise LineTooLong when more than
# _MAXLINE bytes come back.
_MAXLINE = 65536

# maximum amount of headers accepted
# (HTTPMessage.readheaders raises HTTPException once it has collected
# more header lines than this)
_MAXHEADERS = 100


222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
class HTTPMessage(mimetools.Message):
    """mimetools.Message whose header parsing merges repeated fields."""

    def addheader(self, key, value):
        """Store value for key, comma-joining it onto any earlier value."""
        earlier = self.dict.get(key)
        if earlier is not None:
            value = ", ".join((earlier, value))
        self.dict[key] = value

    def addcontinue(self, key, more):
        """Append the text of a continuation line to the value of key."""
        self.dict[key] = self.dict[key] + "\n " + more

    def readheaders(self):
        """Parse header lines from self.fp.

        Lines are consumed up to the blank line that terminates the header
        block.  That terminating line is swallowed and is not recorded.  If
        a line that is not a header ends the block instead (an error), the
        method tries to push it back onto the file; it is not recorded
        either.

        On return self.status is '' when everything went well and an error
        message otherwise.  self.headers holds the raw, uninterpreted
        header lines, so printing them reproduces the header as received.
        self.dict maps each field name to its value.

        Repeated fields are merged as described in RFC 2616 sec 4.2: each
        later field-value is appended to the first, separated by a comma,
        in the order received.

        Raises HTTPException when more than _MAXHEADERS header lines are
        collected and LineTooLong when a line exceeds _MAXLINE bytes.
        """
        # XXX This replaces rfc822.Message.readheaders().  The base class
        # offers no hook for the customisation needed here, so this is a
        # copy of the base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = raw_lines = []
        self.status = ''
        current_name = ""
        at_first_line = 1

        # Work out how a wrongly consumed line can be given back: prefer
        # an unread() method on the file, else remember positions via tell().
        line_start = push_back = get_pos = None
        if hasattr(self.fp, 'unread'):
            push_back = self.fp.unread
        elif self.seekable:
            get_pos = self.fp.tell

        while True:
            if len(raw_lines) > _MAXHEADERS:
                raise HTTPException("got more than %d headers" % _MAXHEADERS)
            if get_pos:
                try:
                    line_start = get_pos()
                except IOError:
                    # the file turned out not to be seekable after all
                    line_start = get_pos = None
                    self.seekable = 0

            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            if not line:
                self.status = 'EOF in headers'
                return

            # Skip unix From name time lines
            if at_first_line and line.startswith('From '):
                self.unixfrom += line
                continue
            at_first_line = 0

            if current_name and line[0] in ' \t':
                # Continuation of the previous header.
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                raw_lines.append(line)
                self.addcontinue(current_name, line.strip())
                continue
            if self.iscomment(line):
                # Comments are dropped silently.
                continue
            if self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                return

            current_name = self.isheader(line)
            if current_name:
                # A legal header line: keep the raw text and the parsed value.
                raw_lines.append(line)
                self.addheader(current_name,
                               line[len(current_name)+1:].strip())
                continue

            # Anything else is not a header: record why and stop.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'
            # Try to undo the read.
            if push_back:
                push_back(line)
            elif get_pos:
                self.fp.seek(line_start)
            else:
                self.status += '; bad seek'
            return
330 331

class HTTPResponse:
Jeremy Hylton's avatar
Jeremy Hylton committed
332 333 334

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
Skip Montanaro's avatar
typo  
Skip Montanaro committed
335
    # false because it prevents clients from talking to HTTP/0.9
Jeremy Hylton's avatar
Jeremy Hylton committed
336 337 338 339 340
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

341 342 343
    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
        """Wrap sock in a file object and reset all parsed-response state.

        sock       -- object providing makefile(); read through self.fp
        debuglevel -- values above 0 make the parser print what it reads
        strict     -- if true, an unparsable status line raises
                      BadStatusLine instead of being treated as HTTP/0.9
        method     -- request method; 'HEAD' responses carry no body
        buffering  -- if true, use a buffered file object
        """
        # Headers are read with readline().  With a buffered file object
        # those calls could consume part of the response that the caller
        # may want to read via recv() on the underlying socket, so the
        # default is an unbuffered file (buffer size 0).  A caller that
        # never calls sock.recv() can ask for buffering, which is fine and
        # recommended for performance.
        makefile_args = ('rb',) if buffering else ('rb', 0)
        self.fp = sock.makefile(*makefile_args)

        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        # header container; stays None until begin() has run
        self.msg = None

        # from the Status-Line of the response:
        # HTTP-Version, Status-Code and Reason-Phrase
        self.version = self.status = self.reason = _UNKNOWN

        # is "chunked" being used?
        self.chunked = _UNKNOWN
        # bytes left to read in current chunk
        self.chunk_left = _UNKNOWN
        # number of bytes left in response
        self.length = _UNKNOWN
        # conn will close at end of response
        self.will_close = _UNKNOWN
368

369
    def _read_status(self):
Jeremy Hylton's avatar
Jeremy Hylton committed
370
        # Initialize with Simple-Response defaults
371 372 373
        line = self.fp.readline(_MAXLINE + 1)
        if len(line) > _MAXLINE:
            raise LineTooLong("header line")
374 375
        if self.debuglevel > 0:
            print "reply:", repr(line)
376 377 378 379
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
380
        try:
381
            [version, status, reason] = line.split(None, 2)
382 383
        except ValueError:
            try:
384
                [version, status] = line.split(None, 1)
385 386
                reason = ""
            except ValueError:
Jeremy Hylton's avatar
Jeremy Hylton committed
387 388 389 390 391 392 393 394 395 396 397
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""
398

399 400
        # The status code is a three-digit number
        try:
401
            status = int(status)
402 403 404 405
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
406 407
        return version, status, reason

408
    def begin(self):
409 410 411
        if self.msg is not None:
            # we've already started reading the response
            return
412

413
        # read until we get a non-100 response
Raymond Hettinger's avatar
Raymond Hettinger committed
414
        while True:
415
            version, status, reason = self._read_status()
416
            if status != CONTINUE:
417 418
                break
            # skip the header from the 100 response
Raymond Hettinger's avatar
Raymond Hettinger committed
419
            while True:
420 421 422 423
                skip = self.fp.readline(_MAXLINE + 1)
                if len(skip) > _MAXLINE:
                    raise LineTooLong("header line")
                skip = skip.strip()
424 425 426 427
                if not skip:
                    break
                if self.debuglevel > 0:
                    print "header:", skip
Tim Peters's avatar
Tim Peters committed
428

429 430
        self.status = status
        self.reason = reason.strip()
431 432
        if version == 'HTTP/1.0':
            self.version = 10
433
        elif version.startswith('HTTP/1.'):
Tim Peters's avatar
Tim Peters committed
434
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
435 436
        elif version == 'HTTP/0.9':
            self.version = 9
437 438
        else:
            raise UnknownProtocol(version)
439

440
        if self.version == 9:
441
            self.length = None
442
            self.chunked = 0
Jeremy Hylton's avatar
Jeremy Hylton committed
443
            self.will_close = 1
444
            self.msg = HTTPMessage(StringIO())
445 446
            return

447
        self.msg = HTTPMessage(self.fp, 0)
448 449 450
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                print "header:", hdr,
451 452 453 454 455 456

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
457
        if tr_enc and tr_enc.lower() == "chunked":
458 459 460 461 462 463
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
464
        self.will_close = self._check_close()
465 466 467 468 469

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
470 471 472 473
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
474 475 476
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
477 478 479 480
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
481
        if (status == NO_CONTENT or status == NOT_MODIFIED or
482 483
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
484 485 486 487 488 489 490 491 492 493
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

494
    def _check_close(self):
495
        conn = self.msg.getheader('connection')
496 497 498 499
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            conn = self.msg.getheader('connection')
500
            if conn and "close" in conn.lower():
501 502 503
                return True
            return False

504 505
        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.
506

Georg Brandl's avatar
Georg Brandl committed
507
        # For older HTTP, Keep-Alive indicates persistent connection.
508 509
        if self.msg.getheader('keep-alive'):
            return False
Tim Peters's avatar
Tim Peters committed
510

511 512 513 514 515
        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

516 517
        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader('proxy-connection')
518
        if pconn and "keep-alive" in pconn.lower():
519 520 521 522 523
            return False

        # otherwise, assume it will close
        return True

524 525 526 527 528 529 530 531 532 533 534 535 536 537
    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return True once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

538 539
    # XXX It would be nice to have readline and __iter__ for this, too.

540 541 542 543
    def read(self, amt=None):
        if self.fp is None:
            return ''

544
        if self._method == 'HEAD':
545
            self.close()
546 547
            return ''

548
        if self.chunked:
549
            return self._read_chunked(amt)
Tim Peters's avatar
Tim Peters committed
550

551
        if amt is None:
552
            # unbounded read
553
            if self.length is None:
554 555
                s = self.fp.read()
            else:
556 557 558 559 560
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self.close()
                    raise
561
                self.length = 0
Tim Peters's avatar
Tim Peters committed
562
            self.close()        # we read everything
563 564 565 566 567 568 569 570 571 572 573
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
574
        if not s and amt:
575 576 577
            # Ideally, we would raise IncompleteRead if the content-length
            # wasn't satisfied, but it might break compatibility.
            self.close()
578 579
        if self.length is not None:
            self.length -= len(s)
580 581
            if not self.length:
                self.close()
582

583 584
        return s

585 586 587
    def _read_chunked(self, amt):
        """Read from a body sent with chunked transfer encoding.

        amt -- maximum number of bytes to return, or None to read all
               remaining chunks.

        Returns the data read.  When the last chunk has been consumed, the
        trailer is discarded and the response is closed.  Raises
        LineTooLong for an over-long chunk-size or trailer line and
        IncompleteRead when a chunk-size line cannot be parsed; the
        response is closed first in that case.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        value = []
        while True:
            if chunk_left is None:
                # at a chunk boundary: read the next chunk-size line
                line = self.fp.readline(_MAXLINE + 1)
                if len(line) > _MAXLINE:
                    raise LineTooLong("chunk size")
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(value))
                if chunk_left == 0:
                    break
            if amt is None:
                value.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                # request satisfied inside this chunk; remember what is left
                value.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(value)
            elif amt == chunk_left:
                value.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(value)
            else:
                value.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("trailer line")
            # Stop at the blank line ending the trailer.  A bare LF is
            # accepted as well as CRLF so that a server using LF line
            # endings does not make us read past the end of the response.
            # An empty string means EOF: a vanishingly small number of
            # sites EOF without sending the trailer.
            if line in ('\r\n', '\n', ''):
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(value)
Tim Peters's avatar
Tim Peters committed
642

643 644 645 646 647 648 649 650 651 652 653 654 655 656
    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
657 658 659 660 661
        # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
        # return less than x bytes unless EOF is encountered.  It now handles
        # signal interruptions (socket.error EINTR) internally.  This code
        # never caught that exception anyways.  It seems largely pointless.
        # self.fp.read(amt) will work fine.
662
        s = []
663
        while amt > 0:
664
            chunk = self.fp.read(min(amt, MAXAMOUNT))
665
            if not chunk:
666
                raise IncompleteRead(''.join(s), amt)
667
            s.append(chunk)
Raymond Hettinger's avatar
Raymond Hettinger committed
668
            amt -= len(chunk)
669
        return ''.join(s)
670

671 672 673
    def fileno(self):
        """Return the result of fileno() on the response's file object."""
        return self.fp.fileno()

674 675 676 677
    def getheader(self, name, default=None):
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)
678

679 680 681 682 683 684
    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.items()

685 686 687

class HTTPConnection:

688 689 690 691 692 693
    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
694
    debuglevel = 0
Jeremy Hylton's avatar
Jeremy Hylton committed
695
    strict = 0
696

697
    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        """Record the connection parameters; no socket is opened here.

        host and port are normalised through _get_hostport(), so the port
        may be embedded in host (e.g. "example.com:8080").  A strict value
        other than None overrides the class-level default.  timeout and
        source_address are stored and later handed to the connection
        factory by connect().
        """
        self.timeout = timeout
        self.source_address = source_address

        # Socket and request/response bookkeeping.
        self.sock = None
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None

        # CONNECT-tunnel settings; populated by set_tunnel().
        self._tunnel_host = self._tunnel_port = None
        self._tunnel_headers = {}

        if strict is not None:
            self.strict = strict

        self.host, self.port = self._get_hostport(host, port)

        # This is stored as an instance variable to allow unittests
        # to replace with a suitable mock
        self._create_connection = socket.create_connection

718
    def set_tunnel(self, host, port=None, headers=None):
        """Set up host and port for HTTP CONNECT tunnelling.

        In a connection that uses HTTP CONNECT tunnelling, the host passed
        to the constructor is used as the proxy server that relays all
        communication to the endpoint passed to set_tunnel.  This is done
        by sending an HTTP CONNECT request to the proxy server when the
        connection is established.

        This method must be called before the HTTP connection has been
        established; RuntimeError is raised if a socket already exists.

        The headers argument should be a mapping of extra HTTP headers
        to send with the CONNECT request.  A false value (None or an empty
        mapping) resets the tunnel headers to an empty dict.
        """
        # Verify if this is required.
        if self.sock:
            raise RuntimeError("Can't setup tunnel for established connection.")

        self._tunnel_host = host
        self._tunnel_port = port
        if headers:
            self._tunnel_headers = headers
        else:
            # Rebind instead of calling .clear(): the current mapping may be
            # one the caller handed in on an earlier call, and emptying it
            # in place would silently destroy the caller's data.
            self._tunnel_headers = {}
742

743
    def _get_hostport(self, host, port):
744
        if port is None:
745
            i = host.rfind(':')
746 747
            j = host.rfind(']')         # ipv6 addresses have [...]
            if i > j:
748 749 750
                try:
                    port = int(host[i+1:])
                except ValueError:
751 752 753 754
                    if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
                        port = self.default_port
                    else:
                        raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
755 756 757
                host = host[:i]
            else:
                port = self.default_port
758
            if host and host[0] == '[' and host[-1] == ']':
Brett Cannon's avatar
Brett Cannon committed
759
                host = host[1:-1]
760
        return (host, port)
761

762 763 764
    def set_debuglevel(self, level):
        """Store level as self.debuglevel.

        Other methods of this class print diagnostics when the stored
        value is greater than zero.
        """
        self.debuglevel = level

765
    def _tunnel(self):
        """Send a CONNECT request for the tunnel target and eat the reply.

        The request line and any configured tunnel headers are written with
        send().  A status other than 200 closes the connection and raises
        socket.error.  On success the reply's header lines are read and
        discarded up to the blank line (or EOF); LineTooLong is raised if
        one of them exceeds _MAXLINE.
        """
        (host, port) = self._get_hostport(self._tunnel_host, self._tunnel_port)
        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (host, port))
        for name, val in self._tunnel_headers.iteritems():
            self.send("%s: %s\r\n" % (name, val))
        self.send("\r\n")

        response = self.response_class(self.sock, strict = self.strict,
                                       method = self._method)
        (_version, status, reason) = response._read_status()
        if status != 200:
            self.close()
            raise socket.error("Tunnel connection failed: %d %s" % (status,
                                                                    reason.strip()))

        # Skip the rest of the proxy's response headers.
        while True:
            line = response.fp.readline(_MAXLINE + 1)
            if len(line) > _MAXLINE:
                raise LineTooLong("header line")
            # An empty read covers sites which EOF without sending trailer.
            if not line or line == '\r\n':
                break
788 789


790 791
    def connect(self):
        """Connect to the host and port specified in __init__.

        The socket comes from self._create_connection; when a tunnel host
        has been configured the CONNECT handshake is performed right away.
        """
        address = (self.host, self.port)
        self.sock = self._create_connection(address, self.timeout,
                                            self.source_address)
        if self._tunnel_host:
            self._tunnel()

798 799 800
    def close(self):
        """Close the connection to the HTTP server.

        Closes the socket and any response still attached to this
        connection, then puts the state machine back to idle.
        """
        sock = self.sock
        if sock:
            sock.close()   # close it manually... there may be other refs
            self.sock = None
        pending = self.__response
        if pending:
            pending.close()
            self.__response = None
        self.__state = _CS_IDLE

808 809
    def send(self, data):
        """Send `data' to the server."""
810 811 812 813 814 815
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

816
        if self.debuglevel > 0:
817
            print "send:", repr(data)
818
        blocksize = 8192
819
        if hasattr(data,'read') and not isinstance(data, array):
820
            if self.debuglevel > 0: print "sendIng a read()able"
821 822 823 824
            datablock = data.read(blocksize)
            while datablock:
                self.sock.sendall(datablock)
                datablock = data.read(blocksize)
825
        else:
826
            self.sock.sendall(data)
827

828 829
    def _output(self, s):
        """Add a line of output to the current request buffer.
Tim Peters's avatar
Tim Peters committed
830

831
        Assumes that the line does *not* end with \\r\\n.
832 833 834
        """
        self._buffer.append(s)

835
    def _send_output(self, message_body=None):
836 837
        """Send the currently buffered request and clear the buffer.

838
        Appends an extra \\r\\n to the buffer.
839
        A message_body may be specified, to be appended to the request.
840 841 842 843
        """
        self._buffer.extend(("", ""))
        msg = "\r\n".join(self._buffer)
        del self._buffer[:]
844 845
        # If msg and message_body are sent in a single send() call,
        # it will avoid performance problems caused by the interaction
846
        # between delayed ack and the Nagle algorithm.
847
        if isinstance(message_body, str):
848
            msg += message_body
849
            message_body = None
850
        self.send(msg)
851 852 853 854
        if message_body is not None:
            #message_body was not a string (i.e. it is a file) and
            #we must run the risk of Nagle
            self.send(message_body)
855

856
    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest unless the connection is idle.  The
        request line (and, for HTTP/1.1, the automatic headers) are only
        buffered here; nothing is written until endheaders().
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state != _CS_IDLE:
            raise CannotSendRequest()
        self.__state = _CS_REQ_STARTED

        # Save the method we use, we need it later in the response phase
        self._method = method
        url = url or '/'
        self._output('%s %s %s' % (method, url, self._http_vsn_str))

        if self._http_vsn != 11:
            # For HTTP/1.0, the server will assume "not chunked"
            return

        # Issue some standard headers for better HTTP/1.1 compliance

        if not skip_host:
            # this header is issued *only* for HTTP/1.1
            # connections. more specifically, this means it is
            # only issued when the client uses the new
            # HTTPConnection() class. backwards-compat clients
            # will be using HTTP/1.0 and those clients may be
            # issuing this header themselves. we should NOT issue
            # it twice; some web servers (such as Apache) barf
            # when they see two Host: headers

            # If we need a non-standard port, include it in the
            # header.  If the request is going through a proxy,
            # use the host of the actual URL, not the host of the
            # proxy.

            netloc = ''
            if url.startswith('http'):
                netloc = urlsplit(url)[1]

            if netloc:
                try:
                    host_value = netloc.encode("ascii")
                except UnicodeEncodeError:
                    host_value = netloc.encode("idna")
                self.putheader('Host', host_value)
            else:
                if self._tunnel_host:
                    target, target_port = self._tunnel_host, self._tunnel_port
                else:
                    target, target_port = self.host, self.port

                try:
                    host_value = target.encode("ascii")
                except UnicodeEncodeError:
                    host_value = target.encode("idna")
                # Wrap the IPv6 Host Header with [] (RFC 2732)
                if host_value.find(':') >= 0:
                    host_value = "[" + host_value + "]"
                if target_port != self.default_port:
                    host_value = "%s:%s" % (host_value, target_port)
                self.putheader('Host', host_value)

        # note: we are assuming that clients will not attempt to set these
        #       headers since *this* library must deal with the
        #       consequences. this also means that when the supporting
        #       libraries are updated to recognize other forms, then this
        #       code should be changed (removed or updated).

        # we only want a Content-Encoding of "identity" since we don't
        # support encodings such as x-gzip or x-deflate.
        if not skip_accept_encoding:
            self.putheader('Accept-Encoding', 'identity')

        # we can accept "chunked" Transfer-Encodings, but no others
        # NOTE: no TE header implies *only* "chunked"
        #self.putheader('TE', 'chunked')

        # if TE is supplied in the header, then it must appear in a
        # Connection header.
        #self.putheader('Connection', 'TE')

973
    def putheader(self, header, *values):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Several values are emitted as one folded header, joined with
        "\\r\\n\\t".  Raises CannotSendHeader unless a request has been
        started with putrequest() and not yet finished.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        folded = '\r\n\t'.join([str(item) for item in values])
        self._output('%s: %s' % (header, folded))
983

984 985
    def endheaders(self, message_body=None):
        """Indicate that the last header line has been sent to the server.

        This method sends the request to the server.  The optional
        message_body argument can be used to pass a message body
        associated with the request.  The message body will be sent in
        the same packet as the message headers if it is string, otherwise it is
        sent as a separate packet.

        Raises CannotSendHeader unless a request is currently being built.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()
        self.__state = _CS_REQ_SENT
        self._send_output(message_body)
998 999 1000

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        All arguments are passed straight to _send_request().
        """
        self._send_request(method, url, body, headers)
1002

1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019
    def _set_content_length(self, body):
        # Set the content-length based on the body.
        thelen = None
        try:
            thelen = str(len(body))
        except TypeError, te:
            # If this is a file-like object, try to
            # fstat its file descriptor
            try:
                thelen = str(os.fstat(body.fileno()).st_size)
            except (AttributeError, OSError):
                # Don't send a length if this failed
                if self.debuglevel > 0: print "Cannot stat!!"

        if thelen is not None:
            self.putheader('Content-Length', thelen)

1020
    def _send_request(self, method, url, body, headers):
1021
        # Honor explicitly requested Host: and Accept-Encoding: headers.
1022 1023 1024 1025 1026 1027
        header_names = dict.fromkeys([k.lower() for k in headers])
        skips = {}
        if 'host' in header_names:
            skips['skip_host'] = 1
        if 'accept-encoding' in header_names:
            skips['skip_accept_encoding'] = 1
1028

1029 1030
        self.putrequest(method, url, **skips)

1031
        if body is not None and 'content-length' not in header_names:
1032
            self._set_content_length(body)
Raymond Hettinger's avatar
Raymond Hettinger committed
1033
        for hdr, value in headers.iteritems():
1034
            self.putheader(hdr, value)
1035
        self.endheaders(body)
1036

1037
    def getresponse(self, buffering=False):
        """Get the response from the server.

        A response object is built with self.response_class, its headers
        are read with begin(), and the connection state returns to idle.
        If the response announces that the connection will close, this
        connection is closed (handing the socket over to the response);
        otherwise the response is remembered so a later call can tell
        whether it has been completed.

        Raises ResponseNotReady when no request has been fully sent or a
        previous response is still unfinished.  If reading the response
        headers fails, the half-built response is closed before the
        exception propagates.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        args = (self.sock,)
        kwds = {"strict":self.strict, "method":self._method}
        if self.debuglevel > 0:
            args += (self.debuglevel,)
        if buffering:
            #only add this keyword if non-default, for compatibility with
            #other response_classes.
            kwds["buffering"] = True
        response = self.response_class(*args, **kwds)

        try:
            response.begin()
            assert response.will_close != _UNKNOWN
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except:
            # Deliberately catches everything (the exception is re-raised):
            # the response must not be left open when begin() fails.
            response.close()
            raise
1085 1086


Jeremy Hylton's avatar
Jeremy Hylton committed
1087
class HTTP:
1088
    "Compatibility class with httplib.py from 1.5."
1089

1090 1091
    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'
1092

1093
    debuglevel = 0
1094

Jeremy Hylton's avatar
Jeremy Hylton committed
1095 1096
    _connection_class = HTTPConnection

Jeremy Hylton's avatar
Jeremy Hylton committed
1097
    def __init__(self, host='', port=None, strict=None):
1098
        "Provide a default host, since the superclass requires one."
1099

1100 1101 1102
        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None
1103

1104
        # Note that we may pass an empty string as the host; this will raise
1105 1106
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
Jeremy Hylton's avatar
Jeremy Hylton committed
1107
        self._setup(self._connection_class(host, port, strict))
Greg Stein's avatar
Greg Stein committed
1108 1109 1110 1111

    def _setup(self, conn):
        self._conn = conn

Jeremy Hylton's avatar
Jeremy Hylton committed
1112
        # set up delegation to flesh out interface
Greg Stein's avatar
Greg Stein committed
1113 1114
        self.send = conn.send
        self.putrequest = conn.putrequest
1115
        self.putheader = conn.putheader
Greg Stein's avatar
Greg Stein committed
1116 1117 1118 1119 1120
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str
1121

1122
        self.file = None
1123

1124 1125
    def connect(self, host=None, port=None):
        """Accept arguments to set the host/port, since the superclass doesn't.

        When host is given, it (and port) are normalised with the wrapped
        connection's _get_hostport() and stored on that connection before
        it is asked to connect.  Without host the connection's existing
        host/port are used unchanged.
        """

        if host is not None:
            # The connection class only offers _get_hostport(), which
            # returns the parsed pair; store the result ourselves.
            (self._conn.host, self._conn.port) = \
                self._conn._get_hostport(host, port)
        self._conn.connect()
1130

1131 1132 1133
    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        # self.file is assigned by getreply() and reset to None by close().
        current = self.file
        return current
1134

1135
    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        On a BadStatusLine error the tuple is (-1, <offending line>, None)
        and the connection is closed.
        """
        try:
            if buffering:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
            else:
                response = self._conn.getresponse()
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            # NOTE(review): close() also resets self.file to None, so the
            # file object created just above does not survive this call.
            self.close()

            self.headers = None
            return -1, e.line, None

        headers = response.msg
        self.headers = headers
        self.file = response.fp
        return response.status, response.reason, headers
1167

1168
    def close(self):
        """Close the wrapped connection and forget the response file."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1177

1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188
try:
    import ssl
except ImportError:
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None, context=None, check_hostname=None):
            """Set up the plain connection plus the SSL context to wrap it.

            Without an explicit context, ssl.create_default_context() is
            used.  check_hostname defaults to whether the context verifies
            certificates at all; asking for hostname checking on a
            non-verifying context raises ValueError.  A key and/or
            certificate file, if given, is loaded into the context.
            """
            HTTPConnection.__init__(self, host, port, strict, timeout,
                                    source_address)
            self.key_file = key_file
            self.cert_file = cert_file

            if context is None:
                context = ssl.create_default_context()
            verifies = context.verify_mode != ssl.CERT_NONE
            if check_hostname is None:
                check_hostname = verifies
            elif check_hostname and not verifies:
                raise ValueError("check_hostname needs a SSL context with "
                                 "either CERT_OPTIONAL or CERT_REQUIRED")
            if key_file or cert_file:
                context.load_cert_chain(cert_file, key_file)

            self._context = context
            self._check_hostname = check_hostname

        def connect(self):
            "Connect to a host on a given (SSL) port."

            HTTPConnection.connect(self)

            # When tunnelling, the certificate belongs to the tunnel target.
            server_hostname = self._tunnel_host or self.host

            self.sock = self._context.wrap_socket(
                self.sock, server_hostname=server_hostname)

            # Check the hostname ourselves only when the context is not
            # already configured to do so.
            if not self._context.check_hostname and self._check_hostname:
                try:
                    ssl.match_hostname(self.sock.getpeercert(), server_hostname)
                except Exception:
                    self.sock.shutdown(socket.SHUT_RDWR)
                    self.sock.close()
                    raise
1227

1228
    __all__.append("HTTPSConnection")
1229

Jeremy Hylton's avatar
Jeremy Hylton committed
1230 1231 1232 1233 1234
    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            effective_port = None if port == 0 else port
            connection = self._connection_class(host, effective_port, key_file,
                                                cert_file, strict)
            self._setup(connection)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file
Greg Stein's avatar
Greg Stein committed
1254

1255

1256
    def FakeSocket (sock, sslobj):
        """Deprecated shim: emit a DeprecationWarning and return sslobj.

        The sock argument is ignored.
        """
        message = ("FakeSocket is deprecated, and won't be in 3.x.  "
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj


1263
class HTTPException(Exception):
    """Base class of the exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
    pass
1267 1268

class NotConnected(HTTPException):
    """Raised by HTTPConnection.send() when there is no socket and
    auto_open is off."""
    pass
1270

1271 1272 1273
class InvalidURL(HTTPException):
    """Raised when the port embedded in a host string is not numeric."""
    pass

1274
class UnknownProtocol(HTTPException):
    """Carries the protocol version string that was not recognised."""

    def __init__(self, version):
        # args is set by hand (instead of calling Exception.__init__) so
        # that str() of the exception works.
        self.args = (version,)
        self.version = version
1278 1279

class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; by its name, meant for a transfer encoding
    this module does not handle (raise sites are outside this section)."""
    pass
1281 1282

class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; by its name, meant for an unsupported file
    mode (raise sites are outside this section)."""
    pass
1284 1285

class IncompleteRead(HTTPException):
    """Raised when a read ends before the expected number of bytes arrived.

    partial holds the data that was read; expected, when not None, is the
    number of bytes that were still outstanding.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        return repr(self)
1298 1299

class ImproperConnectionState(HTTPException):
    """Base class for errors caused by calling a connection method while
    the connection is in the wrong state."""
    pass
1301 1302

class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
    pass
1304 1305

class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is being
    built."""
    pass
1307 1308

class ResponseNotReady(ImproperConnectionState):
    """Raised when a response (or its headers) is requested before it is
    available."""
    pass
1310 1311

class BadStatusLine(HTTPException):
    """Carries a status line that could not be used, as .line.

    An empty (false) line is stored as its repr() so the exception text
    is never blank.
    """

    def __init__(self, line):
        shown = line if line else repr(line)
        self.args = (shown,)
        self.line = shown
1317

1318 1319 1320 1321 1322
class LineTooLong(HTTPException):
    """Raised when a line read from the server exceeds _MAXLINE bytes."""

    def __init__(self, line_type):
        message = "got more than %d bytes when reading %s" % (_MAXLINE,
                                                              line_type)
        HTTPException.__init__(self, message)

1323 1324 1325
# for backwards compatibility
error = HTTPException

Jeremy Hylton's avatar
Jeremy Hylton committed
1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Wraps an already-read first line together with the file it came from,
    so that reads return the line first and then continue with the file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        """Remember the pre-read line and the file to continue from."""
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # Anything not defined here is looked up on the wrapped file.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt bytes, or everything if amt is None or negative.

        Data comes from the buffered line first; once it is used up the
        remainder is taken from the wrapped file.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # A negative size means "read everything", as for ordinary file
        # objects; it must not reach the slicing branch below, where it
        # would act as a negative index into the buffered line.
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread rest of the buffered line, then file lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the buffered line followed by the file's lines.

        size, when given, is passed on to the wrapped file's readlines().
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)