import asyncore  # Lightweight library for asynchronous I/O.
import socket    # Low-level networking primitives.
import string    # Library for basic string operations.

try:
    import urlparse  # Library for parsing URLs (Python 2 module name).
except ImportError:
    import urllib.parse as urlparse  # Same functions, Python 3 location.

# Indicates the end of the header (amongst other things) in an HTTP message.
# It is a byte string because it is searched for in raw socket data.
HTTP_HEADER_DELIM = b"\r\n\r\n"

#---

# Our custom dispatcher class. It handles all of the network I/O.
class CustomDispatcher(asyncore.dispatcher):
    """Fetch a single 'http://' URL through asyncore and report the result.

    The constructor issues a plain HTTP/1.0 GET request. Exactly one of the
    two callbacks is eventually invoked by this class:

    * ``process_func(body, *process_args)`` when the connection closes.
      ``body`` is the raw data received after the HTTP header (a byte
      string). If the end of the header was never seen, ``body`` is
      everything that was received.
    * ``error_func(message, *error_args)`` when the URL is unusable, the
      connection attempt raises, or more than MAX_READ_LEN bytes arrive.
    """

    # Maximum length of a web page (including the HTTP header) that we are
    # willing to read.
    MAX_READ_LEN = 50000

    # Class constructor. The arguments are the URL to read, and a pair of
    # functions (with arguments) to process any data (or errors) received.
    def __init__(self, url, process_func, process_args,
                 error_func, error_args):
        """Parse `url`, build the GET request and start connecting.

        url          -- the 'http://host[:port]/path' address to fetch.
        process_func -- called with the received body plus process_args.
        process_args -- extra positional arguments for process_func.
        error_func   -- called with an error message plus error_args.
        error_args   -- extra positional arguments for error_func.

        Problems detected here are reported through error_func rather than
        raised, so the constructor returns normally in those cases.
        """
        asyncore.dispatcher.__init__(self)

        # Save the received callbacks for future use.
        self.__process_func = process_func
        self.__process_args = process_args
        self.__error_func = error_func
        self.__error_args = error_args

        # Indicates that we still have not read the HTTP header of the reply.
        self.__is_header = True

        # We are interested in both read and write events.
        self.__writable = True
        self.__readable = True

        # Indicates how many bytes can still be read. The number
        # decreases as data is received.
        self.__chars_left = self.MAX_READ_LEN
        self.__buffer = b''

        # Our communication channel (socket) is not open yet.
        self.__is_open = False

        # Nothing to send yet. Set up front so that the attribute exists
        # even when we bail out early with an error below.
        self.__request = None

        # Parse the URL.
        scheme, netloc, path, params, query, _fragment = \
            urlparse.urlparse(url)

        # Our dispatcher class can use the HTTP protocol only.
        if scheme != 'http':
            self.__call_error_func(
                'You must specify the HTTP protocol in the URL.')
            return

        # Set the port that we will connect to on the server. If no (valid)
        # port is specified, default to the standard port 80.
        host, _separator, port_text = netloc.partition(':')
        try:
            port = int(port_text)
        except ValueError:
            port = 80

        # Without a host name there is nothing to connect to.
        if not host:
            self.__call_error_func('No host was specified in the URL.')
            return

        if not path:    # Default to the root path.
            path = '/'
        if params:      # Parameters, if any.
            path = path + ';' + params
        if query:       # Query, if any.
            path = path + '?' + query

        # The Host header must carry the port whenever it is not the
        # default one, otherwise the server may not recognise the host.
        if port == 80:
            host_header = host
        else:
            host_header = '%s:%d' % (host, port)

        # Formulate a standard HTTP 'GET' request.
        request = "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n" % (path, host_header)

        # Sockets carry bytes; a text request must be encoded first. A
        # request that is already a byte string is sent unchanged.
        if not isinstance(request, bytes):
            try:
                request = request.encode('ascii')
            except UnicodeError:
                self.__call_error_func(
                    'The URL must contain only ASCII characters.')
                return
        self.__request = request

        # Create the socket to carry out the communication. The socket is
        # of streaming (TCP), Internet type.
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)

        # The socket is now considered open.
        self.__is_open = True

        # Connect to the server; signal an error if the connection fails.
        try:
            self.connect((host, port))
        except Exception as e:
            self.__call_error_func('Failed to connect: ' + str(e))
            return

    # Asyncore uses this method to determine if we are interested in
    # receiving read events (i.e. calls to our 'handle_read', below).
    def readable(self):
        """Return a true value while read events are still wanted."""
        return self.__readable

    # Asyncore uses this method to determine if we are interested in
    # receiving write events (i.e. calls to our 'handle_write', below).
    # The name asyncore looks up is 'writable'; without this override the
    # base class always answers "yes", even after the request is sent.
    def writable(self):
        """Return a true value while write events are still wanted."""
        return self.__writable

    # Original (misspelled) name, kept so that existing callers still work.
    writeable = writable

    # Called by asyncore when a connection is established. In our case,
    # there is nothing to do for this event.
    def handle_connect(self):
        """Do nothing; the request is sent from 'handle_write'."""
        pass

    # Called by asyncore when some data can be read without blocking.
    def handle_read(self):
        """Append newly received data to the buffer and strip the header."""
        # Do not allow reading beyond MAX_READ_LEN.
        if self.__chars_left < 1:
            self.__call_error_func(
                'Maximum read length (%d) exceeded.' % self.MAX_READ_LEN)
            return

        # Read available data; adjust how much we are still allowed to read.
        # The allowance is reduced by what was actually received, so the
        # header keeps counting against it after it has been discarded.
        data = self.recv(self.__chars_left)
        if not data:
            # Nothing arrived, so there is nothing to add or to scan.
            return
        self.__chars_left -= len(data)
        self.__buffer += data

        # If we are still reading the HTTP header ...
        if self.__is_header:
            # Search for end-of-header indicator.
            delim_pos = self.__buffer.find(HTTP_HEADER_DELIM)
            if delim_pos != -1:
                # We have found the end of the header. Discard it, but keep
                # any part of the body that we may have read.
                body_start = delim_pos + len(HTTP_HEADER_DELIM)
                self.__buffer = self.__buffer[body_start:]
                self.__is_header = False    # No longer reading the header.

    # Called by asyncore when some data can be written without blocking.
    def handle_write(self):
        """Send as much of the pending request as the socket accepts."""
        # If we have sent the entire request (see below),
        # then there is nothing left to do for this event.
        if self.__request is None:
            return

        # Our request is not done yet -- send some data.
        sent = self.send(self.__request)
        if sent == len(self.__request):     # If we have sent everything.
            self.__writable = False     # No longer interested in writing.
            self.__request = None       # Nothing left to send.
        else:
            # We have sent only part of the request; prepare to send the
            # rest when 'handle_write' is called again. This is done by
            # deleting all data that has just been written from our
            # request buffer.
            self.__request = self.__request[sent:]

    # Called by asyncore when the underlying socket has closed.
    def handle_close(self):
        """Close the channel and pass the collected data to process_func."""
        self.close()                # Make sure we are closed.
        self.__readable = False     # No longer interested in reading.
        self.__writable = False     # No longer interested in writing.
        self.__process_func(self.__buffer, *self.__process_args)

    # Helper method, which we call when an error occurs.
    def __call_error_func(self, message):
        """Close the channel if it was opened and report `message`."""
        if self.__is_open:          # Close the channel (if it was open).
            self.close()
        self.__readable = False     # No longer interested in read events.
        self.__writable = False     # No longer interested in write events.
        self.__error_func(message, *self.__error_args)

#--- End of CustomDispatcher Class ---