How would I implement “tail” over HTTP with Python Tornado?

I'm trying to implement something like "tail -f" over HTTP with Python. Currently, I'm trying to use Tornado, but it is only handling one connection at a time, even when I make asynchronous requests.

import socket
import subprocess

import tornado.gen as gen
import tornado.httpserver
import tornado.ioloop
import tornado.iostream
import tornado.options
import tornado.web

from tornado.options import define, options

# Command-line options: the port the HTTP server listens on and the file
# whose contents are streamed to clients.
define("port", type=int, default=8888, help="run on the given port")
define(
    "inputfile",
    type=str,
    default="test.txt",
    help="the path to the file which we will 'tail'",
)


class MainHandler(tornado.web.RequestHandler):
    """Stream the output of a per-request ``./nettail.py`` child to the client.

    Each GET spawns ``./nettail.py`` with the tailed file as its stdin, reads
    the TCP port the child prints on its first line of stdout, connects to
    that port and forwards every line received to the HTTP response.
    """

    @tornado.web.asynchronous
    @gen.engine
    def get(self):
        """Start the child, then relay its lines to the response as they come."""
        print("GOT REQUEST")
        # The child receives its own copy of the descriptor as stdin, so the
        # parent's handle is closed right after spawning instead of leaking
        # one open file per request.
        with open(options.inputfile) as inputfile:
            self.p = subprocess.Popen(
                "./nettail.py",
                stdin=inputfile,
                stdout=subprocess.PIPE)
        # NOTE: this is a synchronous read; the IOLoop is stalled until the
        # child has printed its port number.
        port_number = int(self.p.stdout.readline().strip())

        self.write("<pre>")
        self.write("Hello, world\n")
        self.flush()

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        # Kept on the handler so on_connection_close() can close it.
        self.stream = tornado.iostream.IOStream(s)
        yield gen.Task(self.stream.connect, ("127.0.0.1", port_number))
        while True:
            data = yield gen.Task(self.stream.read_until, "\n")
            self.write(data)
            self.flush()

    def on_connection_close(self, *args, **kwargs):
        """Release the child's socket and stop the child on client disconnect."""
        stream = getattr(self, "stream", None)
        if stream is not None:
            stream.close()
        child = getattr(self, "p", None)
        # poll() also reaps a child that has already exited on its own.
        if child is not None and child.poll() is None:
            child.terminate()
        tornado.web.RequestHandler.on_connection_close(self, *args, **kwargs)

def main():
    """Parse the command line, mount MainHandler at "/" and start the IOLoop."""
    tornado.options.parse_command_line()
    routes = [(r"/", MainHandler)]
    server = tornado.httpserver.HTTPServer(tornado.web.Application(routes))
    server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()


if __name__ == "__main__":
    main()

The process I am starting is a simple "tail" which outputs to a socket.

import random
import socket
import sys
import time

# Create an INET, STREAMing socket.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

try:
    # Try a handful of random ports; the first one that binds is reported on
    # stdout so that the parent process knows where to connect.
    for _attempt in range(5):
        port_number = random.randint(9000, 65000)
        try:
            s.bind(("localhost", port_number))
        except socket.error:
            continue
        # We successfully bound!
        sys.stdout.write("{0}\n".format(port_number))
        sys.stdout.flush()
        break
    else:
        # Every attempt failed. Exit instead of listening on an unbound
        # socket without ever announcing a port, which would leave the
        # parent waiting forever.
        sys.stderr.write("nettail: could not bind to a free port\n")
        sys.exit(1)

    # Become a server socket.
    s.listen(5)

    # Accept a connection. This happens outside the inner try so that a
    # failing accept() is not masked by a NameError on `clientsocket` in the
    # cleanup clause.
    clientsocket, address = s.accept()
    try:
        while True:
            line = sys.stdin.readline()
            if not line:
                # Nothing new yet: wait a little and poll stdin again.
                time.sleep(1)
                continue
            clientsocket.sendall(line)
    finally:
        clientsocket.close()

finally:
    s.close()

./nettail.py works as I expect, but the Tornado HTTP server is only handling one request at a time.

I would like to use long-running, persistent HTTP connections to do this, as it is compatible with older browsers. I understand that Web Sockets is how it would be done in modern browsers.

Edit: I'm running this on Linux and Solaris, not Windows. That means I could use tornado.iostream on the file, rather than through a sockets program. Still, that is not a documented feature, so I launch a sockets program for each connection.

After doing some more debugging, it turns out that this tail server was not blocking, after all.

I was trying to test concurrent connections with two windows of Firefox open, but Firefox would not start fetching the second window until the first window was manually stopped. I guess Firefox does not like to have two concurrent HTTP connections to fetch the same resource.

Opening a Firefox window and a Chromium window, I can see the "tail" output pushed to both tabs.

Thank you for all your help. @abarnert's comments were especially helpful.

Edit:

In the to-be-released 2.4.2 version of Tornado, a "Pipe" IOStream is implemented. Using this and regular "tail" simplified the code a lot.

import subprocess

import tornado.httpserver
import tornado.ioloop
import tornado.iostream
import tornado.options
import tornado.web

from tornado.options import define, options

# Command-line options: the port the HTTP server listens on and the file
# that is handed to `tail -f`.
define("port", type=int, default=8888, help="run on the given port")
define(
    "inputfile",
    type=str,
    default="test.txt",
    help="the path to the file which we will 'tail'",
)


class MainHandler(tornado.web.RequestHandler):
    """Stream ``tail -f`` output for ``options.inputfile`` to the client.

    Each GET starts its own ``tail`` child process and forwards every line
    the child prints to the HTTP response through a ``PipeIOStream``.
    """

    @tornado.web.asynchronous
    def get(self):
        """Start ``tail`` and begin relaying its output line by line."""
        print("GOT REQUEST")
        # Options are placed before the file operand. Only implementations
        # that permute arguments (GNU) accept an option after the file name;
        # others would treat a trailing "-n+1" as a second file.
        self.p = subprocess.Popen(
            ["tail", "-n", "+1", "-f", options.inputfile],
            stdout=subprocess.PIPE)

        self.write("<pre>")
        self.write("Hello, world\n")
        self.flush()

        self.stream = tornado.iostream.PipeIOStream(self.p.stdout.fileno())
        self.stream.read_until("\n", self.line_from_nettail)

    def on_connection_close(self, *args, **kwargs):
        """Clean up the tail process when the connection is closed.
        """
        print("CONNECTION CLOSED!!!!")
        # poll() reaps a child that already exited, so only a process that is
        # still running gets signalled.
        if self.p.poll() is None:
            self.p.terminate()
        tornado.web.RequestHandler.on_connection_close(self, *args, **kwargs)

    def line_from_nettail(self, data):
        """Forward one line read from the child and wait for the next one."""
        self.write(data)
        self.flush()
        self.stream.read_until("\n", self.line_from_nettail)

def main():
    """Parse the command line, mount MainHandler at "/" and start the IOLoop."""
    tornado.options.parse_command_line()
    routes = [(r"/", MainHandler)]
    server = tornado.httpserver.HTTPServer(tornado.web.Application(routes))
    server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()


if __name__ == "__main__":
    main()

I created this recently as an experiment. It works for me with multiple connections; is it of any use?

class TailHandler(BaseHandler):
    """Push new content of ``data/to_read.txt`` to the client once per second.

    The file is polled from an IOLoop timeout; polling stops and the file is
    closed when the client disconnects.
    """

    @asynchronous
    def get(self):
        """Open the file and start the polling loop."""
        self.file = open('data/to_read.txt', 'r')
        self.pos = self.file.tell()
        # Handle of the pending timeout, kept so it can be cancelled.
        self._tail_timeout = None

        def _read_file():
            if self.file is None:
                # The connection was closed; do not reschedule.
                return
            line = self.file.read()
            last_pos = self.file.tell()
            if not line:
                # Nothing new: reopen at the same offset before polling again.
                self.file.close()
                self.file = open('data/to_read.txt', 'r')
                self.file.seek(last_pos)
            else:
                self.write(line)
                self.flush()

            self._tail_timeout = IOLoop.instance().add_timeout(
                time.time() + 1, _read_file)
        _read_file()

    def on_connection_close(self):
        """Cancel the polling timeout and close the file on client disconnect."""
        pending = getattr(self, "_tail_timeout", None)
        if pending is not None:
            IOLoop.instance().remove_timeout(pending)
            self._tail_timeout = None
        if getattr(self, "file", None) is not None:
            self.file.close()
            self.file = None
        super(TailHandler, self).on_connection_close()

You shouldn't have blocking calls like this in the handler.

    port_number = int(p.stdout.readline().strip())

You'll need to use select or a similar mechanism to avoid the blocking call.

Edit: ok I went and checked the docs. You should use their iostream to read from p

来源：https://stackoverflow.com/questions/14247918/how-would-i-implement-tail-over-http-with-python-tornado

标签

python

tornado