Consumiendo stream Twitter, alimentando javascript en tiempo real

Bueno, hay mogollón de documentación del módulo tweepy; aquí va el ejemplo de juguete que chupa los tuits georreferenciados en un área dada:

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json

# Twitter OAuth credentials, left blank here: fill them in before running.
# They are handed to OAuthHandler / set_access_token further down.
access_token = ""
access_token_secret = ""
consumer_key = ""
consumer_secret = ""

class myStreamListener(StreamListener):
    """Stream listener that prints the coordinates of geotagged tweets."""

    def on_data(self, data):
        """Handle one raw message received from the stream.

        ``data`` is parsed as JSON; when the message carries a non-null
        ``"coordinates"`` entry, its inner ``"coordinates"`` value is printed.

        Always returns True.
        """
        decoded = json.loads(data)
        # Not every stream message is guaranteed to carry a "coordinates"
        # key, so look it up with .get() instead of indexing: a KeyError
        # here would abort the stream.
        geo = decoded.get("coordinates")
        if geo is not None:
            print(geo["coordinates"])
        # To show the author and text as well (non-ASCII characters dropped):
        #   print('@%s: %s' % (decoded['user']['screen_name'],
        #                      decoded['text'].encode('ascii', 'ignore')))
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)

if __name__ == '__main__':
    # Authenticate and attach the listener to a streaming connection.
    listener = myStreamListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Bounding boxes for the `locations` filter. The streaming API expects
    # [west_lon, south_lat, east_lon, north_lat]: longitude first, and the
    # south-west corner before the north-east one.
    geobox_world = [-180, -90, 180, 90]
    geobox_spain = [-11.757813, 35.340503, 5.644531, 43.217838]
    geobox_catalunya = [0.41748, 40.667879, 3.284912, 42.857846]

    # To also filter by text or hashtag:
    #   stream.filter(track=['cholo simeone'], locations=geobox_world)
    stream.filter(locations=geobox_world)

Y el siguiente script utiliza, por un lado, gevent para gestionar la concurrencia y, por el otro, WebSocketHandler para abrir un socket con el que se puede servir un stream de datos hacia un cliente.

from geventwebsocket.handler import WebSocketHandler
from gevent import pywsgi
import gevent
import time

def app(environ, start_response):
    """WSGI application that pushes a counter message over a WebSocket.

    Sends "hola0", "hola1", ... to the client, one message per second. The
    loop never ends by itself; it only stops when ``ws.send`` raises.

    Requests without a ``wsgi.websocket`` entry in ``environ`` (plain HTTP)
    are answered with a 400 instead of failing with a KeyError.
    """
    ws = environ.get('wsgi.websocket')
    if ws is None:
        start_response('400 Bad Request', [('Content-Type', 'text/plain')])
        return [b'WebSocket connection expected\n']

    contador = 0
    while True:
        ws.send("hola" + str(contador))
        # gevent.sleep yields to the other greenlets. time.sleep would block
        # the whole server here (this script does no monkey-patching), so
        # only one client at a time could be served.
        gevent.sleep(1)
        contador += 1

# Listen on every interface, port 10000, and hand each request to `app`.
# WebSocketHandler is the request handler class; `app` reads the socket it
# provides from environ['wsgi.websocket'].
server = pywsgi.WSGIServer(
    ('', 10000),
    app,
    handler_class=WebSocketHandler
)
# Blocks here, serving requests until the process is stopped.
server.serve_forever()

Finalmente, la suma de los dos:

import gevent
import gevent.monkey
gevent.monkey.patch_all()

from geventwebsocket.handler import WebSocketHandler
from gevent import pywsgi

from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json

# Twitter OAuth credentials, left blank here: fill them in before running.
# They are read by MyStreamListener.__init__ to build the OAuthHandler.
access_token = ""
access_token_secret = ""
consumer_key = ""
consumer_secret = ""


class MyStreamListener(StreamListener):
    """Stream listener that relays tweet coordinates to WebSocket clients.

    The listener owns its own tweepy ``Stream`` and a list of registered
    WebSockets; every geotagged tweet is forwarded to all of them as JSON.
    """

    def __init__(self):
        # Initialise the tweepy base class as well, so that whatever it sets
        # up in its own __init__ is available on this instance.
        super(MyStreamListener, self).__init__()
        self.sockets = []
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        self.stream = Stream(auth, self)

    def add_socket(self, ws):
        """Register a WebSocket that should receive coordinates."""
        self.sockets.append(ws)

    def run(self):
        """Consume the filtered stream; report and disconnect on failure."""
        try:
            # `track` must be a list of phrases: tweepy joins it with commas,
            # so a bare string would be split into single characters.
            self.stream.filter(track=["#linux"])
        except Exception as exc:
            # Report the failure instead of swallowing it silently.
            print("Stream error: %s" % (exc,))
            self.stream.disconnect()

    def start(self):
        """Run the stream consumer in a separate greenlet."""
        gevent.spawn(self.run)

    def send(self, ws, coordinates):
        """Send ``coordinates`` as JSON to ``ws``; drop the socket on failure."""
        try:
            ws.send(json.dumps(coordinates))
        except Exception:
            # The WebSocket died. Several pending sends for the same socket
            # can fail one after another, so only remove it while it is
            # still registered (list.remove raises ValueError otherwise).
            if ws in self.sockets:
                self.sockets.remove(ws)

    def on_data(self, data):
        """Forward the coordinates of a geotagged message to every socket.

        Messages without a non-null ``"coordinates"`` entry are ignored.
        Always returns True.
        """
        decoded = json.loads(data)
        geo = decoded.get("coordinates")
        if geo is not None:
            coordinates = geo["coordinates"]
            # Iterate over a snapshot, since send() may remove sockets.
            for ws in list(self.sockets):
                # One greenlet per socket so a slow client does not delay
                # the others.
                gevent.spawn(self.send, ws, coordinates)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print("Error %s" % (status,))

    def on_timeout(self):
        """Report a stream timeout and pause for 30 seconds."""
        print("tweepy timeout.. wait 30 seconds")
        gevent.sleep(30)

# Single module-level listener; app() registers each WebSocket with it.
stream_listener = MyStreamListener()
# start() spawns the stream consumer in a greenlet and returns immediately.
stream_listener.start()


def app(environ, start_response):
    """WSGI application that registers each WebSocket with the listener.

    The socket is handed to ``stream_listener``; this function then just
    waits, polling every 0.1 s, until the socket reports itself closed.

    Requests without a ``wsgi.websocket`` entry in ``environ`` (plain HTTP)
    are answered with a 400 instead of failing with a KeyError.
    """
    ws = environ.get('wsgi.websocket')
    if ws is None:
        start_response('400 Bad Request', [('Content-Type', 'text/plain')])
        return [b'WebSocket connection expected\n']

    stream_listener.add_socket(ws)
    # Keep this handler running while the socket is open; gevent.sleep
    # yields to the other greenlets between checks.
    while not ws.closed:
        gevent.sleep(0.1)

# Listen on every interface, port 10000, and hand each request to `app`.
# WebSocketHandler is the request handler class; `app` reads the socket it
# provides from environ['wsgi.websocket'].
server = pywsgi.WSGIServer(
    ('', 10000),
    app,
    handler_class=WebSocketHandler
)
# Blocks here, serving requests until the process is stopped.
server.serve_forever()

http://stackoverflow.com/questions/27882631/consuming-twitter-stream-with-tweepy-and-serving-content-via-websocket-with-geve

Tagged with:
Posted in Programación, python