| CODENOTIFIER | HelpYou are not signed inSign in |
Project: ThruDB
Revision: 354
Author: jake
Date: 20 Apr 2008 21:17:52
Diff at Trac: http://trac.thrudb.org/changeset/354
Changes:Thai's django code
Files:| ... | ...@@ -0,0 +1,82 @@ | |
# Django settings for the tweetsearch project.
#
# This is the tutorial configuration: debugging is switched on and no
# database is configured (every DATABASE_* value is empty).

DEBUG = True
TEMPLATE_DEBUG = DEBUG

ADMINS = (
    ('Thai Duong', 'thai@meetaa.com'),
)

MANAGERS = ADMINS

DATABASE_ENGINE = ''    # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
DATABASE_NAME = ''      # Or path to database file if using sqlite3.
DATABASE_USER = ''      # Not used with sqlite3.
DATABASE_PASSWORD = ''  # Not used with sqlite3.
DATABASE_HOST = ''      # Set to empty string for localhost. Not used with sqlite3.
DATABASE_PORT = ''      # Set to empty string for default. Not used with sqlite3.

# Local time zone for this installation. Choices can be found here:
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
# although not all choices may be available on all operating systems.
# If running in a Windows environment this must be set to the same as your
# system time zone.
TIME_ZONE = 'Asia/Saigon'

# Language code for this installation. All choices can be found here:
# http://www.i18nguy.com/unicode/language-identifiers.html
LANGUAGE_CODE = 'en-us'

SITE_ID = 1

# If you set this to False, Django will make some optimizations so as not
# to load the internationalization machinery.
USE_I18N = True

# Absolute path to the directory that holds media.
# Example: "/home/media/media.lawrence.com/"
MEDIA_ROOT = ''

# URL that handles the media served from MEDIA_ROOT. Make sure to use a
# trailing slash if there is a path component (optional in other cases).
# Examples: "http://media.lawrence.com", "http://example.com/media/"
MEDIA_URL = ''

# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
# trailing slash.
# Examples: "http://foo.com/media/", "/media/".
ADMIN_MEDIA_PREFIX = ''

# Make this unique, and don't share it with anybody.
# NOTE(review): this value is checked into the repository; generate a fresh
# key before using these settings for anything but the tutorial.
SECRET_KEY = '4bfqq(kaufg*un4rb_z8_%f%f094l8)6x)rn0l*4sf!(ur_%gx'

# List of callables that know how to import templates from various sources.
TEMPLATE_LOADERS = (
    'django.template.loaders.filesystem.load_template_source',
    'django.template.loaders.app_directories.load_template_source',
    # 'django.template.loaders.eggs.load_template_source',
)

MIDDLEWARE_CLASSES = (
    'django.middleware.common.CommonMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.middleware.doc.XViewMiddleware',
)

ROOT_URLCONF = 'tweetsearch.urls'

TEMPLATE_DIRS = (
    # The trailing comma matters: without it the parentheses are only
    # grouping and TEMPLATE_DIRS would be a plain string, not a tuple.
    "/home/thaidn/buildthrudb/thrudb/tutorial/tweetsearch",
    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
    # Always use forward slashes, even on Windows.
    # Don't forget to use absolute paths, not relative paths.
)

INSTALLED_APPS = (
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.sites',
    'tweetsearch.search',
)
| ... | ...@@ -0,0 +1,35 @@ | |
| 1 | # Create your views here. | |
| 2 | from django.shortcuts import render_to_response | |
| 3 | from twitter import TweetManager | |
| 4 | from time import time | |
| 5 | ||
# One TweetManager shared by every request; it is created (and connects to
# Thrudoc and Thrudex) when this module is imported.
tm = TweetManager()


def search(request):
    """Render the search page for the ``q`` and ``offset`` GET parameters.

    ``q`` is the search text; when it is empty the page is rendered without
    running a search.  ``offset`` is the index of the first result to show;
    a missing, non-numeric or negative value is treated as 0.

    The template context contains ``tweets``, ``query``, ``took`` (elapsed
    seconds formatted with two decimals), ``total``, ``current`` (the offset
    in use), ``next`` and ``prev`` (offsets for the pager links, or None).
    """
    query = request.GET.get('q', '').encode('utf-8')

    # A bad offset used to raise ValueError (an HTTP 500); fall back to the
    # first page instead, and never hand a negative offset to the index.
    try:
        offset = int(request.GET.get('offset', '') or 0)
    except ValueError:
        offset = 0
    if offset < 0:
        offset = 0

    tweets = None
    total = None
    next_offset = None
    prev_offset = None
    took = None
    if query:
        started = time()
        (total, tweets) = tm.search_tweet(query, offset)
        took = "%.2f" % (time() - started)

        shown_upto = offset + len(tweets)
        if total > shown_upto:
            # A computed offset of 0 is reported as None, as before.
            next_offset = shown_upto or None
        if offset > 0:
            # Same rule here: at offset 10 the previous offset is 0 and is
            # reported as None; template.html covers that case with its
            # separate "ifequal current 10" link.
            prev_offset = (offset - 10) or None

    return render_to_response("search/template.html", {
        "tweets": tweets,
        "query": query,
        "took": took,
        "total": total,
        "current": offset,
        "next": next_offset,
        "prev": prev_offset,
    })
| ... | ...@@ -0,0 +1,157 @@ | |
| 1 | #!/usr/bin/env python | |
| 2 | ||
| 3 | ||
| 4 | from thrift import Thrift | |
| 5 | from thrift.transport.TSocket import TSocket | |
| 6 | from thrift.transport.TTransport import TFramedTransport, TMemoryBuffer | |
| 7 | from thrift.protocol.TBinaryProtocol import TBinaryProtocol | |
| 8 | ||
| 9 | from Thrudoc import Thrudoc, ttypes as ThrudocTypes | |
| 10 | from Thrudex import Thrudex, ttypes as ThrudexTypes | |
| 11 | ||
| 12 | from random import random | |
| 13 | from time import time | |
| 14 | ||
| 15 | import urllib2 | |
| 16 | import cjson | |
| 17 | import sys | |
| 18 | import socket | |
# Default timeout, in seconds, applied to every socket created afterwards.
TIMEOUT = 10
socket.setdefaulttimeout(TIMEOUT)

# Ports of the local Thrudex (search index) and Thrudoc (document store) services.
THRUDEX_PORT = 11299
THRUDOC_PORT = 11291

# Names of the Thrudoc bucket and the Thrudex index that hold the tweets.
THRUDOC_BUCKET = "tweets"
THRUDEX_INDEX = "tweets"
| 29 | ||
class TweetCatcher(object):
    """Poll Twitter's public timeline and load each tweet into ThruDB.

    Every tweet is indexed in Thrudex and stored as JSON in Thrudoc, keyed
    by the tweet id in both places.
    """

    def __init__(self, since_id=None):
        """Connect to Thrudoc and Thrudex and reset the counters.

        since_id -- value sent as the ``since_id`` request parameter on the
                    first poll; None leaves the parameter out.
        """
        self.connect_to_thrudoc()
        self.connect_to_thrudex()
        self.since_id = since_id
        self.count = 0

    def connect_to_thrudoc(self):
        """Open the Thrudoc client on localhost and send the create_bucket
        admin command for the tweet bucket."""
        # named ``sock`` so the imported ``socket`` module is not shadowed
        sock = TSocket('localhost', THRUDOC_PORT)
        transport = TFramedTransport(sock)
        protocol = TBinaryProtocol(transport)
        self.thrudoc = Thrudoc.Client(protocol)
        transport.open()
        self.thrudoc.admin("create_bucket", THRUDOC_BUCKET)

    def connect_to_thrudex(self):
        """Open the Thrudex client on localhost and send the create_index
        admin command for the tweet index."""
        sock = TSocket('localhost', THRUDEX_PORT)
        transport = TFramedTransport(sock)
        protocol = TBinaryProtocol(transport)
        self.thrudex = Thrudex.Client(protocol)
        transport.open()
        self.thrudex.admin("create_index", THRUDEX_INDEX)

    def index_tweet(self, tweet):
        """Put a Thrudex document for ``tweet`` with a ``text`` field and a
        sortable ``date`` field (taken from ``created_at``)."""
        doc = ThrudexTypes.Document()
        doc.key = str(tweet["id"])
        doc.index = THRUDEX_INDEX
        doc.fields = []

        field = ThrudexTypes.Field()
        field.key = "text"
        field.value = tweet["text"].encode('utf-8')
        doc.fields.append(field)

        field = ThrudexTypes.Field()
        field.key = "date"
        field.value = tweet["created_at"]
        field.sortable = True
        doc.fields.append(field)
        self.thrudex.put(doc)

    def save_tweet(self, tweet):
        """Store the whole tweet, JSON-encoded, in Thrudoc under its id."""
        self.thrudoc.put(THRUDOC_BUCKET, str(tweet["id"]), cjson.encode(tweet))

    def _timeline_url(self):
        """Return the public-timeline URL to request on the next poll."""
        params = []
        if self.since_id is not None:
            params.append("since_id=%s" % self.since_id)
        # the random parameter is used to avoid http caching by the upstream provider
        params.append("r=%s" % random())
        return "http://twitter.com/statuses/public_timeline.json?" + "&".join(params)

    def run(self, retry_delay=5):
        """Poll the timeline forever, loading every tweet that comes back.

        retry_delay -- seconds to wait after a failed download or parse
                       before polling again.

        Download and parse errors are reported on stderr and the poll is
        retried.  Errors raised while indexing or saving a tweet are not
        caught: they propagate to the caller and end the loop.
        """
        from time import sleep  # the module itself only imports time.time

        while True:
            try:
                response = urllib2.urlopen(self._timeline_url())
                try:
                    tweets = cjson.decode(response.read())
                finally:
                    response.close()
            except Exception:
                # ``Exception`` rather than a bare except, so Ctrl-C and
                # SystemExit still stop the process; the pause keeps a
                # persistent failure from turning into a busy loop.
                sys.stderr.write("could not fetch the timeline: %s\n" % (sys.exc_info()[1],))
                sleep(retry_delay)
                continue

            for tweet in tweets:
                # somehow sometimes tweet is not a dictionary, check to make sure
                if not isinstance(tweet, dict):
                    continue
                self.index_tweet(tweet)
                self.save_tweet(tweet)
                self.count = self.count + 1
                # NOTE(review): this keeps the id of the last tweet processed,
                # not the highest id seen - confirm that matches the order in
                # which the timeline returns tweets.
                self.since_id = tweet["id"]
            sys.stdout.write("loaded %s tweets, last since_id %s\n" % (self.count, self.since_id))
| 98 | ||
class TweetManager(object):
    """Search side of the tutorial: look tweets up in Thrudex and fetch the
    matching documents from Thrudoc."""

    def __init__(self):
        """Connect to Thrudoc first, then to Thrudex."""
        self.connect_to_thrudoc()
        self.connect_to_thrudex()

    def connect_to_thrudoc(self):
        """Create ``self.thrudoc``, open its transport and send the
        create_bucket admin command for the tweet bucket."""
        framed = TFramedTransport(TSocket('localhost', THRUDOC_PORT))
        self.thrudoc = Thrudoc.Client(TBinaryProtocol(framed))
        framed.open()
        self.thrudoc.admin("create_bucket", THRUDOC_BUCKET)

    def connect_to_thrudex(self):
        """Create ``self.thrudex``, open its transport and send the
        create_index admin command for the tweet index."""
        framed = TFramedTransport(TSocket('localhost', THRUDEX_PORT))
        self.thrudex = Thrudex.Client(TBinaryProtocol(framed))
        framed.open()
        self.thrudex.admin("create_index", THRUDEX_INDEX)

    def search_tweet(self, terms, offset=0, limit=10):
        """Search the tweet index for ``terms``.

        The query asks for ``limit`` hits starting at ``offset``, sorted on
        the ``date`` field with ``desc`` set.  Returns ``(total, tweets)``:
        the total reported by Thrudex and the decoded tweets whose stored
        value is not empty, each with ``profile_image_url`` and
        ``user_name`` copied up from its ``user`` entry.
        """
        search = ThrudexTypes.SearchQuery()
        search.index = THRUDEX_INDEX
        # NOTE(review): ``terms`` goes into the query string as-is, so any
        # query syntax typed by the user reaches the index unchanged.
        search.query = "+text:(%s)" % terms
        search.offset = offset
        search.limit = limit
        search.sortby = 'date'
        search.desc = True

        hits = self.thrudex.search(search)
        tweets = []
        if len(hits.elements) == 0:
            return hits.total, tweets

        wanted = self.create_doc_list(hits.elements)
        for item in self.thrudoc.getList(wanted):
            raw = item.element.value
            if raw == '':
                continue
            tweet = cjson.decode(raw)
            author = tweet["user"]
            # backslashes are stripped from the image URL and the text
            tweet["profile_image_url"] = author["profile_image_url"].replace("\\", "")
            tweet["user_name"] = author["screen_name"]
            tweet["text"] = tweet["text"].replace("\\", "")
            tweets.append(tweet)
        return hits.total, tweets

    def create_doc_list(self, ids):
        """Return one Thrudoc Element (tweet bucket, same key) per entry of ``ids``."""
        elements = []
        for hit in ids:
            element = ThrudocTypes.Element()
            element.bucket = THRUDOC_BUCKET
            element.key = hit.key
            elements.append(element)
        return elements
| 151 | ||
| 152 | ||
if __name__ == "__main__":
    # Run as a script: turn into a daemon whose stdout and stderr both go to
    # /tmp/twitter.log, then start catching tweets.
    from daemonize import daemonize
    daemonize('/dev/null', '/tmp/twitter.log', '/tmp/twitter.log')
    TweetCatcher().run()
| ... | ...@@ -0,0 +1,82 @@ | |
| 1 | #!/usr/bin/env python | |
| 2 | import sys, os | |
| 3 | ||
| 4 | '''This module is used to fork the current process into a daemon. | |
| 5 | ||
| 6 | Almost none of this is necessary (or advisable) if your daemon | |
| 7 | is being started by inetd. In that case, stdin, stdout and stderr are | |
| 8 | all set up for you to refer to the network connection, and the fork()s | |
| 9 | and session manipulation should not be done (to avoid confusing inetd). | |
| 10 | Only the chdir() and umask() steps remain as useful. | |
| 11 | ||
| 12 | References: | |
| 13 | UNIX Programming FAQ | |
| 14 | 1.7 How do I get my program to act like a daemon? | |
| 15 | http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16 | |
| 16 | ||
| 17 | Advanced Programming in the Unix Environment | |
| 18 | W. Richard Stevens, 1992, Addison-Wesley, ISBN 0-201-56317-7. | |
| 19 | ''' | |
| 20 | ||
def _fork_and_exit_parent (attempt):
    '''Fork once: the parent exits with status 0, the child returns.
    If fork() fails, the error is written to stderr (labelled with
    attempt) and the process exits with status 1.
    '''
    try:
        pid = os.fork()
    except OSError:
        e = sys.exc_info()[1]
        sys.stderr.write ("fork #%d failed: (%d) %s\n" % (attempt, e.errno, e.strerror) )
        sys.exit(1)
    if pid > 0:
        sys.exit(0)

def daemonize (stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
    '''This forks the current process into a daemon.
    The stdin, stdout, and stderr arguments are file names that
    will be opened and be used to replace the standard file descriptors
    in sys.stdin, sys.stdout, and sys.stderr.
    These arguments are optional and default to /dev/null.
    Note that stderr is opened unbuffered, so
    if it shares a file with stdout then interleaved output
    may not appear in the order that you expect.

    Only the final (grandchild) process returns from this function;
    the original process and the intermediate child call sys.exit().
    '''

    # Flush pending output first: a forked child inherits a copy of any
    # unflushed buffer and would write it a second time.
    sys.stdout.flush()
    sys.stderr.flush()

    # Do first fork.
    _fork_and_exit_parent(1)

    # Decouple from parent environment.
    os.chdir("/")
    os.umask(0)
    os.setsid()

    # Do second fork.
    _fork_and_exit_parent(2)

    # Now I am a daemon!

    # Redirect standard file descriptors.
    si = open(stdin, 'r')
    so = open(stdout, 'a+')
    se = open(stderr, 'a+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())
| 64 | ||
def main ():
    '''Example body for the daemon: report the pid, write one line to
    stdout and one to stderr, then print a counter and the current
    time once per second, forever.
    '''
    import time
    out = sys.stdout
    out.write ('Daemon started with pid %d\n' % os.getpid() )
    out.write ('Daemon stdout output\n')
    sys.stderr.write ('Daemon stderr output\n')
    tick = 0
    while True:
        out.write ('%d: %s\n' % (tick, time.ctime(time.time())) )
        # flush on every line so each one is written out right away
        out.flush()
        tick += 1
        time.sleep(1)

if __name__ == "__main__":
    # Demo: daemonize with stdout and stderr sent to /tmp/daemon.log.
    daemonize('/dev/null','/tmp/daemon.log','/tmp/daemon.log')
    main()
| ... | ...@@ -0,0 +1,36 @@ | |
| 1 | HOW TO RUN | |
| 2 | ||
| 3 | 1. install django (http://www.djangoproject.com), apache, mod_python, and other necessary components | |
| 4 | ||
| 5 | 2. catch the tweets by running ./search/twitter.py; you can follow its progress in /tmp/twitter.log | |
| 6 | ||
| 7 | 3. configure apache with mod_python. Here's a sample conf: | |
| 8 | ||
| 9 | <VirtualHost *> | |
| 10 | ServerName tweetsearch.local | |
| 11 | DocumentRoot /path/to/thrudb/tutorial/tweetsearch | |
| 12 | <Location "/"> | |
| 13 | SetHandler python-program | |
| 14 | PythonHandler django.core.handlers.modpython | |
| 15 | SetEnv DJANGO_SETTINGS_MODULE tweetsearch.settings | |
| 16 | SetEnv PYTHON_EGG_CACHE /tmp | |
| 17 | PythonDebug On | |
| 18 | PythonPath "['/path/to/thrudb/tutorial'] + sys.path" | |
| 19 | </Location> | |
| 20 | ||
| 21 | </VirtualHost> | |
| 22 | ||
| 23 | remember to add this line to /etc/hosts: | |
| 24 | 127.0.0.1 tweetsearch.local | |
| 25 | ||
| 26 | 4. start apache, thrudex and thrudoc | |
| 27 | ||
| 28 | 5. that's it! Contact me if you have any problems. | |
| 29 | ||
| 30 | Thai Duong (thaidn@gmail.com). | |
| 31 | ||
| 32 | ||
| 33 | ||
| 34 | ||
| 35 | ||
| 36 |
| ... | ...@@ -0,0 +1,11 @@ | |
| 1 | from django.conf.urls.defaults import * | |
| 2 | ||
# Every URL is answered by the search view.  The module path is given once
# as the patterns() prefix and each entry names only the view function.
urlpatterns = patterns('tweetsearch.search.views',
    # Example:
    (r'^$', 'search'),
    (r'^search/', 'search'),
    # Uncomment this for admin:
    # (r'^admin/', include('django.contrib.admin.urls')),
    # catch all
    (r'^.*$', 'search')
)
| ... | ...@@ -0,0 +1,4 @@ | |
| 1 | from django.db import models | |
| 2 | ||
| 3 | # Create your models here. | |
| 4 |
| ... | ...@@ -0,0 +1,11 @@ | |
| 1 | #!/usr/bin/env python | |
| 2 | from django.core.management import execute_manager | |
# Load the project settings, or explain on stderr why they could not be
# imported and exit with status 1.
try:
    import settings  # Assumed to be in the same directory.
except ImportError:
    import sys
    message = (
        "Error: Can't find the file 'settings.py' in the directory containing %r. "
        "It appears you've customized things.\n"
        "You'll have to run django-admin.py, passing it your settings module.\n"
        "(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n"
    )
    sys.stderr.write(message % __file__)
    sys.exit(1)

if __name__ == "__main__":
    execute_manager(settings)
| ... | ...@@ -0,0 +1,70 @@ | |
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  {% comment %}
  Search page.  Context variables used: query, total, tweets, current (the
  offset in use), next and prev (pager offsets, empty when there is no such
  page) and took (search time).
  {% endcomment %}
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <meta http-equiv="Content-Language" content="en-us" />
  <meta http-equiv="imagetoolbar" content="no" />
  <meta name="viewport" content="width=780" />
  <title>TweetSearch - Realtime Twitter Search</title>
</head>
<body>

<div style="margin-left:15%;width:650px">

  <div style="text-align:center">
    <h2>TweetSearch - Realtime Twitter Search</h2>
    <form action="" method="get">
      <input type="text" name="q" value="{{ query|escape }}"/>
      <input type="submit" value="search"/>
      <br/>
    </form>
  </div>
  <br/>

  {% if query %}
  {% comment %}
  The end of the range is computed from the tweets actually shown, so the
  last page (where next is empty) still displays a number.
  {% endcomment %}
  {{ total }} results for: <b>"{{ query|escape }}"</b> showing ({{ current }} - {{ tweets|length|add:current }})
  <br/>

  {% comment %}
  Pager.  prev is empty when the previous offset is 0, so the link back to
  the first page is produced by the separate "current is 10" case.  The
  query is URL-encoded because it is placed inside a URL.
  {% endcomment %}
  <div style="float:right">
    {% if prev %}
    <a href="?q={{ query|urlencode }}&amp;offset={{ prev }}"> &lt;prev </a>
    {% endif %}
    {% ifequal current 10 %}
    <a href="?q={{ query|urlencode }}&amp;offset=0"> &lt;prev </a>
    {% endifequal %}
    {% if next %}
    <a href="?q={{ query|urlencode }}&amp;offset={{ next }}"> next &gt; </a>
    {% endif %}
  </div>

  {% for tweet in tweets %}

  <br/><img src="{{ tweet.profile_image_url|escape }}" alt=""/><a href="http://twitter.com/{{ tweet.user_name }}/statuses/{{ tweet.id }}">{{ tweet.user_name|escape }}</a> : {{ tweet.text|escape|urlize }} <i>{{ tweet.created_at }}</i>

  {% endfor %}

  {% endif %}

  <br/><br/>

  <div style="float:right">
    {% if prev %}
    <a href="?q={{ query|urlencode }}&amp;offset={{ prev }}"> &lt;prev </a>
    {% endif %}
    {% ifequal current 10 %}
    <a href="?q={{ query|urlencode }}&amp;offset=0"> &lt;prev </a>
    {% endifequal %}
    {% if next %}
    <a href="?q={{ query|urlencode }}&amp;offset={{ next }}"> next &gt; </a>
    {% endif %}
  </div>


  {% if took %}
  took {{ took }}
  {% endif %}

</div>


</body>
</html>