| CODENOTIFIER | Help | You are not signed in | Sign in |
Project: ThruDB
Revision: 358
Author: thai
Date: 22 Apr 2008 05:24:07
Diff at Trac: http://trac.thrudb.org/changeset/358
Changes: update documentation for tweetsearch/py, refactor search/twitter.py to remove redundant code
Files:| ... | ...@@ -4,7 +4,7 @@ | |
| 4 | 4 | TEMPLATE_DEBUG = DEBUG |
| 5 | 5 | |
| 6 | 6 | ADMINS = ( |
| 7 | ('Thai Duong', 'thai@meetaa.com'), | |
| 7 | ('Thai Duong', 'thaidn@gmail.com'), | |
| 8 | 8 | ) |
| 9 | 9 | |
| 10 | 10 | MANAGERS = ADMINS |
| ... | ...@@ -45,7 +45,7 @@ | |
| 45 | 45 | # URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a |
| 46 | 46 | # trailing slash. |
| 47 | 47 | # Examples: "http://foo.com/media/", "/media/". |
| 48 | ADMIN_MEDIA_PREFIX = '' | |
| 48 | ADMIN_MEDIA_PREFIX = '/media/' | |
| 49 | 49 | |
| 50 | 50 | # Make this unique, and don't share it with anybody. |
| 51 | 51 | SECRET_KEY = '4bfqq(kaufg*un4rb_z8_%f%f094l8)6x)rn0l*4sf!(ur_%gx' |
| ... | ...@@ -1,36 +1,38 @@ | |
| 1 | HOW TO RUN | |
| 1 | ========================== | |
| 2 | = HOW TO RUN | |
| 3 | ========================== | |
| 2 | 4 | |
| 3 | 1. install django (http://www.djangoproject.com), apache, mod_python, and other necessary components | |
| 5 | 0. install django, see http://www.djangoproject.com for details | |
| 4 | 6 | |
| 5 | 2. catch the tweet by running ./search/twitter.py, you can see the processs in /tmp/twitter.log | |
| 7 | 1. install thrudb, see http://www.thrudb.org for details | |
| 6 | 8 | |
| 7 | 3. config apache with mod_python. Here's a sample conf: | |
| 9 | 2. install thrudex/thrudoc python libraries: | |
| 8 | 10 | |
| 9 | <VirtualHost *> | |
| 10 | ServerName tweetsearch.local | |
| 11 | DocumentRoot /path/to/thrudb/tutorial/tweetsearch/py | |
| 12 | <Location "/"> | |
| 13 | SetHandler python-program | |
| 14 | PythonHandler django.core.handlers.modpython | |
| 15 | SetEnv DJANGO_SETTINGS_MODULE py.settings | |
| 16 | SetEnv PYTHON_EGG_CACHE /tmp | |
| 17 | PythonDebug On | |
| 18 | PythonPath "['/path/to/thrudb/tutorial/tweetsearch/py'] + sys.path" | |
| 19 | </Location> | |
| 11 | $ cd thrudb/tutorial | |
| 12 | $ make | |
| 13 | $ sudo cp -pvr gen-py/Thrudex gen-py/Thrudoc /usr/lib/python2.5/site-packages/ | |
| 20 | 14 | |
| 21 | </VirtualHost> | |
| 15 | if you use python2.4, the last command should look like: | |
| 16 | $ sudo cp -pvr gen-py/Thrudex gen-py/Thrudoc /usr/lib/python2.4/site-packages/ | |
| 22 | 17 | |
| 23 | remember add this line to /etc/hosts: | |
| 24 | 127.0.0.1 tweetsearch.local | |
| 18 | 3. review the thrudex/thrudoc configuration in thrudex.conf and thrudoc.conf respectively. If everything looks okay, start thrudb: | |
| 25 | 19 | |
| 26 | 4. start apache, thrudex and thrudoc | |
| 20 | $ cd thrudb/tutorial | |
| 21 | $ ./thrudbctl start | |
| 27 | 22 | |
| 28 | 5. that's it! Contact me if you have any problem. | |
| 23 | 4. start grabbing tweets from http://www.twitter.com: | |
| 24 | $ cd thrudb/tutorial/tweetsearch/py | |
| 25 | $ ./search/twitter.py | |
| 29 | 26 | |
| 30 | Thai Duong (thaidn@gmail.com). | |
| 27 | The application will be running as a daemon. You can see the progress in /tmp/twitter.log, thrudex.log, and thrudoc.log | |
| 31 | 28 | |
| 29 | 5. start the django application | |
| 32 | 30 | |
| 31 | $ cd thrudb/tutorial/tweetsearch/py | |
| 32 | $ python manage.py runserver | |
| 33 | 33 | |
| 34 | the application is available at http://localhost:8000/ | |
| 34 | 35 | |
| 36 | 6. that's it! Contact me if you have any problem. | |
| 35 | 37 | |
| 36 | ||
| 38 | Thai Duong (thaidn@gmail.com). |
| ... | ...@@ -7,5 +7,5 @@ | |
| 7 | 7 | # Uncomment this for admin: |
| 8 | 8 | # (r'^admin/', include('django.contrib.admin.urls')), |
| 9 | 9 | # catch all |
| 10 | (r'^.*$', 'py.search.views.search') | |
| 10 | (r'^.*$', 'py.search.views.search'), | |
| 11 | 11 | ) |
| ... | ...@@ -27,7 +27,7 @@ | |
| 27 | 27 | THRUDOC_BUCKET = "tweets"; |
| 28 | 28 | THRUDEX_INDEX = "tweets"; |
| 29 | 29 | |
| 30 | class TweetCatcher(object): | |
| 30 | class TweetManager(object): | |
| 31 | 31 | def __init__(self, since_id=None): |
| 32 | 32 | self.connect_to_thrudoc() |
| 33 | 33 | self.connect_to_thrudex() |
| ... | ...@@ -71,7 +71,7 @@ | |
| 71 | 71 | def save_tweet(self, tweet): |
| 72 | 72 | self.thrudoc.put(THRUDOC_BUCKET, str(tweet["id"]), cjson.encode(tweet)) |
| 73 | 73 | |
| 74 | def run(self): | |
| 74 | def grab_tweet(self): | |
| 75 | 75 | while True: |
| 76 | 76 | # the random paramater used to avoid http caching by upstream provider |
| 77 | 77 | url = "http://twitter.com/statuses/public_timeline.json?since_id=%s&r=%s" % (self.since_id, random()) |
| ... | ...@@ -96,27 +96,6 @@ | |
| 96 | 96 | print e |
| 97 | 97 | continue |
| 98 | 98 | print "loaded %s tweets, last since_id %s" % (self.count, self.since_id) |
| 99 | ||
| 100 | class TweetManager(object): | |
| 101 | def __init__(self): | |
| 102 | self.connect_to_thrudoc() | |
| 103 | self.connect_to_thrudex() | |
| 104 | ||
| 105 | def connect_to_thrudoc(self): | |
| 106 | socket = TSocket('localhost', THRUDOC_PORT) | |
| 107 | transport = TFramedTransport(socket) | |
| 108 | protocol = TBinaryProtocol(transport) | |
| 109 | self.thrudoc = Thrudoc.Client(protocol) | |
| 110 | transport.open() | |
| 111 | self.thrudoc.admin("create_bucket", THRUDOC_BUCKET) | |
| 112 | ||
| 113 | def connect_to_thrudex(self): | |
| 114 | socket = TSocket('localhost', THRUDEX_PORT) | |
| 115 | transport = TFramedTransport(socket) | |
| 116 | protocol = TBinaryProtocol(transport) | |
| 117 | self.thrudex = Thrudex.Client(protocol) | |
| 118 | transport.open() | |
| 119 | self.thrudex.admin("create_index", THRUDEX_INDEX) | |
| 120 | 99 | |
| 121 | 100 | def search_tweet(self, terms, offset=0, limit=10): |
| 122 | 101 | q = ThrudexTypes.SearchQuery() |
| ... | ...@@ -147,12 +126,10 @@ | |
| 147 | 126 | doc.bucket = THRUDOC_BUCKET |
| 148 | 127 | doc.key = ele.key |
| 149 | 128 | docs.append(doc) |
| 150 | ||
| 151 | return docs | |
| 152 | ||
| 129 | return docs | |
| 153 | 130 | |
| 154 | 131 | if __name__ == "__main__": |
| 155 | 132 | import daemonize as dm |
| 156 | 133 | dm.daemonize('/dev/null','/tmp/twitter.log','/tmp/twitter.log') |
| 157 | tc = TweetCatcher() | |
| 158 | tc.run() | |
| 134 | tc = TweetManager() | |
| 135 | tc.grab_tweet() |