| CODENOTIFIER | Help | You are not signed in | Sign in |
Project: Revactor
Revision: 134
Author: tarcieri
Date: 28 May 2008 13:54:45
Changes: Move HttpFetcher out of examples and under lib
Files:| ... | ...@@ -1,95 +0,0 @@ | |
| 1 | require 'zlib' | |
| 2 | require 'stringio' | |
| 3 | ||
| 4 | require 'rubygems' | |
| 5 | require 'revactor' | |
| 6 | ||
| 7 | # A concurrent HTTP fetcher, implemented using a central dispatcher which | |
| 8 | # scatters requests to a worker pool. | |
| 9 | # | |
| 10 | # The HttpFetcher class is callback-driven and intended for subclassing. | |
| 11 | # When a request completes successfully, the on_success callback is called. | |
| 12 | # An on_failure callback represents non-200 HTTP responses, and on_error | |
| 13 | # delivers any exceptions which occurred during the fetch. | |
| 14 | class HttpFetcher | |
| 15 | def initialize(nworkers = 8) | |
| 16 | @_nworkers = nworkers | |
| 17 | @_workers, @_queue = [], [] | |
| 18 | nworkers.times { @_workers << Worker.spawn(Actor.current) } | |
| 19 | end | |
| 20 | ||
| 21 | def get(url, *args) | |
| 22 | if @_workers.empty? | |
| 23 | @_queue << T[url, args] | |
| 24 | else | |
| 25 | @_workers.shift << T[:fetch, url, args] | |
| 26 | end | |
| 27 | end | |
| 28 | ||
| 29 | def run | |
| 30 | while true | |
| 31 | Actor.receive do |filter| | |
| 32 | filter.when(T[:ready]) do |_, worker| | |
| 33 | if @_queue.empty? | |
| 34 | @_workers << worker | |
| 35 | on_empty if @_workers.size == @_nworkers | |
| 36 | else | |
| 37 | worker << T[:fetch, *@_queue.shift] | |
| 38 | end | |
| 39 | end | |
| 40 | ||
| 41 | filter.when(T[:fetched]) { |_, url, document, args| on_success url, document, *args } | |
| 42 | filter.when(T[:failed]) { |_, url, status, args| on_failure url, status, *args } | |
| 43 | filter.when(T[:error]) { |_, url, ex, args| on_error url, ex, *args } | |
| 44 | end | |
| 45 | end | |
| 46 | end | |
| 47 | ||
| 48 | def on_success(url, document, *args); end | |
| 49 | def on_failure(url, status, *args); end | |
| 50 | def on_error(url, ex, *args); end | |
| 51 | def on_empty; exit; end | |
| 52 | ||
| 53 | class Worker | |
| 54 | extend Actorize | |
| 55 | ||
| 56 | def initialize(fetcher) | |
| 57 | @fetcher = fetcher | |
| 58 | loop { wait_for_request } | |
| 59 | end | |
| 60 | ||
| 61 | def wait_for_request | |
| 62 | Actor.receive do |filter| | |
| 63 | filter.when(T[:fetch]) do |_, url, args| | |
| 64 | begin | |
| 65 | fetch url, args | |
| 66 | rescue => ex | |
| 67 | @fetcher << T[:error, url, ex, args] | |
| 68 | end | |
| 69 | ||
| 70 | # FIXME this should be unnecessary, but the HTTP client "leaks" messages | |
| 71 | Actor.current.mailbox.clear | |
| 72 | @fetcher << T[:ready, Actor.current] | |
| 73 | end | |
| 74 | end | |
| 75 | end | |
| 76 | ||
| 77 | def fetch(url, args) | |
| 78 | Actor::HttpClient.get(url, :head => {'Accept-Encoding' => 'gzip'}) do |response| | |
| 79 | if response.status == 200 | |
| 80 | @fetcher << T[:fetched, url, decode_body(response), args] | |
| 81 | else | |
| 82 | @fetcher << T[:failed, url, response.status, args] | |
| 83 | end | |
| 84 | end | |
| 85 | end | |
| 86 | ||
| 87 | def decode_body(response) | |
| 88 | if response.content_encoding == 'gzip' | |
| 89 | Zlib::GzipReader.new(StringIO.new(response.body)).read | |
| 90 | else | |
| 91 | response.body | |
| 92 | end | |
| 93 | end | |
| 94 | end | |
| 95 | end | |
| 96 | 0 | \ No newline at end of file |
| ... | ...@@ -1,5 +1,11 @@ | |
| 1 | 1 | 0.1.5: |
| 2 | 2 | |
| 3 | * Add Revactor::HttpFetcher, a concurrent HTTP fetcher using | |
| 4 | Revactor::HttpClient | |
| 5 | ||
| 6 | * Allow Revactor::HttpClient to take a block for requests, and handle | |
| 7 | closing sockets automatically when the block has been evaluated | |
| 8 | ||
| 3 | 9 | * Change Revactor::Filter setup to express initialize args as Tuples |
| 4 | 10 | |
| 5 | 11 | 0.1.4: |
| ... | ...@@ -0,0 +1,100 @@ | |
| 1 | #-- | |
| 2 | # Copyright (C)2007 Tony Arcieri | |
| 3 | # You can redistribute this under the terms of the Ruby license | |
| 4 | # See file LICENSE for details | |
| 5 | #++ | |
| 6 | ||
| 7 | require 'zlib' | |
| 8 | require 'stringio' | |
| 9 | ||
| 10 | module Revactor | |
| 11 | # A concurrent HTTP fetcher, implemented using a central dispatcher which | |
| 12 | # scatters requests to a worker pool. | |
| 13 | # | |
| 14 | # The HttpFetcher class is callback-driven and intended for subclassing. | |
| 15 | # When a request completes successfully, the on_success callback is called. | |
| 16 | # An on_failure callback represents non-200 HTTP responses, and on_error | |
| 17 | # delivers any exceptions which occurred during the fetch. | |
| 18 | class HttpFetcher | |
| 19 | def initialize(nworkers = 8) | |
| 20 | @_nworkers = nworkers | |
| 21 | @_workers, @_queue = [], [] | |
| 22 | nworkers.times { @_workers << Worker.spawn(Actor.current) } | |
| 23 | end | |
| 24 | ||
| 25 | def get(url, *args) | |
| 26 | if @_workers.empty? | |
| 27 | @_queue << T[url, args] | |
| 28 | else | |
| 29 | @_workers.shift << T[:fetch, url, args] | |
| 30 | end | |
| 31 | end | |
| 32 | ||
| 33 | def run | |
| 34 | while true | |
| 35 | Actor.receive do |filter| | |
| 36 | filter.when(T[:ready]) do |_, worker| | |
| 37 | if @_queue.empty? | |
| 38 | @_workers << worker | |
| 39 | on_empty if @_workers.size == @_nworkers | |
| 40 | else | |
| 41 | worker << T[:fetch, *@_queue.shift] | |
| 42 | end | |
| 43 | end | |
| 44 | ||
| 45 | filter.when(T[:fetched]) { |_, url, document, args| on_success url, document, *args } | |
| 46 | filter.when(T[:failed]) { |_, url, status, args| on_failure url, status, *args } | |
| 47 | filter.when(T[:error]) { |_, url, ex, args| on_error url, ex, *args } | |
| 48 | end | |
| 49 | end | |
| 50 | end | |
| 51 | ||
| 52 | def on_success(url, document, *args); end | |
| 53 | def on_failure(url, status, *args); end | |
| 54 | def on_error(url, ex, *args); end | |
| 55 | def on_empty; exit; end | |
| 56 | ||
| 57 | class Worker | |
| 58 | extend Actorize | |
| 59 | ||
| 60 | def initialize(fetcher) | |
| 61 | @fetcher = fetcher | |
| 62 | loop { wait_for_request } | |
| 63 | end | |
| 64 | ||
| 65 | def wait_for_request | |
| 66 | Actor.receive do |filter| | |
| 67 | filter.when(T[:fetch]) do |_, url, args| | |
| 68 | begin | |
| 69 | fetch url, args | |
| 70 | rescue => ex | |
| 71 | @fetcher << T[:error, url, ex, args] | |
| 72 | end | |
| 73 | ||
| 74 | # FIXME this should be unnecessary, but the HTTP client "leaks" messages | |
| 75 | Actor.current.mailbox.clear | |
| 76 | @fetcher << T[:ready, Actor.current] | |
| 77 | end | |
| 78 | end | |
| 79 | end | |
| 80 | ||
| 81 | def fetch(url, args) | |
| 82 | Actor::HttpClient.get(url, :head => {'Accept-Encoding' => 'gzip'}) do |response| | |
| 83 | if response.status == 200 | |
| 84 | @fetcher << T[:fetched, url, decode_body(response), args] | |
| 85 | else | |
| 86 | @fetcher << T[:failed, url, response.status, args] | |
| 87 | end | |
| 88 | end | |
| 89 | end | |
| 90 | ||
| 91 | def decode_body(response) | |
| 92 | if response.content_encoding == 'gzip' | |
| 93 | Zlib::GzipReader.new(StringIO.new(response.body)).read | |
| 94 | else | |
| 95 | response.body | |
| 96 | end | |
| 97 | end | |
| 98 | end | |
| 99 | end | |
| 100 | end | |
| 0 | 101 | \ No newline at end of file |