File: //usr/share/rubygems-integration/all/gems/mechanize-2.7.7/lib/mechanize/pluggable_parsers.rb
require 'mechanize/file'
require 'mechanize/file_saver'
require 'mechanize/page'
require 'mechanize/xml_file'
require 'mime/types'
##
# Mechanize allows different parsers for different content types. Mechanize
# uses PluggableParser to determine which parser to use for any content type.
# To use your own parser or to change the default parsers, register them with
# this class through Mechanize#pluggable_parser.
#
# The default parser for unregistered content types is Mechanize::File.
#
# The module Mechanize::Parser provides basic functionality for any content
# type, so you may use it in custom parsers you write. For small files you
# wish to perform in-memory operations on, you should subclass
# Mechanize::File. For large files you should subclass Mechanize::Download as
# the content is only loaded into memory in small chunks.
#
# When writing your own pluggable parser, be sure to provide a method #body
# that returns a String containing the response body for compatibility with
# Mechanize#get_file.
#
# == Example
#
# To create your own parser, just create a class that takes four parameters in
# the constructor. Here is an example of registering a parser that handles
# CSV files:
#
# require 'csv'
#
# class CSVParser < Mechanize::File
# attr_reader :csv
#
# def initialize uri = nil, response = nil, body = nil, code = nil
# super uri, response, body, code
# @csv = CSV.parse body
# end
# end
#
# agent = Mechanize.new
# agent.pluggable_parser.csv = CSVParser
# agent.get('http://example.com/test.csv') # => CSVParser
#
# Now any response with a content type of 'text/csv' will initialize a
# CSVParser and return that object to the caller.
#
# To register a parser for a content type that Mechanize does not know about,
# use the hash syntax:
#
# agent.pluggable_parser['text/something'] = SomeClass
#
# To set the default parser, use #default:
#
# agent.pluggable_parser.default = Mechanize::Download
#
# Now all unknown content types will be saved to disk and not loaded into
# memory.
class Mechanize::PluggableParser
CONTENT_TYPES = {
:html => 'text/html',
:wap => 'application/vnd.wap.xhtml+xml',
:xhtml => 'application/xhtml+xml',
:pdf => 'application/pdf',
:csv => 'text/csv',
:xml => ['text/xml', 'application/xml'],
}
InvalidContentTypeError =
if defined?(MIME::Type::InvalidContentType)
# For mime-types >=2.1
MIME::Type::InvalidContentType
else
# For mime-types <2.1
MIME::InvalidContentType
end
attr_accessor :default
def initialize
@parsers = {
CONTENT_TYPES[:html] => Mechanize::Page,
CONTENT_TYPES[:xhtml] => Mechanize::Page,
CONTENT_TYPES[:wap] => Mechanize::Page,
'image' => Mechanize::Image,
'text/xml' => Mechanize::XmlFile,
'application/xml' => Mechanize::XmlFile,
}
@default = Mechanize::File
end
##
# Returns the parser registered for the given +content_type+
def parser content_type
return default unless content_type
parser = @parsers[content_type]
return parser if parser
mime_type = MIME::Type.new content_type
parser = @parsers[mime_type.to_s] ||
@parsers[mime_type.simplified] ||
# Starting from mime-types 3.0 x-prefix is deprecated as per IANA
(@parsers[MIME::Type.simplified(mime_type.to_s, remove_x_prefix: true)] rescue nil) ||
@parsers[mime_type.media_type] ||
default
rescue InvalidContentTypeError
default
end
def register_parser content_type, klass # :nodoc:
@parsers[content_type] = klass
end
##
# Registers +klass+ as the parser for text/html and application/xhtml+xml
# content
def html=(klass)
register_parser(CONTENT_TYPES[:html], klass)
register_parser(CONTENT_TYPES[:xhtml], klass)
end
##
# Registers +klass+ as the parser for application/xhtml+xml content
def xhtml=(klass)
register_parser(CONTENT_TYPES[:xhtml], klass)
end
##
# Registers +klass+ as the parser for application/pdf content
def pdf=(klass)
register_parser(CONTENT_TYPES[:pdf], klass)
end
##
# Registers +klass+ as the parser for text/csv content
def csv=(klass)
register_parser(CONTENT_TYPES[:csv], klass)
end
##
# Registers +klass+ as the parser for text/xml content
def xml=(klass)
CONTENT_TYPES[:xml].each do |content_type|
register_parser content_type, klass
end
end
##
# Retrieves the parser for +content_type+ content
def [](content_type)
@parsers[content_type]
end
##
# Sets the parser for +content_type+ content to +klass+
#
# The +content_type+ may either be a full MIME type a simplified MIME type
# ('text/x-csv' simplifies to 'text/csv') or a media type like 'image'.
def []= content_type, klass
register_parser content_type, klass
end
end