Library Software

From assela Pathirana
Jump to navigationJump to search

ISBN2MARC

  • The original source code is here. The following copy is provided for consistency only. (Full credit should go to William Denton.)
#!/usr/local/bin/ruby -w

# isbn2marc - find MARC records for one or more ISBNs

# William Denton <wtd@pobox.com>
# April 2007 - October 2008
# Released under the MIT License.

# Copyright (c) 2008 William Denton
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

# INSTALLATION NOTES
#
# Requires the ruby-zoom package (which requires YAZ) and the marc gem.
#
# ruby-zoom: http://ruby-zoom.rubyforge.org/
# See the installation instructions there
#
# marc gem: http://rubyforge.org/projects/marc/
# $ sudo gem install marc
#
# NOTE ruby-zoom installs its own marc.rb file that will conflict with
# marc! You will need to delete ruby-zoom's marc.rb or rename it 
# for the marc gem to work.
# 
# On my system it was installed in
#   /usr/local/lib/ruby/site_ruby/1.8/marc.rb
# but you'll have to look for it wherever your system put it.

# USAGE
#
# isbn2marc [-q] [-d marcfile] [-x xmlfile] [-f isbnfile] [-w apikey] [-l level] [ISBN]
#
# -q          Run quietly: don't list servers queried and don't
# --quiet     dump MARC to STDOUT.
#             By default isbn2marc tells STDERR what it's doing.
#
# -d marcfile Dump MARC record to file
# --dump
#
# -x xmlfile  Dump MARCXML record to file
# --dumpxml
#
# -f isbnfile Read ISBNs from file, one per line
# --file
#
# -w apikey   Query WorldCat using API key
# --worldcat
#
# -l level    WorldCat service level: "default" or "full"
# --worldcatlevel (defaults to "default")

# EXAMPLES
#
# Find a MARC record for the first volume of Casanova's HISTORY OF
# MY LIFE and pretty print it:
#
# $ isbn2marc 0801856620 
#
# Find a MARC record for Terry Pratchett's THIEF OF TIME and write
# a binary MARC file to disk as well as pretty printing it:
#
# $ isbn2marc -d thief.marc 0552148407
#
# Find a MARC record for the first omnibus volume of the University of
# Chicago Press's edition of Anthony Powell's A DANCE TO THE MUSIC OF TIME,
# but don't show the record on the screen:
#
# $ isbn2marc -q -d stringham.marc -x templer.xml 0226677141
#
# Find MARC records for all ISBNs in a file and save them on disk:
#
# $ cat isbns.txt
# 0439064864
# 0439064872
# 0439136350
# 0439136369
# 0439139600
# 059035342X
# 0736646736
# $ isbn2marc -f isbns.txt -d harry-potter.marc
#
# Try WorldCat first, which will speed things up, and get a full record:
#
# $ isbn2marc --worldcat yourAPIkeyhere --worldcatlevel full 1551922460
#
# Find a MARC record but don't display it or save it:
#
# $ isbn2marc -q 0195024028
#
# You'd be a bit silly to run that often.

# TO DO
#
# Proper ISBN validity check.

require 'net/http'
require 'optparse'
require "rexml/document"
include REXML

require 'rubygems'
require 'marc'
require 'zoom'

# Script-wide settings live in a global hash (alongside the global MARC
# writer and XML writer objects) because the write_record function needs
# to read them.  Whether globals are the right tool is open to debate;
# comments welcome.
$options = {
  :quiet   => false,
  :file    => nil,
  :dump    => nil,
  :dumpxml => nil,

  # Enter your WorldCat API key here.
  # It can also be specified on the command line but that's tiresome.
  # To get a key, see
  #  http://worldcat.org/devnet/wiki/SearchAPIWhoCanUse
  # For WorldCat API details see
  #    http://worldcat.org/devnet/wiki/SearchAPIDetails
  :worldcat      => '',
  :worldcatlevel => 'default' # Or full
}

# Parse the command line into $options.  parse! removes every recognised
# switch from ARGV, so whatever remains afterwards is the positional ISBN.
OptionParser.new do |opts|
  # The banner lists every switch the parser accepts; the ISBN itself is
  # optional because ISBNs may come from a file instead (-f).
  opts.banner = "Usage: isbn2marc [-q] [-d marcfile] [-x xmlfile] [-f isbnfile] [-w apikey] [-l level] [isbn]"
  opts.on("-q", "--quiet", "Run silently") { $options[:quiet] = true }
  opts.on("-d", "--dump d", "Dump MARC to file d") do |d|
    $options[:dump] = d
  end
  opts.on("-x", "--dumpxml x", "Dump MARCXML to file x") do |x|
    $options[:dumpxml] = x
  end
  opts.on("-f", "--file f", String, "Read ISBNs from file f (one per line)") do |f|
    $options[:file] = f
  end
  opts.on("-w", "--worldcat apikey", String, "WorldCat API key") do |w|
    $options[:worldcat] = w
  end
  opts.on("-l", "--worldcatlevel level", String, "WorldCat service level (default or full)") do |wl|
    $options[:worldcatlevel] = wl
  end
end.parse!

# URL template for a WorldCat ISBN lookup.  The ::ISBN::, ::LEVEL:: and
# ::KEY:: placeholders are substituted per request.  The endpoint is
# unlikely to change, but it is kept in one place just in case.
worldcatAPIURL = 'http://www.worldcat.org/webservices/catalog/content/isbn/::ISBN::' \
                 '?servicelevel=::LEVEL::&wskey=::KEY::'

# Build the list of ISBNs to look up: every valid ISBN found in the -f
# file (one per line), followed by the first positional argument, if any.
isbns = []

if $options[:file].nil?
  if ARGV.empty?
    STDERR.puts "No ISBN specified"
    # Nothing to do is a usage error, so report failure to the caller.
    exit 1
  end
else
  begin
    # File.foreach closes the file even if reading raises part-way through.
    File.foreach($options[:file]) do |line|
      # Strip hyphens, spaces and other punctuation first, so that
      # "0-8018-5662-0" is judged on its digits alone.
      candidate = line.gsub(/[^0-9X]/, '')
      # Accept only a complete ISBN-10 (nine digits plus a digit or X) or a
      # complete ISBN-13 (978/979 prefix plus ten digits).  The check digit
      # itself is not verified.
      # TODO Proper ISBN validity check
      next unless candidate =~ /\A(?:\d{9}[0-9X]|97[89]\d{10})\z/
      isbns << candidate
    end
  rescue StandardError => e
    # An unreadable file is reported but not fatal: an ISBN given on the
    # command line can still be processed.
    STDERR.puts "ERROR '#{e}'"
  end
end

# NOTE(review): only the first positional argument is used; any further
# arguments are ignored.
unless ARGV.empty?
  isbns << ARGV[0].to_s.gsub(/[^0-9X]/, '')
end

# Open the requested output files up front, before any lookups are made.
# If either cannot be opened, abort prints the message to STDERR and exits
# with a non-zero status so callers can detect the failure.
if $options[:dump]
  begin
    $writer = MARC::Writer.new($options[:dump])
  rescue StandardError => e
    abort "ERROR Cannot write to #{$options[:dump]}: '#{e}'"
  end
end

if $options[:dumpxml]
  begin
    $xmlwriter = MARC::XMLWriter.new($options[:dumpxml])
  rescue StandardError => e
    abort "ERROR Cannot write to #{$options[:dumpxml]}: '#{e}'"
  end
end

# Two lists of open Z39.50 servers:
#   http://targettest.indexdata.com/
#   http://staff.library.mun.ca/staff/toolbox/z3950hosts.htm
# Could also look at the list in Terry Reese's MarcEdit:
#   http://oregonstate.edu/~reeset/marcedit/html/downloads.html

# Z39.50 targets, tried in the order listed until one returns a record.
# Each entry is [hostname, port, database name], passed in that order to
# z3950query.
servers = [
  # Reorder these so that your preferred servers are first
  # North America
  ['theta.library.yorku.ca',     2200, 'unicorn'     ], # York
  ['sirsi.library.utoronto.ca',  2200, 'unicorn'     ], # U Toronto
  ['amicus.collectionscanada.gc.ca', 210, 'NL'       ], # Lib & Archives Canada
  ['z3950.loc.gov',              7090, 'Voyager'     ], # Library of Congress
  ['aleph.mcgill.ca',             210, 'MUSE'        ], # McGill
# ['ualapp.library.ualberta.ca', 2200, 'unicorn',    ], # U Alberta
  # NOTE: the Errata section lists this UBC host as failing frequently and
  # reports that ils.library.ubc.ca:7090 works instead (not verified here).
  ['portage.library.ubc.ca',     7090, 'voyager'     ], # UBC
  ['catnyp.nypl.org',             210, 'INNOPAC'     ], # New York Pub Lib
  ['library.mit.edu',            9909, 'mit01pub'    ], # MIT
  ['prodorbis.library.yale.edu', 7090, 'voyager'     ], # Yale
  ['catalog.princeton.edu',      7090, 'voyager'     ], # Princeton
  # NOTE: the Errata section lists this Chicago host as failing frequently.
  ['ipac.lib.uchicago.edu',       210, 'usmarc'      ], # Chicago
  ['www.saclibrarycatalog.org',   210, 'INNOPAC'     ], # Sacramento Pub Lib
  ['library.bu.edu',              210, 'INNOPAC'     ], # Boston U
  ['voyager.wrlc.org',           7090, 'voyager'     ], # Wash Res Lib Consor
  ['catalog.lib.jhu.edu',         210, 'horizon'     ], # Johns Hopkins
  ['z3950.lib.umich.edu',         210, 'miu01_pub'   ], # U Michigan
  ['catalog.library.cornell.edu',7090, 'voyager'     ], # Cornell
  # UK and Ireland
  ['library.ucc.ie',              210, 'INNOPAC'     ], # U College Cork
  ['library.ox.ac.uk',            210, 'MAIN*BIBMAST'], # Oxford
  ['z3950.nls.uk',               7290, 'voyager'     ], # Scottish Nat Lib
  ['lib-15.lse.ac.uk',           7090, 'voyager'     ], # LSE
  ['libsys.lib.hull.ac.uk',       210, 'INNOPAC'     ], # Hull
  # Europe (non-English)
  ['sigma.nkp.cz',               9909, 'NKC'         ], # Nat Lib Czech R
  ['lib.mpib-berlin.mpg.de',     2020, 'opac'        ], # Max Planck Inst
  ['ubsun02.biblio.etc.tu-bs.de',2020, 'bac'         ], # Bibliotheken Berlins
  ['z3950.kb.dk',                2100, 'KGL01'       ], # Kongelige Bibliothek
  ['roble.unizar.es',             210, 'INNOPAC'     ], # U Zaragoza
  ['www.helmet.fi',               210, 'INNOPAC'     ], # Helsinki Lib
  ['carmin.sudoc.abes.fr',        210, 'ABES-Z39-PUBLIC' ], # France
  ['gofor.bibli.ens-cachan.fr', 21210, 'ADVANCE'     ], # French school
  ['gofor.bibli.ens-cachan.fr', 21210, 'MAIN*BIBMAST'], # French school
  ['isis.cilea.it',              2100, 'usmarc'      ], # U Brescia
  ['z3950.bibsys.no',            2100, 'BIBSYS'      ], # Nat Lib Norway
  ['z3950.nb.no',                2100, 'norbok'      ], # Nat Lib Norway
  ['alpha.bn.org.pl',             210, 'INNOPAC'     ], # Nat Lib Poland
  ['z3950.btj.se',                210, 'BURK'        ], # Sweden
  # Australia and New Zealand
  ['catalogue.nla.gov.au',       7090, 'voyager'     ], # Nat Lib Australia
  ['nlnzcat.natlib.govt.nz',     7190, 'voyager'     ], # Nat Lib New Zealand
  # Asia
  ['library.cuhk.edu.hk',         210, 'INNOPAC'     ], # Chinese U HK
  ['linc.nus.edu.sg',             210, 'INNOPAC'     ], # Nat U Singapore
  ['nbinet.ncl.edu.tw',           210, 'INNOPAC'     ], # Nat Cent Lib Taiwan
  # Africa
  ['explore.up.ac.za',            210, 'INNOPAC'     ], # U Pretoria
]

# Given an ISBN and some Z39.50 server information, return MARCXML.
# Why MARCXML?  Because (now) the ruby-zoom module can't return a
# marc MARC object. It can, however, return MARCXML, which marc can
# grok.
#
# isbn - ISBN string to search for
# host - Z39.50 server hostname
# port - Z39.50 server port
# db   - database name on that server
#
# Returns the first matching record as a MARCXML string, or nil when the
# server has no match or the query fails.  Failures whose message contains
# "failed" are reported on STDERR; all others are silent.
def z3950query(isbn, host, port, db)
  ZOOM::Connection.open(host, port) do |conn|
    conn.database_name = db
    conn.preferred_record_syntax = 'MARC21'
    # "@attr 1=7" is presumably the Bib-1 use attribute for ISBN searches.
    rset = conn.search("@attr 1=7 #{isbn}")
    first = rset[0]
    # An empty result set is an ordinary "not found", not an error.
    return first.nil? ? nil : first.xml
  end
rescue StandardError => e
  # Match against the message text: Regexp#match does not accept an
  # Exception object.  StandardError (rather than Exception) lets Ctrl-C
  # and exit requests propagate instead of being swallowed per server.
  STDERR.puts "\nERROR Z39.50 query: '#{e}'" if e.message.to_s =~ /failed/
  nil
end

# Given a MARCXML string, parse it and write out the records therein:
# pretty-printed to STDOUT (unless running quietly), to the binary MARC
# file if -d was given, and to the MARCXML file if -x was given.
# Reads $options, $writer and $xmlwriter.
# Parse or write errors are reported on STDERR and do not stop the script.
# TODO Write out only the *first* one. (Why?)
def write_record(marcxml)
  reader = MARC::XMLReader.new(StringIO.new(marcxml))
  reader.each do |record|
    # Quiet mode must leave STDOUT untouched, including the blank
    # separator line that precedes each record.
    unless $options[:quiet]
      puts
      puts record
    end
    $writer.write(record) if $options[:dump]
    $xmlwriter.write(record) if $options[:dumpxml]
  end
rescue StandardError => e
  STDERR.puts "\nERROR Record not read: '#{e}'"
end

# Now the real business.  If we can check WorldCat, do so, because
# it's the best source.  If we can't, or if it fails (a diagnostic in the
# response, a network error, or an unparseable reply), loop through all
# the servers listed above and ask about the ISBN(s).

isbns.each do |isbn|
  unless $options[:worldcat].empty?
    STDERR.print "\r#{isbn} WorldCat" + " "*25 unless $options[:quiet]
    # Block form of gsub inserts the values literally, so a backslash in
    # the API key is not treated as a back-reference.
    worldcatURL = worldcatAPIURL.gsub(/::ISBN::/) { isbn }
    worldcatURL = worldcatURL.gsub(/::KEY::/) { $options[:worldcat] }
    worldcatURL = worldcatURL.gsub(/::LEVEL::/) { $options[:worldcatlevel] }
    # STDERR.print "\nGetting #{worldcatURL}"
    begin
      worldcat_xml = Net::HTTP.get_response(URI.parse(worldcatURL)).body
      doc = REXML::Document.new worldcat_xml
      diagnostic = doc.elements['//diagnostics/diagnostic/message']
      error = diagnostic.nil? ? nil : diagnostic.text
    rescue StandardError, Timeout::Error => e
      # Treat transport and parse failures like a WorldCat diagnostic so
      # the Z39.50 servers below still get a chance.  Timeout::Error is
      # named explicitly because older Rubies do not derive it from
      # StandardError.
      error = e.to_s
    end
    if error.nil?
      write_record(worldcat_xml)
      next
    end
    STDERR.puts "\nERROR from WorldCat: #{error}"
  end
  # Ask each server in turn and stop at the first one that has a record.
  servers.each do |host, port, db|
    STDERR.print "\r#{isbn} #{host}" + " "*25 unless $options[:quiet]
    marcxml = z3950query(isbn, host, port, db)
    next if marcxml.nil?
    write_record(marcxml)
    break
  end
end

# Flush and close the output files that were opened for -d / -x.
$writer.close if $options[:dump]
$xmlwriter.close if $options[:dumpxml]
  • Essential ruby installation:
 
wget http://rubyforge.org/frs/download.php/38646/rubygems-1.2.0.tgz
tar -xzvf rubygems-1.2.0.tgz 
cd rubygems-1.2.0/
sudo ruby setup.rb
sudo gem update --system
sudo apt-get install libyaz-dev
sudo gem install zoom
sudo gem install marc

=Errata

  • The following sites in the above script fail frequently:
portage.library.ubc.ca:7090 (use ils.library.ubc.ca:7090 instead; it works)
ipac.lib.uchicago.edu:210