# (c) 2006 Rufus Pollock
# All material here is hereby placed in the Public Domain (or as close as you
# can get in your jurisdiction). It may be freely used, reused and
# redistributed without the need to seek permission or provide acknowledgement.
#
# This file provides a demonstration of how to use the python oaipmh package
# provided by infrae.com, available at http://www.infrae.com/download/oaipmh

import datetime
import oaipmh.client
import oaipmh.metadata

# OAI-PMH endpoints this demo has been pointed at; select one by key below.
repositories = {
    # bibliotheque nationale de france
    # (the alternative endpoint 'http://oai.bnf.fr/oai2' does not seem to work)
    'bnf': 'http://oai.bnf.fr/repositoryOAI.php',
    # british library
    'bl': 'http://sherpa.bl.uk/perl/oai2',
    # repec (economics)
    'repec': 'http://oai.repec.openlib.org/',
}
url = repositories['repec']

client = oaipmh.client.Client(url)

out = client.identify()
print('****** Connected to repository: %s' % out.repositoryName())

# We have to update the granularity, otherwise the request fails with:
# oaipmh.error.BadArgumentError: Max granularity is YYYY-MM-DD:2003-04-10T00:00:00Z
client.updateGranularity()

# get a list of the metadata formats
formats = client.listMetadataFormats()
print('****** Available formats are: *****')
for fmt in formats:
    print(fmt)

# we assume 'oai_dc' was in the list of formats (we should really check this)
metaprefix = 'oai_dc'

# Register a reader on our client to handle oai_dc metadata.
# Without it, any attempt to read records fails with:
#   .../oaipmh/metadata.py", line 37, in readMetadata
#   KeyError: 'oai_dc'
client.getMetadataRegistry().registerReader(metaprefix,
        oaipmh.metadata.oai_dc_reader)

# Date used to restrict the lookup of records. The month is written as a
# plain 4: a leading zero (04) is an octal literal in Python 2 and a syntax
# error in Python 3.
date = datetime.datetime(2006, 4, 10)

# Headers only (identifier and datestamp, no metadata). Not consumed by this
# demo; iterate over `headers` and call header.identifier() /
# header.datestamp() to inspect them.
headers = client.listIdentifiers(from_=date,
        metadataPrefix=metaprefix)

records = client.listRecords(from_=date,
        metadataPrefix=metaprefix)

# Just look at the first record that carries metadata.
# Do not call list(records) here: for large collections that breaks.
for record in records:
    header, metadata, about = record
    if metadata is None:
        # NOTE(review): records without a metadata section (e.g. deleted
        # records) appear to come back with metadata set to None; skip them
        # rather than crash on metadata.getMap().
        continue
    fields = metadata.getMap()
    print('****** Printing information about record: %s' % header.identifier())
    for key, value in fields.items():
        # Single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print('   %s : %s' % (key, value))

    break

