#/bin/zsh
# mrcheckbib - version DBL_MIN
# checks a bibtex file (.bib) against the AMS MRef database
#
# Copyright (C) 2006 John Cagnol
# University Leonard de Vinci, Paris, France
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.  This document can
# be obtained at http://www.gnu.org/licenses/gpl.txt
#
# Usage:
#   mrcheckbib basename
#
# Where:
#   basename.bib is the bibtex file to be checked
#
# Output:
#   result is placed in basename-mrefed.bib and log is placed in
#   basename-mrefed.log
#
# Example:
#   mrcheckbib references
#
#   checks file references.bib against MRef and places the result in
#   references-mrefed.bib
#
# Purpose of the script:
#
# This script takes every entry in a bibtex file (.bib) and checks it
# against the AMS MRef database (see http://www.ams.org/mref).  If it
# finds a unique MRef entry, then your entry will be replaced by the
# MRef one.  For example, if your bibtex entry is
#
#   @article{greens,
#     Author="Cagnol, J. and Lebiedzik, C.",
#     Title="On the free boundary conditions for a dynamic shell model
#            based on intrinsic differential geometry",
#     Journal="Applicable Analysis", Year=2004}
#
# It will be replaced by
#
#   @article {greens,
#     AUTHOR = {Cagnol, John and Lebiedzik, Catherine},
#     TITLE = {On the free boundary conditions for a dynamic shell
#              model based on intrinsic differential geometry},
#     JOURNAL = {Appl. Anal.},
#     FJOURNAL = {Applicable Analysis. An International Journal},
#     VOLUME = {83},
#     YEAR = {2004},
#     NUMBER = {6},
#     PAGES = {607--633},
#     ISSN = {0003-6811},
#     MRCLASS = {74K25 (35B35 53B50 74H99)},
#     MRNUMBER = {MR2059476 (2006c:74066)},
#     MRREVIEWER = {Liliana Gratie},
#   }
#
# The latter is more complete than the former (page numbers, etc.).
# Please note that mrcheckbib does not try to figure out which entry
# is more complete.  If a unique MRef entry is found, then your entry
# is gone.  In most cases, this is good because the MRef entry is more
# complete.
#
# mrcheckbib is intended to be used with a standard bibfile, with no
# comment line.  It is assumed your shell is zsh and that you have a
# non-interactive network retriever such as wget.  If you use curl
# or any other retriever, you'll need to adapt the options.
#
# The script relies on the fact that the MRef response is bracketed by
# <pre> and </pre> tags.  If AMS changes this, then the script will
# stop working.
#
# Please note that this script was written in haste; it was not
# intended to be portable, robust or well-written.  Version number is
# DBL_MIN, which is pretty low!  It was released because several people
# asked me to, but it has not been tested extensively, and may require
# fine tuning.  If you come up with a better version, please let me
# know.


# Non-interactive network retriever used
wget=wget
# if you change this, you'll need to change the options as well

# A basename is mandatory: without it every file name below would be
# derived from an empty string (".bib", "-mrefed.bib", ...).
if [ "$#" -lt 1 ] || [ -z "$1" ]
then
  echo "Usage: $0 basename" >&2
  exit 2
fi

# File names (quoted so that a basename containing spaces still works)
inputfile="$1.bib"
outputfile="$1-mrefed.bib"
outputlog="$1-mrefed.log"
# Scratch file; per-entry downloads are stored next to it as
# $tempfile.<entry number>
tempfile="$1.$$"

# Counters
refcount=0   # entries processed
okcount=0    # entries replaced by their MRef counterpart
numcount=0   # entries for which no unique match was found

# Messages written as comment lines in front of each entry of the output
date=$(date +"%d %b %Y")
verified="% checked against MRef on $date"
unverified="% Could not find a unique match on MRef on $date"
weird="% reference verification failed on $date"

# The input file must be there before anything gets created
if [ ! -r "$inputfile" ]
then
  echo "$0: cannot read $inputfile.  Aborting" >&2
  exit 1
fi

# Output files better not exist: results are appended, so an existing
# file would end up mixing two runs.  Abort with a non-zero status so
# callers can tell that nothing was done.
if [ -e "$outputfile" ]
then
  echo "$0: file $outputfile exists.  Aborting" >&2
  exit 1
fi

if [ -e "$outputlog" ]
then
  echo "$0: file $outputlog exists.  Aborting" >&2
  exit 1
fi

# Flatten a bibtex stream read on stdin into one entry per line on stdout.
#
#  - line breaks, tabs and carriage returns become spaces (runs of spaces
#    are squeezed) so that words sitting on adjacent lines are not glued
#    together;
#  - every '@' starts a new output line, and is itself dropped;
#  - remaining spaces are replaced by '@' so that an entry is a single
#    blank-free word;
#  - trailing '@' (former trailing blanks) and empty lines are dropped.
#
# Limitation inherited from this scheme: a literal '@' inside an entry
# (an e-mail address for instance) is taken for the start of a new entry.
mrcheck_flatten() {
  tr -s '\n\t\r' '   ' \
    | tr '@' '\n' \
    | sed -e 's/ /@/g' -e 's/@*$//' -e '/^$/d'
}

# bib entries are placed one by line, spaces replaced by @
mrcheck_flatten < "$inputfile" > "$tempfile"

# Header of the result file
{
  echo "% Generated by mrcheckbib version DBL_MIN on $date"
  echo "% For more information, please visit http://www.cagnol.com/mrcheckbib"
  echo " "
  echo " "
} >> "$outputfile"

# Processing of each entry
#
# Reads the flattened entries (one per line, spaces stored as '@') from
# $tempfile, queries MRef for each of them and appends to $outputfile
# either the MRef record (unique match) or the original entry preceded
# by a comment line.  Details go to $outputlog.  A summary is printed at
# the end.

# The counters are initialised earlier in the script; default them so an
# unset counter is treated as zero.
: "${refcount:=0}" "${okcount:=0}" "${numcount:=0}"

# Print the percent-encoded form of the flattened entry $1, ready to be
# used as the value of the "ref" query parameter.  '%' is encoded first
# so the escapes produced afterwards are not encoded twice; '&', '#' and
# '+' would otherwise cut or alter the query; '@' stands for a space.
mrcheck_encode_ref() {
  printf '%s\n' "$1" \
    | sed -e 's/%/%25/g' -e 's/&/%26/g' -e 's/#/%23/g' \
          -e 's/+/%2B/g' -e 's/@/%20/g'
}

# Print the number of every line of file $2 that contains the fixed
# string $1 (one number per line, nothing if there is no such line).
mrcheck_tag_lines() {
  grep -n -F -- "$1" "$2" | cut -d: -f1
}

# Classify a response from the line numbers of its <pre> ($1) and
# </pre> ($2) tags.  Prints
#   0  one <pre> followed, on a later line, by one </pre>
#   1  no <pre>, or several of them: not a unique match
#   2  a <pre> without a usable </pre>
mrcheck_classify() {
  case $1 in
    '' | *[!0-9]*) echo 1; return ;;
  esac
  case $2 in
    '' | *[!0-9]*) echo 2; return ;;
  esac
  if [ "$2" -gt "$1" ]
  then
    echo 0
  else
    echo 2
  fi
}

# Print the flattened entry $1 the way it goes back into the bib file:
# leading '@' restored, '@' placeholders turned back into spaces.
mrcheck_restore_entry() {
  printf '@'
  printf '%s\n' "$1" | tr '@' ' '
}

# Entries are read on descriptor 3 so that nothing run inside the loop
# can eat them from stdin.  The "|| [ -n ... ]" part keeps a last line
# that has no trailing newline.
while IFS= read -r entry <&3 || [ -n "$entry" ]
do
  # Blank lines carry no entry
  [ -n "$entry" ] || continue

  # Entry info
  refcount=$((refcount + 1))
  reference=$(mrcheck_encode_ref "$entry")
  bibtype=$(printf '%s\n' "$entry" | cut -d'{' -f1 | tr -d '@')
  bibtag=$(printf '%s\n' "$entry" | cut -d'{' -f2 | cut -d',' -f1 | tr -d '@')
  printf '%s\n' "Checking reference $refcount ($bibtag, $bibtype)"
  printf '%s\n' "[$refcount, $bibtag, $bibtype]" >> "$outputlog"

  # fetch the MRef for the entry
  printf 'connecting. '
  # NOTE(review): endpoint kept exactly as in the original script;
  # confirm that AMS still serves it.
  URL="http://www.ams.org/mathscinet-mref?&dataType=bibtex&ref=$reference"
  response="${tempfile}.${refcount}"
  # if you use curl, adapt the options below
  "${wget:-wget}" -a "$outputlog" -O "$response" "$URL"

  # figure out the start and the end of the entry (delimited by
  # <pre> and </pre> tags)
  start=$(mrcheck_tag_lines '<pre>' "$response")
  end=$(mrcheck_tag_lines '</pre>' "$response")
  printf '%s\n' "start=$start, end=$end" >> "$outputlog"

  # if <pre> tags were not found then the MRef failed for this entry
  result=$(mrcheck_classify "$start" "$end")

  case $result in
  1) # No unique <pre> tag could be found, most likely because no unique
     # match could be found: keep the entry as it was
     echo "Not a unique match."
     echo "Not a unique match." >> "$outputlog"
     numcount=$((numcount + 1))
     printf '%s\n' "$unverified" >> "$outputfile"
     mrcheck_restore_entry "$entry" >> "$outputfile"
     ;;

  2) # A <pre> tag could be found, but no </pre> tag, that's bizarre:
     # keep the entry as it was
     echo "Internal error."
     echo "Internal error." >> "$outputlog"
     printf '%s\n' "$weird" >> "$outputfile"
     mrcheck_restore_entry "$entry" >> "$outputfile"
     ;;

  0) # Tags <pre> and </pre> were found, MRef returned a result!
     echo "found. "
     echo "OK." >> "$outputlog"
     okcount=$((okcount + 1))

     # get the entrytype from MRef: the text between '@' and '{' on the
     # <pre> line
     newbibtype=$(sed -n "${start}p" "$response" \
       | cut -d'@' -f2 | cut -d'{' -f1 | tr -d ' ')

     printf '%s\n' "$verified" >> "$outputfile"

     # Warn if entry types disagree
     if [ "$bibtype" != "$newbibtype" ]
     then
       printf '%s\n' "discrepency: your is entry type was $bibtype, MRef is $newbibtype" >> "$outputlog"
     fi

     # Output the result: MRef's entry type and fields (the lines
     # strictly between the two tags) under the user's own citation key
     {
       printf '@%s{%s,\n' "$newbibtype" "$bibtag"
       awk -v s="$start" -v e="$end" 'NR > s && NR < e' "$response"
       echo "}"
     } >> "$outputfile"
     ;;
  esac

  echo " " >> "$outputfile"
  echo " " >> "$outputfile"

  # Get rid of temp files
  rm -f -- "$response"
done 3< "$tempfile"

# Get rid of temp files
rm -f -- "$tempfile"

# Print summary information
if [ "$refcount" -ge 1 ]
then
  echo "$refcount references were checked"
else
  echo "no entry found"
fi

if [ "$okcount" -ge 1 ]
then
  echo "$okcount references were found"
else
  echo "none of your references were found"
fi

if [ "$numcount" -ge 1 ]
then
  echo "$numcount references could not be uniquely matched"
fi

echo "done"