Difference between revisions of "ISFDB Download Script"
(add mac os x info) |
Hitspacebar (talk | contribs) (New version of the download script working with Google Drive) |
||
(6 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
= Purpose = | = Purpose = | ||
− | If you don't want to check the [[ISFDB_Downloads|download page]] for newer files every now and then and don't want to click through all the links there manually, and if you're on an UNIX machine like Linux, you can use the shell script shown below to download the ISFDB files. | + | If you don't want to check the [[ISFDB_Downloads|download page]] for newer files every now and then and don't want to click through all the links there manually, and if you're on an UNIX machine like Linux, you can use the shell script shown below to download the ISFDB files. |
− | + | You can optionally ignore certain downloads, see below for more details. | |
− | This script might not work on all UNIX flavours. It was developed on Xubuntu and should work on all similar platforms. It | + | Unlike earlier versions of it, this script is currently not able to resume interrupted downloads or to detect if an already downloaded file is the same version as the one on the backup server (and could therefore be skipped), i.e. that the script simply downloads the latest database backup and the cover image backups again if executed. |
+ | |||
+ | This script might not work on all UNIX flavours. It was developed on Xubuntu and should work on all similar platforms. It should also work on Mac OS X, but the current version of this script has not been tested there. | ||
+ | |||
+ | = Support = | ||
+ | |||
+ | If you've got questions or any other comment regarding this script the best bet is to [[User_talk:Hitspacebar|contact the developer of the script directly through his wiki page]]. | ||
= Examples = | = Examples = | ||
Line 11: | Line 17: | ||
Assuming you saved the script to a file called "isfdb_download.sh", here are some examples of how to call it. | Assuming you saved the script to a file called "isfdb_download.sh", here are some examples of how to call it. | ||
− | + | == Download everything == | |
− | + | This is the most simple case: download covers, latest database dump and get latest source code. Just call the script and tell it the directory in which the downloads shall be stored: | |
− | + | <source lang="bash"> | |
+ | isfdb_download.sh /home/username/backups/isfdb | ||
+ | </source> | ||
− | + | Please note that when you want the source code downloaded you're maybe prompted for a password when calling the script for the first time for the given download directory. Simply hit RETURN there! Subsequent calls of the script will not show the password prompt. | |
− | + | == Database only == | |
− | + | If you're not interested in source code and the huge cover files, ignore them by using options: | |
− | = | + | <source lang="bash"> |
+ | isfdb_download.sh -s -c /home/username/backups/isfdb | ||
+ | </source> | ||
− | + | == Print all available options == | |
− | + | <source lang="bash"> | |
+ | sfdb_download.sh -h | ||
+ | </source> | ||
= The download script = | = The download script = | ||
Line 36: | Line 48: | ||
#!/bin/sh | #!/bin/sh | ||
− | # This scipt downloads the latest database backup file and all | + | # This scipt downloads the latest database backup file and all cover image backups listed on the ISFDB |
− | # downloads page as well as the latest source code from the source code repository | + | # downloads page as well as the latest source code from the source code repository. |
− | |||
− | |||
# You can optionally ignore certain downloads, see code below or call this script using the | # You can optionally ignore certain downloads, see code below or call this script using the | ||
# "-h" option for more info. | # "-h" option for more info. | ||
− | # The | + | # The script is currently not able to resume interrupted downloads. It also isn't able to detect |
− | # and | + | # if an already downloaded file is the same version as the one on the backup server, |
+ | # i.e. that it simply downloads the latest database backup and the cover image backups again. | ||
− | # The | + | # The files to download are identified by examining the downloads page in the ISFDB wiki |
− | # | + | # and extracting the Google Drive ID of these files from it. |
− | |||
− | |||
download_page_url="http://www.isfdb.org/wiki/index.php/ISFDB_Downloads" | download_page_url="http://www.isfdb.org/wiki/index.php/ISFDB_Downloads" | ||
− | |||
− | |||
− | |||
− | usage() | + | usage() |
{ | { | ||
echo "$(basename "$0") [OPTIONS] DOWNLOAD_DIRECTORY" | echo "$(basename "$0") [OPTIONS] DOWNLOAD_DIRECTORY" | ||
Line 81: | Line 87: | ||
usage | usage | ||
exit 1;; | exit 1;; | ||
− | *) download_dir="$1";; | + | *) download_dir="$1";; |
esac | esac | ||
shift | shift | ||
Line 87: | Line 93: | ||
if [ -n "$download_dir" ]; then | if [ -n "$download_dir" ]; then | ||
+ | # If user specified a relative path, turn it into an absolute path | ||
+ | case "$download_dir" in | ||
+ | /*) ;; | ||
+ | *) download_dir="$(pwd)/$download_dir" ;; | ||
+ | esac | ||
mkdir -p "$download_dir" | mkdir -p "$download_dir" | ||
if [ ! -w "$download_dir" ]; then | if [ ! -w "$download_dir" ]; then | ||
Line 108: | Line 119: | ||
download_page="$download_dir/isfdb_download_page.html" | download_page="$download_dir/isfdb_download_page.html" | ||
− | |||
− | |||
− | |||
errors= | errors= | ||
Line 127: | Line 135: | ||
fi | fi | ||
if ! $($curl_cmd) ; then | if ! $($curl_cmd) ; then | ||
− | echo "IFSDB download page $download_page_url could not | + | echo "IFSDB download page $download_page_url could not be downloaded. Did the URL change probably?" |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
exit 1 | exit 1 | ||
fi | fi | ||
Line 144: | Line 145: | ||
echo "******************************************" | echo "******************************************" | ||
echo | echo | ||
− | + | if [ -z $(which svn) ] ; then | |
− | if [ - | + | errors="${errors}\n'svn' executable not found. If you want the source code you have to install the package" |
− | + | errors="${errors}\nwhich contains 'svn' or use the option '-s' to get rid of this message." | |
− | |||
− | |||
− | |||
else | else | ||
− | + | sources_module_name="isfdb2" | |
− | + | if [ -e "$sources_dir/$sources_module_name/" ]; then | |
− | + | cd "$sources_dir/$sources_module_name" | |
− | + | if ! svn update ; then | |
+ | errors="${errors}\nCould not update sources from SVN" | ||
+ | fi | ||
else | else | ||
− | if ! | + | cd "$sources_dir" |
− | errors="${errors}\nCould not check out sources from | + | echo |
+ | echo "No working copy found. An initial checkout of the complete source code will now be attempted." | ||
+ | echo "If nothing seems to happen it is probably waiting for a password to be entered, in which case pressing RETURN should suffice." | ||
+ | if ! svn checkout svn://svn.code.sf.net/p/isfdb/code-svn "$sources_module_name" ; then | ||
+ | errors="${errors}\nCould not check out sources from SVN." | ||
fi | fi | ||
fi | fi | ||
Line 163: | Line 167: | ||
else | else | ||
echo "Ignoring source code" | echo "Ignoring source code" | ||
+ | fi | ||
+ | |||
+ | can_download=true | ||
+ | if ([ -z $ignore_database ] || [ -z $ignore_covers ]) && [ -z $(which gdown) ]; then | ||
+ | errors="${errors}\ngdown is required to download the backup files from Google Drive but could not be found." | ||
+ | errors="${errors}\nIt's a Python library which can usually be installed from the Python Package Index (PyPI) repository:" | ||
+ | errors="${errors}\n pip install gdown" | ||
+ | errors="${errors}\nIf that doesn't work check out https://github.com/wkentaro/gdown" | ||
+ | can_download=false | ||
fi | fi | ||
if [ -z $ignore_database ]; then | if [ -z $ignore_database ]; then | ||
− | echo | + | if $can_download; then |
− | + | echo | |
− | + | echo "******************************************" | |
− | + | echo " Get latest database" | |
− | + | echo "******************************************" | |
− | + | echo | |
− | + | # Expected structure: all links below <a name="Database_Backups"> are database backups, and | |
− | + | # only use the second link (in the table column with the newer MySQL-Version) | |
− | errors="${errors}\ | + | database_id=$(cat "$download_page" | sed '1,/<a name="Database_Backups">/d' | grep https | sed -n '2s/.*id=\([^"]*\).*/\1/p') |
+ | if [ -n "$database_id" ]; then | ||
+ | echo "Trying to download database file with id $database_id" | ||
+ | cd "$database_dir" | ||
+ | gdown --id "$database_id" | ||
+ | # The gdown version 3.8.3 used to develop this script always returns 0, therefore no exit code checking | ||
+ | else | ||
+ | errors="${errors}\nFailed to identify latest database backup file on the ISFDB wiki page." | ||
+ | errors="${errors}\nEither the format of the whole page or of the download links on it has changed." | ||
+ | fi | ||
fi | fi | ||
else | else | ||
Line 181: | Line 203: | ||
if [ -z $ignore_covers ]; then | if [ -z $ignore_covers ]; then | ||
− | echo | + | if $can_download; then |
− | + | echo | |
− | + | echo "******************************************" | |
− | + | echo " Get latest covers" | |
− | + | echo "******************************************" | |
− | + | echo | |
− | + | cd "$covers_dir" | |
− | + | covers_file=/tmp/isfdb_download_covers | |
− | + | # Expected structure: all links between <a name="Image_Backups"> and <a name="Database_Backups"> are cover image backups | |
− | + | cat "$download_page" | sed -n '/<a name="Image_Backups">/,$p' | sed -n '/<a name="Database_Backups">/q;p' | grep https | sed -n 's/.*id=\([^"]*\).*/\1/p' > "$covers_file" | |
− | + | while read -r covers_id | |
− | + | do | |
− | + | echo "Trying to download cover image file with id $covers_id" | |
− | + | gdown --id "$covers_id" | |
− | + | # The gdown version 3.8.3 used to develop this script always returns 0, therefore no exit code checking | |
+ | done < "$covers_file" | ||
+ | rm "$covers_file" | ||
+ | fi | ||
else | else | ||
echo "Ignoring covers" | echo "Ignoring covers" | ||
Line 204: | Line 229: | ||
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!" | echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!" | ||
echo " THERE WERE ERRORS" | echo " THERE WERE ERRORS" | ||
− | echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!" | + | echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!" |
echo | echo | ||
printf "%b\n" "$errors\n" | printf "%b\n" "$errors\n" |
Latest revision as of 14:12, 28 September 2019
Purpose
If you don't want to check the download page for newer files every now and then and don't want to click through all the links there manually, and if you're on a UNIX machine like Linux, you can use the shell script shown below to download the ISFDB files.
You can optionally ignore certain downloads, see below for more details.
Unlike earlier versions, this script is currently not able to resume interrupted downloads or to detect whether an already downloaded file is the same version as the one on the backup server (and could therefore be skipped). In other words, every time it is executed the script simply downloads the latest database backup and the cover image backups again.
This script might not work on all UNIX flavours. It was developed on Xubuntu and should work on all similar platforms. It should also work on Mac OS X, but the current version of this script has not been tested there.
Support
If you've got questions or any other comment regarding this script the best bet is to contact the developer of the script directly through his wiki page.
Examples
Assuming you saved the script to a file called "isfdb_download.sh", here are some examples of how to call it.
Download everything
This is the simplest case: download the covers and the latest database dump, and get the latest source code. Just call the script and tell it the directory in which the downloads shall be stored:
isfdb_download.sh /home/username/backups/isfdb
Please note that when you want the source code downloaded, you may be prompted for a password when calling the script for the first time for the given download directory. Simply hit RETURN there! Subsequent calls of the script will not show the password prompt.
Database only
If you're not interested in source code and the huge cover files, ignore them by using options:
isfdb_download.sh -s -c /home/username/backups/isfdb
Print all available options
isfdb_download.sh -h
The download script
Copy and paste the code into a text editor, save it and make the file executable:
#!/bin/sh
# This script downloads the latest database backup file and all cover image backups listed on the ISFDB
# downloads page as well as the latest source code from the source code repository.
# You can optionally ignore certain downloads, see code below or call this script using the
# "-h" option for more info.
# The script is currently not able to resume interrupted downloads. It also isn't able to detect
# if an already downloaded file is the same version as the one on the backup server,
# i.e. it simply downloads the latest database backup and the cover image backups again on every run.
# The files to download are identified by examining the downloads page in the ISFDB wiki
# and extracting the Google Drive ID of these files from it.
# URL of the ISFDB wiki page listing the backup files. The page is fetched with curl and
# then scanned for the Google Drive IDs of the database and cover image backups.
download_page_url="http://www.isfdb.org/wiki/index.php/ISFDB_Downloads"
# Print the command synopsis followed by the list of supported options to stdout.
# The script name shown is the basename of $0.
usage()
{
  printf '%s\n' \
    "$(basename "$0") [OPTIONS] DOWNLOAD_DIRECTORY" \
    "Valid options are:" \
    " -c | --ignore-covers : ignore cover files" \
    " -d | --ignore-database : ignore database file" \
    " -s | --ignore-sources : ignore source code" \
    " -h | --help : this message"
}
# Parse the command line and set the global option variables.
#
# Globals written:
#   ignore_sources   - "true" if -s | --ignore-sources was given, else empty
#   ignore_database  - "true" if -d | --ignore-database was given, else empty
#   ignore_covers    - "true" if -c | --ignore-covers was given, else empty
#   download_dir     - the last non-option argument, else empty
# Arguments: the script's positional parameters.
# Exits with 0 after printing the usage for -h | --help, and with 1 for an unknown option.
parse_args() {
  ignore_sources=
  ignore_database=
  ignore_covers=
  # Reset explicitly so that a variable of the same name inherited from the
  # environment is never mistaken for a directory given on the command line.
  download_dir=

  # Loop on the argument count rather than on "$1" being non-empty: an empty
  # argument must not silently end parsing and drop the arguments after it.
  while [ "$#" -gt 0 ]; do
    case "$1" in
      -s | --ignore-sources) ignore_sources=true ;;
      -d | --ignore-database) ignore_database=true ;;
      -c | --ignore-covers) ignore_covers=true ;;
      -h | --help)
        usage
        exit
        ;;
      -*)
        echo "Unkown option $1"
        usage
        exit 1
        ;;
      "") ;; # ignore empty arguments
      *) download_dir="$1" ;;
    esac
    shift
  done
}

parse_args "$@"
# Validate the download directory given on the command line and create the
# directory layout used by the rest of the script:
#   $download_dir/sources, $download_dir/database, $download_dir/covers
# Exits with 1 (after printing an error) if no directory was given or if a
# required directory cannot be created or written to.
if [ -z "$download_dir" ]; then
  echo "ERROR: No backup directory provided!"
  usage
  exit 1
fi

# If user specified a relative path, turn it into an absolute path. Later steps
# change the working directory, so relative paths would stop resolving.
case "$download_dir" in
  /*) ;;
  *) download_dir="$(pwd)/$download_dir" ;;
esac

# Check the mkdir status and that the result really is a directory: a writable
# regular file of the same name would otherwise pass the -w test alone.
if ! mkdir -p "$download_dir" || [ ! -d "$download_dir" ] || [ ! -w "$download_dir" ]; then
  echo "ERROR: Backup directory '$download_dir' couldn't be created or is not writeable!"
  usage
  exit 1
fi

sources_dir="$download_dir/sources"
database_dir="$download_dir/database"
covers_dir="$download_dir/covers"

for sub_dir in "$sources_dir" "$database_dir" "$covers_dir"; do
  if ! mkdir -p "$sub_dir"; then
    echo "ERROR: Directory '$sub_dir' couldn't be created!"
    exit 1
  fi
done

# Local copy of the wiki download page; kept between runs so that curl can do a
# timestamp comparison against it.
download_page="$download_dir/isfdb_download_page.html"
# Accumulates non-fatal error messages (separated by a literal "\n") which are
# printed at the very end of the script.
errors=

echo
echo "******************************************"
echo " Get and check download page"
echo "******************************************"
echo

# Download the ISFDB download page to $download_page.
# Globals read: download_page, download_page_url
# Returns: the exit status of curl.
# curl is invoked directly with quoted arguments (instead of via a command
# string) so that a download directory containing spaces works. -f makes curl
# fail on HTTP errors instead of saving the server's error page.
fetch_download_page() {
  if [ -e "$download_page" ]; then
    # Download the page only if it has been changed since the last download
    # (using timestamp comparison):
    curl -f -z "$download_page" -o "$download_page" "$download_page_url"
  else
    curl -f -o "$download_page" "$download_page_url"
  fi
}

if ! fetch_download_page; then
  echo "IFSDB download page $download_page_url could not be downloaded. Did the URL change probably?"
  exit 1
fi
# Check out the ISFDB source code into $sources_dir/isfdb2, or update an
# already existing working copy there. Skipped when sources are ignored.
# Failures are appended to $errors and do not abort the script.
if [ -z "$ignore_sources" ]; then
  echo
  echo "******************************************"
  echo " Check out or update source code"
  echo "******************************************"
  echo
  # "command -v" is the POSIX way to test for an executable; unlike the
  # unquoted output of "which" it is safe for paths containing spaces.
  if ! command -v svn >/dev/null 2>&1; then
    errors="${errors}\n'svn' executable not found. If you want the source code you have to install the package"
    errors="${errors}\nwhich contains 'svn' or use the option '-s' to get rid of this message."
  else
    sources_module_name="isfdb2"
    if [ -e "$sources_dir/$sources_module_name/" ]; then
      # Run in a subshell and only if cd succeeded, so svn never operates on
      # whatever directory the script happens to be in.
      if ! (cd "$sources_dir/$sources_module_name" && svn update); then
        errors="${errors}\nCould not update sources from SVN"
      fi
    else
      echo
      echo "No working copy found. An initial checkout of the complete source code will now be attempted."
      echo "If nothing seems to happen it is probably waiting for a password to be entered, in which case pressing RETURN should suffice."
      if ! (cd "$sources_dir" && svn checkout svn://svn.code.sf.net/p/isfdb/code-svn "$sources_module_name"); then
        errors="${errors}\nCould not check out sources from SVN."
      fi
    fi
  fi
else
  echo "Ignoring source code"
fi
# Decide whether the Google Drive downloads can be attempted at all.
# can_download is "true" unless a database or cover download was requested and
# the gdown executable is missing; in that case an explanation is appended to
# $errors and can_download is set to "false".
can_download=true
# { ...; } groups the two tests without forking a subshell; "command -v" is the
# POSIX replacement for "which".
if { [ -z "$ignore_database" ] || [ -z "$ignore_covers" ]; } && ! command -v gdown >/dev/null 2>&1; then
  errors="${errors}\ngdown is required to download the backup files from Google Drive but could not be found."
  errors="${errors}\nIt's a Python library which can usually be installed from the Python Package Index (PyPI) repository:"
  errors="${errors}\n pip install gdown"
  errors="${errors}\nIf that doesn't work check out https://github.com/wkentaro/gdown"
  can_download=false
fi
# Print the Google Drive ID of the latest database backup found in an HTML file.
# Arguments: $1 - path to the saved ISFDB download page
# Outputs:   the ID on stdout, or nothing if it could not be found
# Expected structure: all links below <a name="Database_Backups"> are database backups, and
# only the second link (in the table column with the newer MySQL-Version) is used.
extract_database_id() {
  sed '1,/<a name="Database_Backups">/d' "$1" \
    | grep https \
    | sed -n '2s/.*id=\([^"]*\).*/\1/p'
}

# Download the latest database backup into $database_dir unless the database is
# ignored or gdown is unavailable. Problems are appended to $errors.
if [ -z "$ignore_database" ]; then
  if [ "$can_download" = true ]; then
    echo
    echo "******************************************"
    echo " Get latest database"
    echo "******************************************"
    echo
    database_id=$(extract_database_id "$download_page")
    if [ -n "$database_id" ]; then
      echo "Trying to download database file with id $database_id"
      # gdown saves into the current directory, so only run it if the
      # directory change succeeded.
      if cd "$database_dir"; then
        gdown --id "$database_id"
        # The gdown version 3.8.3 used to develop this script always returns 0, therefore no exit code checking
      else
        errors="${errors}\nCould not change into database directory '$database_dir'."
      fi
    else
      errors="${errors}\nFailed to identify latest database backup file on the ISFDB wiki page."
      errors="${errors}\nEither the format of the whole page or of the download links on it has changed."
    fi
  fi
else
  echo "Ignoring database"
fi
# Print the Google Drive IDs of all cover image backups found in an HTML file.
# Arguments: $1 - path to the saved ISFDB download page
# Outputs:   one ID per line on stdout, or nothing if none were found
# Expected structure: all links between <a name="Image_Backups"> and <a name="Database_Backups"> are cover image backups
extract_cover_ids() {
  sed -n '/<a name="Image_Backups">/,$p' "$1" \
    | sed -n '/<a name="Database_Backups">/q;p' \
    | grep https \
    | sed -n 's/.*id=\([^"]*\).*/\1/p'
}

# Download every cover image backup into $covers_dir unless covers are ignored
# or gdown is unavailable. Problems are appended to $errors.
if [ -z "$ignore_covers" ]; then
  if [ "$can_download" = true ]; then
    echo
    echo "******************************************"
    echo " Get latest covers"
    echo "******************************************"
    echo
    # The IDs are kept in a variable rather than in a fixed, predictable file
    # name under /tmp, which could clash with another run or be pre-created by
    # another user.
    cover_ids=$(extract_cover_ids "$download_page")
    if [ -z "$cover_ids" ]; then
      errors="${errors}\nFailed to identify cover image backup files on the ISFDB wiki page."
      errors="${errors}\nEither the format of the whole page or of the download links on it has changed."
    elif ! cd "$covers_dir"; then
      # gdown saves into the current directory, so do not download anything
      # if the directory change failed.
      errors="${errors}\nCould not change into covers directory '$covers_dir'."
    else
      while read -r covers_id; do
        echo "Trying to download cover image file with id $covers_id"
        # stdin is redirected so gdown can never consume the remaining IDs.
        gdown --id "$covers_id" </dev/null
        # The gdown version 3.8.3 used to develop this script always returns 0, therefore no exit code checking
      done <<EOF
$cover_ids
EOF
    fi
  fi
else
  echo "Ignoring covers"
fi
# Final report: either confirm success or print a banner followed by every
# error message collected in $errors. The messages are joined with literal
# "\n" sequences, which printf's %b turns into real line breaks.
if [ -z "$errors" ]; then
  echo "Done."
else
  printf '%s\n' \
    "" \
    "!!!!!!!!!!!!!!!!!!!!!!!!!!!!" \
    " THERE WERE ERRORS" \
    "!!!!!!!!!!!!!!!!!!!!!!!!!!!!" \
    ""
  printf "%b\n" "${errors}\n"
fi