zipitup.sh
The snippet can be accessed without any authentication.
Authored by
ssapra2
snippetfile1.txt 2.13 KiB
#!/bin/bash
# Corpus categories; the main loop searches "$absolute_url/<category>" for each.
domain_dirs=(
  "written"
  "transcribed"
  "multimode"
  "spoken"
  "treebanks"
  "misc"
)

# Root directory holding the category folders (or /shared/corpora/corporaWeb/).
absolute_url=$PWD

# Value the check helpers print on stdout when a check passes.
one=1

# Running tallies printed in the final summary.
num_zipped=0
num_errors=0
num_total=0
valid_corpora_listing() {
  # Check that a corpus directory has a data folder, a docs folder and a
  # README/index page.
  # Arguments: $1 - path of the corpus directory to check
  # Outputs:   for the first missing item, a hint on stderr (the helper
  #            functions may print their own error line as well)
  # Returns:   0 when all three checks pass, 1 otherwise
  #
  # The is_* helpers report their verdict by printing 1 or 0 on stdout, not
  # through their exit status, so their output is captured and compared.
  # `return` only accepts a number, hence the hint goes to stderr instead.
  if [[ "$(is_data_folder "$1")" != 1 ]]; then
    echo "Add data folder in the corpora listing" >&2
    return 1
  fi
  if [[ "$(is_docs_folder "$1")" != 1 ]]; then
    echo "Add docs folder in the corpora listing" >&2
    return 1
  fi
  if [[ "$(is_readme "$1")" != 1 ]]; then
    echo "Add a README.txt or index.html in the corpora listing" >&2
    return 1
  fi
  return 0
}
is_data_folder() {
  # Check if there is a folder named data inside the given corpus directory.
  # Arguments: $1 - path of the corpus directory
  # Outputs:   1 on stdout when "$1/data" is a directory; otherwise 0 on
  #            stdout and an error line on stderr
  #
  # -d is used so that only a real directory counts (a regular file called
  # "data" does not), and the path is quoted so names with spaces work.
  if [[ -d "$1/data" ]]; then
    echo 1
  else
    echo "ERROR: No data folder found" >&2
    echo 0
  fi
}
is_docs_folder() {
  # Check if there is a folder named docs inside the given corpus directory.
  # Arguments: $1 - path of the corpus directory
  # Outputs:   1 on stdout when "$1/docs" is a directory; otherwise 0 on
  #            stdout and an error line on stderr
  #
  # -d is used so that only a real directory counts (a regular file called
  # "docs" does not), and the path is quoted so names with spaces work.
  if [[ -d "$1/docs" ]]; then
    echo 1
  else
    echo "ERROR: No docs folder found" >&2
    echo 0
  fi
}
is_readme() {
  # Report whether the corpus directory ships a landing document.
  # Arguments: $1 - path of the corpus directory
  # Outputs:   1 on stdout when README.txt or index.html exists there and is
  #            non-empty; otherwise an error line on stderr and 0 on stdout
  local candidate
  for candidate in README.txt index.html; do
    if [[ -s "$1/$candidate" ]]; then
      echo 1
      return
    fi
  done
  echo "ERROR: No README found" >&2
  echo 0
}
print_file_size() {
  # Print a path followed by its total human-readable disk usage.
  # Arguments: $1 - directory (or file) to measure
  # Outputs:   "Full Directory <path>", a blank line, "File Size: <size>",
  #            and another blank line, all on stdout
  local size=""
  echo "Full Directory $1"
  printf "\n"
  # Keep only du's first whitespace-separated field (the size). A fixed
  # character range would cut wide values such as "1020K" and drag the tab
  # separator along for narrow ones such as "12K". The path is quoted and
  # preceded by -- so names with spaces or a leading dash are measured intact.
  read -r size _ < <(du -sh -- "$1")
  echo "File Size: $size"
  printf "\n"
}
# Walk every domain directory, run the three checks on each entry found two
# levels below it, and print a summary of the tallies.
for dir in "${domain_dirs[@]}"
do
  # Data is passed as printf arguments, never as part of the format string.
  printf '###################################################### BEGIN %s\n\n' "$dir"

  # Read find's output NUL-delimited so that paths containing whitespace or
  # glob characters stay single array elements.
  corpora_dirs=()
  while IFS= read -r -d '' corp_dir; do
    corpora_dirs+=("$corp_dir")
  done < <(find "$absolute_url/$dir" -mindepth 2 -maxdepth 2 -print0)
  echo "${#corpora_dirs[@]} corpora entries found"

  for corp_dir in "${corpora_dirs[@]}"
  do
    printf '##### %s ##### \n' "$(basename -- "$corp_dir")"
    print_file_size "$corp_dir"

    # Each helper prints 1 (pass) or 0 (fail) on stdout and its error on stderr.
    readme=$(is_readme "$corp_dir")
    docs=$(is_docs_folder "$corp_dir")
    data=$(is_data_folder "$corp_dir")

    # An entry qualifies only when all three checks pass; the tally is updated
    # here so the summary no longer always reports zero zipped entries.
    # TODO: no archive is actually created yet; this only counts the entries.
    if [[ "$readme" == 1 && "$docs" == 1 && "$data" == 1 ]]; then
      echo "Zipping files for $dir"
      num_zipped=$((num_zipped + 1))
    else
      num_errors=$((num_errors + 1))
    fi
    printf "\n"
    num_total=$((num_total + 1))
  done
done
printf "######################################################## END\n\n"
printf "Summary:\n"
echo "$num_total total corpora entries found"
echo "$num_zipped corpora entries zipped"
echo "$num_errors corpora entries could not be zipped"
exit 0
# Zip all files now
# Use something like this:
# for i in */; do zip -r "${i%/}.zip" "$i"; done
Please register or sign in to comment