#!/bin/bash
# mcgmail : a script to locally back-up all your IMAP eMails
# v 0.6.2 version -- with Thunderbird & Evolution integration
# January 2014
#
# COPYRIGHT Arrow-SEO.com 2014
# You are not permitted to sell this script, but you can use it, alter it, copy
# it or distribute it, providing that you do not delete this copyright notice,
# and you do not remove any reference or links to Arrow-SEO.com
# For support or questions, please visit: http://arrow-seo.com/
# McGmail is offered without any warranty of any kind. Use at your own discretion.
#
# recommended steps for use:
# 1. Run back-up on your system drive!
# 2. File any older eMails from your INBOX to appropriate sub-directories.
# 3. Empty your IMAP mail trash -- unless you want to back-up your trash.
# 4. Run McGmail as follows
#      $ mcgmail email-client emailaddress@yourdomain.com
#    where email-client is the name of the program you use to down-load and
#    view eMails, like Thunderbird or Sylpheed, etc., and emailaddress is
#    your eMail address that you wish to back-up long-term.
#
# McGmail will create individual MH format eMail files and a mirror directory
# tree under your main IMAP folder. It will copy any eMail older than
# 120 days into this mirror archive tree, to the matching directory,
# renaming it in the process by receive date and sender's eMail address.
# This MAY take up more space on your main drive, but these days, drive space
# is cheap.
#
# As many people keep eMails older than 120 days in their main INBOX,
# McGmail currently skips any eMail still filed directly in the INBOX directory.
# To change the age in the script, search for 'AGE="120"' below and alter
# the number accordingly.
#
# a NOTE about mbox format: McGmail relies on a very well written script
# called 'mbox2mh' by Valentin Koch to convert mbox format
# to MH format. You can get info and d/l it at:
#     http://vale.thus.ch/software/mbox2mh.html
# If you are using Thunderbird, etc. 'mbox2mh' must be installed in either
# /usr/bin or ~/bin to work completely.
#
# a NOTE on Outlook *.pst files: McGmail relies on readpst and libpst packages
# to convert MS Outlook *.pst data files to MH format eMails. These functions
# have not yet been tested with McGmail. If you are using Outlook on your PC,
# please install these packages in their appropriate Windows folder for
# McGmail to work with your eMail client. (and please let us know your results!)

# Name this script was invoked as; shown in the usage text.
PROGNAME=${0##*/}

# usage: print the command-line synopsis on stdout.
# Callers redirect it to stderr when reporting an error.
usage () {
  printf '%s\n' \
    "Usage 1: $PROGNAME CLIENT ADDRESS" \
    "Where:" \
    "CLIENT is the preferred eMail client used to retrieve IMAP eMails" \
    "ADDRESS is the IMAP eMail address to archive messages from."
  return
}

# Both CLIENT and ADDRESS are required. An empty ADDRESS would make every
# later "does this folder mention the address?" search match everything,
# so refuse to run and report the failure through the exit status.
if [[ $# -lt 2 || -z "$1" || -z "$2" ]]; then
  usage >&2
  exit 1
fi

# display running script and create logfile.
# The clean-up step at the end of the script moves ~/Desktop/McGmail-log_*.log
# into the archive, so make sure the directory exists before tee opens the file.
mkdir -p -- ~/Desktop
exec > >(tee -a ~/Desktop/"McGmail-log_$(date +%F_%T%Z).log") 2>&1

# locate specified mail client's home folder, eMail address to be archived:
CLIENT="$1"
echo "eMail client program is $CLIENT."
# lower-case the client name; later comparisons use the lower-case spelling
CLIENT=$(tr '[:upper:]' '[:lower:]' <<< "$CLIENT")
ADDRESS="$2"
echo "eMail address to archive is $ADDRESS."

# to alter the age at which eMails get archived, change the numeric value below:
AGE="120"
#AGE="0"
# RFC 5322 formatted cut-off date: eMails dated before it are archived.
# Relative dates with -d are a GNU date feature; stop if it is unavailable.
if ! ARCHDATE=$(date -R -d "-${AGE}days"); then
  echo "ERROR: unable to compute the archive cut-off date (GNU date is required)." >&2
  exit 1
fi
echo "McGmail will now archive eMails older than \"$ARCHDATE.\""

# to alter the minimum threshold count, change the value below:
MIN="25"
echo "McGmail will look for $MIN eMails or more when filtering potential directories."

# reset IFS to allow for directory/file names w/ spaces, odd characters, etc.
# (the original value is kept in OIFS and restored during clean-up):
OIFS=$IFS
IFS=$'\n'

# ----------------------------------------
# this section IDs pertinent directories.
# identify the Current Working Directory before we start messing around
# (the clean-up section uses $CWD to return to the starting directory):
CWD=$(pwd)

# mcg_count_hits DIR
# Print the number of lines below DIR that contain $ADDRESS (as a literal
# string), ignoring any match whose file path or text mentions "archives".
mcg_count_hits () {
  grep -r -F -e "$ADDRESS" -- "$1" | grep -ivc archives
}

# mcg_parent_dir DIR
# Print the absolute path of DIR's parent. Fails (non-zero, no output) when
# DIR is empty or cannot be entered, instead of silently reporting the parent
# of whatever directory the shell happens to be in.
mcg_parent_dir () {
  [[ -n "$1" ]] || return 1
  ( cd -- "$1/.." && pwd )
}

# find CLIENT $USER folder: every directory under $HOME whose name contains
# the client name is a candidate; the last candidate with at least $MIN
# address hits wins.
CTLIST=$(find ~ -type d -iname "*$CLIENT*" 2>/dev/null)
echo "The CTList contains: $CTLIST"
CLIENT_FOLDER=""
while IFS= read -r candidate; do
  [[ -n "$candidate" ]] || continue
  # count once per candidate: the recursive search is expensive
  hits=$(mcg_count_hits "$candidate")
  if (( ${hits:-0} >= MIN )); then
    CLIENT_FOLDER="$candidate"
    echo "The path to the CLIENT DIRECTORY is $CLIENT_FOLDER"
  else
    echo "\"$candidate\" is not the correct folder."
  fi
done <<< "$CTLIST"

if [[ -z "$CLIENT_FOLDER" ]]; then
  echo "ERROR: CLIENT folder is blank. Try lowering the \$MIN value in script." >&2
  exit 1
fi

# find the main INBOX folder for specified $ADDRESS:
IBLIST=$(find "$CLIENT_FOLDER" -type d -iname "*inbox*" 2>/dev/null | grep -iv archives)
echo "IBList contains: $IBLIST"
INBX_FOLDER=""
while IFS= read -r candidate; do
  [[ -n "$candidate" ]] || continue
  hits=$(mcg_count_hits "$candidate")
  if (( ${hits:-0} >= MIN )); then
    INBX_FOLDER="$candidate"
    echo "The path to the INBOX folder for $ADDRESS is $INBX_FOLDER."
  else
    echo "$candidate is not the inbox folder you're looking for..."
  fi
done <<< "$IBLIST"

if [[ -z "$INBX_FOLDER" ]]; then
  echo "The INBX Folder value is BLANK. Please check the name of \$CLIENT." >&2
  exit 1
fi

# identify the main $eMAIL_FOLDER for $ADDRESS (parent of the INBOX folder)
if eMAIL_FOLDER=$(mcg_parent_dir "$INBX_FOLDER") && [[ -d "$eMAIL_FOLDER" ]]; then
  echo "The path to the eMail folder is $eMAIL_FOLDER"
else
  echo "Please check the name of the eMail address." >&2
  exit 1
fi

# identify the $SMTP_FOLDER for $ADDRESS (parent of the eMail folder)
if SMTP_FOLDER=$(mcg_parent_dir "$eMAIL_FOLDER") && [[ -d "$SMTP_FOLDER" ]]; then
  echo "$CLIENT's SMTP folder is at $SMTP_FOLDER"
else
  echo "Please manually check for $SMTP_FOLDER in the directory tree." >&2
  exit 1
fi

# identify the imap folder for $ADDRESS (parent of the SMTP folder):
if IMAP_FOLDER=$(mcg_parent_dir "$SMTP_FOLDER") && [[ -d "$IMAP_FOLDER" ]]; then
  echo "The path to $CLIENT's folder containing the IMAP directories is $IMAP_FOLDER"
else
  echo "ERROR: Please check the path to the imap sub-directory." >&2
  exit 1
fi

# check for MBOX formatting; extract to MH format.
# The parentheses make "-type f" apply to both name patterns.
if [[ -n "$(find "$IMAP_FOLDER" -type f \( -iname "*.mbox" -o -iname "*.msf" \) -print -quit)" ]]; then
  echo "This is an mbox format eMail client."
  if ! command -v mbox2mh >/dev/null 2>&1; then
    echo "ERROR: 'mbox2mh' is not installed (see the note at the top of this script)." >&2
    exit 1
  fi
  mkdir -p -- "$IMAP_FOLDER/$ADDRESS"
  if ! mbox2mh -r "$SMTP_FOLDER" "$IMAP_FOLDER/$ADDRESS"; then
    echo "WARNING: mbox2mh reported a problem while converting $SMTP_FOLDER." >&2
  fi
  echo "Moving temp directory of mail files."
  if ! mv -- "$IMAP_FOLDER/$ADDRESS" "$SMTP_FOLDER"; then
    echo "ERROR: unable to move the temp directory into $SMTP_FOLDER." >&2
    exit 1
  fi
  # the move above puts the converted mail at exactly this path
  eMAIL_FOLDER="$SMTP_FOLDER/$ADDRESS"
  if [[ -d "$eMAIL_FOLDER" ]]; then
    echo "The path to the temp eMail folder is $eMAIL_FOLDER."
  fi
else
  echo "$CLIENT is not mbox format. Moving on..."
fi

# TODO: Outlook .pst support (untested, currently disabled). The intended
# steps mirror the mbox branch above, using readpst for the conversion:
#   readpst -M "Outlook.pst" "$IMAP_FOLDER"/"$ADDRESS"
#   mv "$IMAP_FOLDER"/"$ADDRESS" "$SMTP_FOLDER"
#   eMAIL_FOLDER="$SMTP_FOLDER"/"$ADDRESS"

if ! cd -- "$SMTP_FOLDER"; then
  echo "ERROR: unable to enter $SMTP_FOLDER." >&2
  exit 1
fi
echo "Now in the SMTP_FOLDER."

# ----------------------------------------
# this section assigns the array values
# commands to create MIRROR archive directories
# note: if a directory already exists, it will be skipped:

# locate the inbox folder(s) inside the eMail folder
# (one path per line if several directories match):
inbx_folder=$(find "$eMAIL_FOLDER" -type d -iname "*inbox*")
echo "The little inbox folder is at $inbx_folder"

# create a MIRROR, in the /archives/ tree, of each directory whose path
# contains an inbox folder path. $boxarray collects those directories as
# paths relative to $SMTP_FOLDER, one per line.
boxarray=""
if [[ -n "$inbx_folder" ]]; then
  while IFS= read -r box_dir; do
    [[ -n "$box_dir" ]] || continue
    # plain prefix removal: safe for paths containing '_' or regex characters
    box_rel=${box_dir#"$SMTP_FOLDER"/}
    boxarray+="${boxarray:+$'\n'}${box_rel}"
    mkdir -vp -- "$IMAP_FOLDER/archives/$box_rel"
  done < <(find "$eMAIL_FOLDER" -type d | grep -F -e "$inbx_folder")
else
  echo "WARNING: no inbox folder found under $eMAIL_FOLDER; no mirror directories created." >&2
fi

# create the $mailarray of all eMails to be archived (one path per line):
mailarray=$(find "$eMAIL_FOLDER" -type f)

# steps to refine $mailarray to skip rm & b-up of regular eMail files
# in the main INBOX directory.
# mcg_header FILE NAME
# Print the value of the first header field NAME (case-insensitive) found in
# FILE's header section, i.e. before the first blank line. Prints nothing when
# the field is absent. Only the first physical line of the field is returned.
mcg_header () {
  awk -v name="$2" '
    BEGIN { want = tolower(name) ":" }
    { sub(/\r$/, "") }
    $0 == "" { exit }
    tolower(substr($0, 1, length(want))) == want {
      sub(/^[^:]*:[ \t]*/, "")
      print
      exit
    }
  ' "$1"
}

# Collect the files that must NOT be archived: eMails filed directly in the
# main INBOX directory, and everything below any "sent" directory inside it.
# $inbx_folder may hold several paths (one per line), so handle each in turn.
inbarray=""
sntarray=""
while IFS= read -r inbox_dir; do
  [[ -d "$inbox_dir" ]] || continue
  inbarray+=$(find "$inbox_dir" -maxdepth 1 -type f)$'\n'
  # -exec only runs for "sent" directories that exist, so a mailbox without
  # one contributes nothing here
  sntarray+=$(find "$inbox_dir" -type d -iname sent -exec find {} -type f \;)$'\n'
done <<< "$inbx_folder"

# to back-up eMails in main INBOX directory, comment out the lines above
# and the line below.
# Exclusion is by exact, literal, whole-line match (-F -x), so paths are never
# treated as regular expressions and an empty list excludes nothing.
mailarray=$(printf '%s\n' "$mailarray" \
  | grep -F -x -v -f <(printf '%s\n%s\n' "$inbarray" "$sntarray" | grep .))
mailcount=$(printf '%s\n' "$mailarray" | grep -c .)
echo "We're dealing with  $mailcount eMails."
echo "'\$mailarray' refined. Proceeding with eMail archiving."

# ---------------------------------------
# this section processes and files the MH format eMails:

# cut-off as seconds since the epoch (loop-invariant, so computed once)
if ! archive_epoch=$(date -d "$ARCHDATE" +%s); then
  echo "ERROR: unable to read the archive cut-off date \"$ARCHDATE\"." >&2
  exit 1
fi

# archived eMails are named <YYYY-MM-DD>_<HH:MM:SS><ZONE>@<sender>
name_re='[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}:[0-9]{2}:[0-9]{2}'

while IFS= read -r i; do
  [[ -f "$i" ]] || continue

  # step 1: assign the date variables for $i from eMail header
  idate=$(mcg_header "$i" Date)
  echo "$idate"
  if [[ -z "$idate" ]] || ! iepoch=$(date -d "$idate" +%s 2>/dev/null); then
    # no usable Date: header, e.g. hidden index files kept by the mail client
    echo "The file is not an eMail. Moving on to next file..." >&2
    continue
  fi
  idt=$(date -d "$idate" +%F_%T%Z)
  echo "$idt"

  # step 2A: check ( eMail date ) < ( current date - $AGE days )
  if (( iepoch < archive_epoch )); then
    echo "$i needs to be archived."

    # step 2B: check naming convention (on the file name, not the whole path):
    ibase=${i##*/}
    if [[ ! "$ibase" =~ $name_re ]]; then
      echo "The name of eMail $i needs to be changed"

      # step 3: pull the sender from the header, assign it to the variable $idn:
      # keep what follows the last '<', up to the first '@' (the local part)
      idn=$(mcg_header "$i" From)
      idn=${idn##*<}
      idn=${idn%%@*}
      idn=${idn%%>*}
      # a '/' in the sender would be taken as a directory separator by cp
      idn=${idn//\//_}
      echo "$idn"

      # step 4: construct name from parts
      inewname="${idt}@${idn}"
      echo "$inewname"

      # step 5: confirm name matches format + filter out non-eMails
      # (such as hidden .blah-blah files)
      if [[ "$inewname" =~ $name_re ]]; then
        # step 6: set the parent directory path for "$i" relative to
        # $SMTP_FOLDER, then copy "$i" into the archive as "$inewname"
        pardir=${i%/*}
        pardir=${pardir#"$SMTP_FOLDER"/}
        echo "The path to pardir is \"$pardir\""
        # make sure the destination exists even for folders outside the mirror
        mkdir -p -- "$IMAP_FOLDER/archives/$pardir"
        # -n: never overwrite an eMail that was archived by an earlier run
        cp -vn -- "$i" "$IMAP_FOLDER/archives/$pardir/$inewname"
      else
        # skip just this file; one odd file must not abort the whole back-up
        echo "The file is not an eMail. Moving on to next file..." >&2
        continue
      fi
    else
      echo "File name $i is correct. Exit name change" >&2
    fi
  else
    echo "Eegads. The eMail $i is much too young for the old-eMails' home." >&2
  fi
done <<< "$mailarray"

# clean up the temp directory for .mbox format
if [[ "$CLIENT" == "thunderbird" || "$CLIENT" == "outlook" ]]; then
  # future fix: auto-detect clients that need these steps
  echo "The client is $CLIENT. Deleting temp directory at $SMTP_FOLDER/$ADDRESS."
  # never run rm -r with an empty path component, and only on a real directory
  if [[ -n "$SMTP_FOLDER" && -n "$ADDRESS" && -d "$SMTP_FOLDER/$ADDRESS" ]]; then
    rm -r -- "${SMTP_FOLDER:?}/${ADDRESS:?}"
  else
    echo "No temp directory found at $SMTP_FOLDER/$ADDRESS; nothing to delete." >&2
  fi
else
  echo "$CLIENT is not Thunderbird or Outlook."
fi

# -------------------------------------------------------------
# clean-up section:

# return Internal Field Separator to original value:
IFS=$OIFS

# return to starting directory:
cd -- "$CWD"

# make /archives/logs directory & move log file into /archives/logs directory
# (-p: the directory already exists after the first run)
mkdir -p -- "$IMAP_FOLDER/archives/logs/"
echo "A log file of this back-up can be found in $IMAP_FOLDER/archives/logs/ for reference."
mv -- ~/Desktop/McGmail-log_*.log "$IMAP_FOLDER/archives/logs/"
exit