Skip to content
Snippets Groups Projects
Commit d2a05cd2 authored by Ruben's avatar Ruben
Browse files

Cleaning deleted file

parent 3f4f8b0d
No related branches found
No related tags found
No related merge requests found
import getpass
import grp
import json
import os
import subprocess
import sys
import textwrap
from argparse import *
from datetime import datetime
from os.path import *
#*************************************************************
#Functions
#*************************************************************
#Shell command output checker
def check_shellcommand(command_run,msg):
    """Stop the script when an external command reported a failure.

    command_run -- status returned for the command; 0 is treated as success
    msg         -- short description of the step, reused in the log entry
                   and in the exit message

    Returns None when the status is 0.  For any other status the failure is
    written to the module-level logfile, the logfile is closed and the
    script terminates through sys.exit.
    """
    if command_run == 0:
        return
    logfile.write(msg+" unsuccessful (Stopping):\n")
    logfile.close()
    sys.exit("Stopping: "+msg+" unsuccessful")
#Remove slash character if given with directory
#Checks that we are in the same level as the target directory
def checkcorrectname(directory):
    """Validate the target directory name and drop one trailing slash.

    directory -- directory name as given on the command line

    Returns the name without its trailing '/' (if it had one).

    The script terminates through sys.exit when the name still contains a
    '/' after that (it is a path, not a name at the current level) or when
    nothing is left of it.  The empty case covers "" and "/": indexing the
    last character of "" used to raise IndexError, and "/" used to be
    reduced to an empty name that was then accepted as valid.
    """
    if directory.endswith('/'):
        directory = directory[:-1]
    if not directory or '/' in directory:
        sys.exit("Stopping: Wrong directory name. Present PATH should be at the same level of the target directory")
    return directory
#Checks that subdirectories do not contain / character
def checkcorrectnameexcluded(excluded):
    """Validate the names of the subdirectories to exclude.

    excluded -- list of subdirectory names given with -x/--exclude

    Each name is printed, one trailing '/' is removed, and the cleaned name
    is stored back into the same list, which is also returned.

    The script terminates through sys.exit when a name still contains a '/'
    (more than one level below the target directory) or is empty.  An empty
    name used to raise IndexError, and a lone "/" used to be reduced to an
    empty name, which later produced an exclude pattern for the whole
    target directory.
    """
    for i, f in enumerate(excluded):
        print(f)
        name = f[:-1] if f.endswith('/') else f
        if not name or '/' in name:
            # Fall back to the raw input so the message is never blank.
            sys.exit("Stopping: Wrong subdirectory name %s. It should be maximum one level depth from the target directory" %(name or f))
        excluded[i] = name
    return excluded
#Checks if directory exists
def checkdirectory(directory,out):
    """Check that a directory exists below $PWD and report the result.

    directory -- path relative to the directory named by the PWD
                 environment variable
    out       -- 0 reports the directory as "found"; any other value
                 reports it as "found and excluded"

    Progress is printed and written to the module-level logfile.  When the
    directory does not exist the logfile is closed and the script
    terminates through sys.exit.
    """
    pathfile = os.environ['PWD']+'/'+directory
    found = os.path.isdir(pathfile)
    print("Searching %s" %(pathfile))
    logfile.write("Searching %s\n" %(pathfile))
    if not found:
        logfile.write("Stopping: directory not found\n")
        logfile.close()
        sys.exit("Stopping: directory not found")
    if out != 0:
        print("Directory %s found and excluded!" %(directory))
        logfile.write("Directory %s found and excluded!\n" %(directory))
    else:
        print("Directory %s found!" %(directory))
        logfile.write("Directory %s found!\n" %(directory))
    logfile.flush()
#Checks if metadata exists and has the correct JSON format
def checkmetadata(directory):
    """Check that ARCHIVE_METADATA.json exists in the target and is JSON.

    directory -- target directory, relative to the directory named by the
                 PWD environment variable

    Progress is printed and written to the module-level logfile.  When the
    metadata file is missing the logfile is closed and the script
    terminates through sys.exit.
    """
    metadata_path = os.environ['PWD']+'/'+directory+'/ARCHIVE_METADATA.json'
    present = os.path.isfile(metadata_path)
    print("Searching metadata %s" %(metadata_path))
    logfile.write("Searching metadata %s\n" %(metadata_path))
    if not present:
        logfile.write("Stopping: metadata not found\n")
        logfile.close()
        sys.exit("Stopping: metadata not found")
    print("Metadata found!")
    logfile.write("Metadata found!\n")
    # NOTE(review): a metadata file that is not valid JSON is only reported;
    # the run continues.  Confirm whether this should stop the script.
    if is_json(metadata_path):
        print("Metadata format is correct")
        logfile.write("Metadata format is correct\n")
    else:
        print("Metadata format is NOT correct. Use -h or --help for an example of JSON format")
        logfile.write("Metadata format is NOT correct\n")
    logfile.flush()
#JSON validator: Stolen from StackExchange
def is_json(jsonfile):
    """Tell whether the file at *jsonfile* contains valid JSON.

    jsonfile -- path of the file to check

    Returns True when the whole file parses as JSON, False otherwise.
    Errors from opening the file (missing file, permissions) propagate to
    the caller.
    """
    # The with-block closes the handle; it used to be left open.
    with open(jsonfile, 'r') as f:
        try:
            # Reading happens inside the try, so content that cannot be
            # decoded as text (also a ValueError) counts as "not JSON".
            json.load(f)
        except ValueError:
            return False
    return True
#Create list of files (manifest)
def createlist(directory,nameman,excluded):
    """Write the manifest: a `find -ls` listing of the files to archive.

    directory -- target directory (relative name, listed as ./<directory>)
    nameman   -- manifest file to create in the current directory
    excluded  -- first-level subdirectory names to prune from the listing
                 (may be empty)

    The listing is written to *nameman* and a copy is then placed inside
    the target directory.  A failed listing stops the script through
    check_shellcommand; a failed copy is reported as a warning only.

    find is started directly with an argument list instead of through a
    shell command string, so names containing spaces or shell
    metacharacters are passed through unchanged.
    """
    print("Listing files included in the backup")
    args = ['find', './'+directory]
    for f in excluded or []:
        # "( -path X -prune ) -o" skips X and everything below it.
        args += ['(', '-path', './'+directory+'/'+f, '-prune', ')', '-o']
    args.append('-ls')
    logfile.write("Listing files included in the backup\n")
    logfile.write("Executing command: %s > %s\n" %(' '.join(args), nameman))
    logfile.flush()
    try:
        with open(nameman, 'w') as listing:
            command_run = subprocess.call(args, stdout=listing)
    except (OSError, IOError):
        # find could not be started or the manifest could not be created;
        # report it like a failed command.
        command_run = 127
    msg="Listing of files "
    check_shellcommand(command_run,msg)
    # Copy the manifest into the target.  The status of this copy used to
    # be ignored; it is now reported without stopping the run.
    try:
        copied = subprocess.call(['cp', nameman, directory]) == 0
    except OSError:
        copied = False
    if not copied:
        warning = "Warning: could not copy %s into %s" %(nameman, directory)
        print(warning)
        logfile.write(warning+"\n")
    logfile.flush()
#Create list of files md5sum
def createlist_md5sum(directory,namemd5,excluded):
    """Write the md5sum of every regular file that will be archived.

    directory -- target directory (relative name, searched as ./<directory>)
    namemd5   -- checksum file to create in the current directory
    excluded  -- first-level subdirectory names to prune from the search
                 (may be empty)

    The checksums are written to *namemd5* and a copy is then placed inside
    the target directory.  A failed checksum run stops the script through
    check_shellcommand; a failed copy is reported as a warning only.

    find is started directly with an argument list instead of through a
    shell command string, so names containing spaces or shell
    metacharacters are passed through unchanged.
    """
    print("creating md5sum for files included in the backup")
    args = ['find', './'+directory]
    for f in excluded or []:
        # "( -path X -prune ) -o" skips X and everything below it.
        args += ['(', '-path', './'+directory+'/'+f, '-prune', ')', '-o']
    args += ['-type', 'f', '-exec', 'md5sum', '{}', ';']
    logfile.write("Listing md5sum of files included in the backup\n")
    logfile.write("Executing command: %s > %s\n" %(' '.join(args), namemd5))
    logfile.flush()
    try:
        with open(namemd5, 'w') as sums:
            command_run = subprocess.call(args, stdout=sums)
    except (OSError, IOError):
        # find could not be started or the checksum file could not be
        # created; report it like a failed command.
        command_run = 127
    msg="Creating md5sum of files "
    check_shellcommand(command_run,msg)
    # Copy the checksum list into the target.  The status of this copy used
    # to be ignored; it is now reported without stopping the run.
    try:
        copied = subprocess.call(['cp', namemd5, directory]) == 0
    except OSError:
        copied = False
    if not copied:
        warning = "Warning: could not copy %s into %s" %(namemd5, directory)
        print(warning)
        logfile.write(warning+"\n")
    logfile.flush()
#Create archive file tar
def createarchive(directory,archivefile,excluded):
    """Create the bzip2-compressed tar archive of the target directory.

    directory   -- target directory (relative name)
    archivefile -- archive file to create in the current directory
    excluded    -- first-level subdirectory names to leave out (may be empty)

    Runs the equivalent of `tar -cf - <directory> | lbzip2 -n 4 >
    <archivefile>` and stops the script through check_shellcommand when
    either process fails.

    The two processes are started directly and connected with a pipe
    rather than through one shell command string.  A shell pipeline
    reports only the status of its last command, so a failing tar went
    unnoticed as long as lbzip2 succeeded.  Both statuses are checked now,
    which means any non-zero tar exit stops the run.  Names containing
    spaces or shell metacharacters are also passed through unchanged.
    """
    msg="Creating archive "
    print(msg+"%s" %(archivefile))
    tar_args = ['tar']
    for f in excluded or []:
        tar_args.append('--exclude='+directory+'/'+f)
    tar_args += ['-cf', '-', directory]
    bz_args = ['lbzip2', '-n', '4']
    logfile.write("Creating archive %s\n" %(archivefile))
    logfile.write("Executing command: %s | %s > %s\n" %(' '.join(tar_args), ' '.join(bz_args), archivefile))
    logfile.flush()
    tar_proc = None
    try:
        with open(archivefile, 'wb') as out:
            tar_proc = subprocess.Popen(tar_args, stdout=subprocess.PIPE)
            bz_proc = subprocess.Popen(bz_args, stdin=tar_proc.stdout, stdout=out)
            # Close our copy of the pipe so that tar is not left blocked
            # on a pipe nobody reads if lbzip2 exits early.
            tar_proc.stdout.close()
            bz_status = bz_proc.wait()
            tar_status = tar_proc.wait()
        command_run = tar_status or bz_status
    except (OSError, IOError):
        # A program could not be started or the archive file could not be
        # created.  Do not leave a half-started tar behind.
        if tar_proc is not None and tar_proc.poll() is None:
            tar_proc.kill()
            tar_proc.wait()
        command_run = 127
    check_shellcommand(command_run,msg)
    logfile.flush()
#Check compressed file integrity
def checkintegrity(archivefile):
    """Test the compressed archive with `lbzip2 -tv`.

    archivefile -- archive file to test

    A failed test stops the script through check_shellcommand.

    lbzip2 is started directly with an argument list instead of through a
    shell command string, so an archive name containing spaces or shell
    metacharacters is passed through unchanged.
    """
    msg="Checking integrity"
    print(msg)
    args = ['lbzip2', '-tv', archivefile]
    logfile.write("Archive %s created! Checking integrity\n" %(archivefile))
    logfile.write("Executing command: %s\n" %(' '.join(args)))
    logfile.flush()
    try:
        command_run = subprocess.call(args)
    except OSError:
        # lbzip2 could not be started; report it like a failed command.
        command_run = 127
    check_shellcommand(command_run,msg)
    logfile.flush()
#Rename original folder
def renameoriginal(directory):
    """Rename the archived directory to <directory>.toberemoved.

    directory -- target directory (relative name)

    A failed rename stops the script through check_shellcommand.

    mv is started directly with an argument list instead of through a
    shell command string, so a directory name containing spaces or shell
    metacharacters is passed through unchanged.
    """
    msg="Renaming original directory "
    print(msg)
    args = ['mv', directory, directory+".toberemoved"]
    logfile.write("Renaming original directory\n")
    logfile.write("Executing command: %s\n" %(' '.join(args)))
    logfile.flush()
    try:
        command_run = subprocess.call(args)
    except OSError:
        # mv could not be started; report it like a failed command.
        command_run = 127
    check_shellcommand(command_run,msg)
    logfile.flush()
#Store compressed file in the corresponding (groupPI) directory
def store(archivefile,groupname,username):
    """Move the archive to /scicore/archive/<groupname>/<username>/.

    archivefile -- archive file in the current directory
    groupname   -- group name used as the first level of the destination
    username    -- user name used as the second level of the destination

    A failed move stops the script through check_shellcommand.

    mv is started directly with an argument list instead of through a
    shell command string, so names containing spaces or shell
    metacharacters are passed through unchanged.  The step description no
    longer ends in a newline, which used to split the failure message over
    two lines; the printed and logged text is unchanged.
    """
    msg="Moving archive %s to /scicore/archive/%s/%s" %(archivefile, groupname, username)
    # The extra newline keeps the blank line that was printed before.
    print(msg+"\n")
    args = ['mv', archivefile, '/scicore/archive/'+groupname+'/'+username+'/']
    logfile.write(msg+"\n")
    logfile.write("Executing command: %s\n" %(' '.join(args)))
    logfile.flush()
    try:
        command_run = subprocess.call(args)
    except OSError:
        # mv could not be started; report it like a failed command.
        command_run = 127
    check_shellcommand(command_run,msg)
    logfile.flush()
#*************************************************************
#Parsing arguments and options from command line
#*************************************************************
# Usage line handed to ArgumentParser below.
usage = "python %(prog)s [options] directory"
# Description text for the help output; it is passed through
# textwrap.dedent before being handed to ArgumentParser.
# NOTE(review): the text gives the archive name as
# USER_YYYYMMDDHHMMSS_DIRECTORY, while the initialisation code formats the
# timestamp as '%Y%m%dT%H%M%S' (with a 'T' between date and time) - confirm
# which one is intended.
description = '''
description:
cjarchive.py archives a folder and all its contents in a compressed file
with the name USER_YYYYMMDDHHMMSS_DIRECTORY.tar.bz2. Requires that the
target folder contains a metadata file named ARCHIVE_METADATA.json in
JSON format (see below an example).
After the archive has been created it is moved to
/scicore/archive/<group>/<user>/
Developed by Ruben M. Cabezon - sciCORE (University of Basel)
ruben.cabezon@unibas.ch
'''
# Epilog for the help output: an example of the expected contents of
# ARCHIVE_METADATA.json.  Also passed through textwrap.dedent.
epilog='''
JSON format example for the ARCHIVE_METADATA.json file:
{
"name": "NAME OF INVESTIGATOR",
"email": "EMAIL OF INVESTIGATOR",
"pi_name": "NAME OF PI",
"pi_email": "EMAIL OF PI",
"project": "INSERT PROJECT NAME HERE",
"project_start_date": "YYYY-MM-DD",
"project_end_date": "YYYY-MM-DD",
"description": "INSERT PROJECT DESCRIPTION HERE MULTILINE IS NOT OK",
"collaborators":[
{ "name": "COLLABORATOR NAME",
"email": "COLLABORATOR EMAIL"
},
{ "name": "COLLABORATOR NAME",
"email": "COLLABORATOR EMAIL"
}
],
"comments": "ADDITIONAL COMMENTS (E.G. LEGAL REQUIREMENTS REGARDING DURATION OF DATA PRESERVATION, ETC...)"
}
'''
# Command-line interface: one positional target directory plus a repeatable
# -x/--exclude option.  RawDescriptionHelpFormatter keeps the line breaks of
# the description and epilog texts in the help output.
parser = ArgumentParser(
    formatter_class=RawDescriptionHelpFormatter,
    usage=usage,
    description=textwrap.dedent(description),
    epilog=textwrap.dedent(epilog),
)
parser.add_argument(
    "directory",
    help="specify directory, from current level, to be archived",
)
# action='append' collects every -x value into a list; args.exclude stays
# None when the option is not given.
parser.add_argument(
    "-x", "--exclude",
    action='append',
    metavar='subdirectory',
    help="specify subdirectories to be excluded from archiving (only first level subdirectories names, not full path) Can be repeated for additional subdirectories.",
)
args = parser.parse_args()
#*************************************************************
#Initialization
#*************************************************************
# Validated target directory and (possibly empty) list of excluded
# subdirectories.
directory = checkcorrectname(args.directory)
excluded = checkcorrectnameexcluded(args.exclude) if args.exclude else []
# Every file produced by this run shares the stem
# <user>_<timestamp>_<directory>.
now = datetime.now()
username = getpass.getuser()
# Name of the group of the current process; used for the destination path.
groupname = grp.getgrgid(os.getgid()).gr_name
name = username+'_'+now.strftime('%Y%m%dT%H%M%S')+'_'+directory
namelog, namemd5, nameman = name+'.log', name+'.md5sum', name+'.manifest'
archivefile = name+'.tar.bz2'
#*************************************************************
#Open logfile
#*************************************************************
# The log is opened in append mode and bound to the module-level name
# `logfile`, which the functions above write to.  The with-block closes it
# once the run has finished; it used to stay open on the success path.
# Functions that stop the run close it themselves before sys.exit, and
# closing it a second time here is harmless.
with open(namelog,'a') as logfile:
    #*************************************************************
    #Main code
    #*************************************************************
    checkdirectory(directory,0)
    checkmetadata(directory)
    # `excluded` is an empty list when no -x option was given, so no extra
    # guard is needed around this loop.
    for f in excluded:
        checkdirectory(directory+'/'+f,1)
    createlist(directory,nameman,excluded)
    createlist_md5sum(directory,namemd5,excluded)
    createarchive(directory,archivefile,excluded)
    checkintegrity(archivefile)
    renameoriginal(directory)
    store(archivefile,groupname,username)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment