From dfe415a8dbd9a1c19b9851cccbf544d77b654f0d Mon Sep 17 00:00:00 2001
From: Ruben <ruben.cabezon@unibas.ch>
Date: Thu, 21 Nov 2024 12:24:35 +0100
Subject: [PATCH] Revised version with Python 3 compatibility

---
 bin/cjarchiver | 162 ++++++++++++++++++++++---------------------------
 1 file changed, 71 insertions(+), 91 deletions(-)

diff --git a/bin/cjarchiver b/bin/cjarchiver
index e78799e..b1c81d5 100755
--- a/bin/cjarchiver
+++ b/bin/cjarchiver
@@ -12,17 +12,31 @@ from datetime import datetime
 #*************************************************************
 #Functions
 #*************************************************************
+#Verbose steps: print msg to the terminal and append it (plus the command, when one is given) to the logfile
+def say(command,msg):
+    print(msg)
+    logfile.write(msg+"\n")
+    if command != "":
+        logfile.write(f"Executing command: {command}\n")
+    logfile.flush()
+    
+#Command executor: announce/log the command, run it through the shell and stop on a non-zero status
+def executer(command,msg):
+    say(command, msg)
+    command_run=os.system(command)
+    check_shellcommand(command_run,msg)
+
 #Shell command output checker
 def check_shellcommand(command_run,msg):
     if command_run != 0:
-        logfile.write(msg+" unsuccessful (Stopping):\n")
+        say("",msg+" unsuccessful (Stopping)")
         logfile.close()
-        sys.exit("Stopping: "+msg+" unsuccessful")
+        sys.exit(1)  #non-zero status: a bare sys.exit() would report success (status 0)
 
 #Remove slash character if given with directory
 #Checks that we are in the same level as the target directory
 def checkcorrectname(directory):
-    if directory[len(directory)-1] == '/':
+    if directory.endswith('/'):
         directory=directory[:-1]
     if '/' in directory:
         sys.exit("Stopping: Wrong directory name. Present PATH should be at the same level of the target directory")
@@ -32,112 +46,103 @@ def checkcorrectname(directory):
 #Checks that subdirectories do not contain / character
 def checkcorrectnameexcluded(excluded):
     for i, f in enumerate(excluded):
-        print f
-        if f[len(f)-1] == '/':
+        print(f)
+        if f.endswith('/'):  #strip one trailing slash; any '/' still left is rejected below
             f=f[:-1]
         if '/' in f:
-            sys.exit("Stopping: Wrong subdirectory name %s. It should be maximum one level depth from the target directory" %(f))
+            sys.exit(f"Stopping: Wrong subdirectory name {f}. It should be maximum one level depth from the target directory")
         excluded[i]=f
     return excluded
 
 #Checks if directory exists
 def checkdirectory(directory,out):
-    pathfile=os.environ['PWD']+'/'+directory
+    pathfile = os.path.join(os.environ['PWD'], directory)
     exists=os.path.isdir(pathfile)
-    print "Searching %s" %(pathfile)
-    logfile.write("Searching %s\n" %(pathfile))
+    say("", f"Searching {pathfile}")
     if exists:
         if out == 0:
-            print "Directory %s found!" %(directory)
-            logfile.write("Directory %s found!\n" %(directory))
+            say("","Directory %s found!" %(directory))
         else:
-            print "Directory %s found and excluded!" %(directory)
-            logfile.write("Directory %s found and excluded!\n" %(directory))
+            say("","Directory %s found and excluded!" %(directory))
     else:
-        logfile.write("Stopping: directory not found\n")
+        say("","Stopping: directory not found")
         logfile.close()
         sys.exit("Stopping: directory not found")
+    dest_directory=directory+'.toberemoved'
+    destination = os.path.join(os.environ['PWD'], dest_directory)
+    #Only the target itself (out == 0) is renamed later; excluded subdirectories need no free destination
+    if out == 0 and os.path.isdir(destination):
+        say("","Stopping: directory %s already exists!" %(dest_directory))
+        logfile.close()
+        sys.exit(1)  #non-zero status: a bare sys.exit() would report success (status 0)
     logfile.flush()
 
 #Checks if metadata exists and has the correct JSON format
 def checkmetadata(directory,namejson):
-    pathfile=os.environ['PWD']+'/'+directory+'/ARCHIVE_METADATA.json'
+    pathfile = os.path.join(os.environ['PWD'], directory, 'ARCHIVE_METADATA.json')
     exists=os.path.isfile(pathfile)
-    print "Searching metadata %s" %(pathfile)
-    logfile.write("Searching metadata %s\n" %(pathfile))
+    say("", f"Searching metadata {pathfile}")
     if exists:
-        print "Metadata found!"
-        logfile.write("Metadata found!\n")
+        say("","Metadata found!")
         correct=is_json(pathfile)
         if correct:
-            print "Metadata format is correct"
-            logfile.write("Metadata format is correct\n")
-            os.system('cp '+pathfile+' ./'+namejson)
-            logfile.write("json file copied\n")
-            logfile.flush()
+            say("","Metadata format is correct")
+            command='cp '+pathfile+' ./'+namejson
+            msg='Copying metadata in landing zone'
+            executer(command,msg)
         else:
-            print "Metadata format is NOT correct. Use -h or --help for an example of JSON format"
+            print("Metadata format is NOT correct. Use -h or --help for an example of JSON format")  #logged by the next line (say() would log it twice); NOTE(review): the run continues here - confirm this is intended
             logfile.write("Metadata format is NOT correct\n")
     else:
-        logfile.write("Stopping: metadata not found\n")
+        say("","Stopping: metadata not found")
         logfile.close()
-        sys.exit("Stopping: metadata not found")
+        sys.exit(1)  #non-zero status: a bare sys.exit() would report success (status 0)
     logfile.flush()
 
 
 #JSON validator: Stolen from StackExchange
 def is_json(jsonfile):
-    f=open(jsonfile, 'r')
-    myjson=f.read()
-    try:
-        json_object = json.loads(myjson)
-    except ValueError, e:
+    #Read inside the try: in Python 3 an undecodable file raises UnicodeDecodeError (a ValueError) while reading
+    try:
+        with open(jsonfile, 'r') as f:
+            json.load(f)
+    except ValueError:
         return False
     return True
 
 
 #Create list of files (manifest)
 def createlist(directory,nameman,excluded):
-    print "Listing files included in the backup"
     if excluded:
         excluding=' '
         for f in excluded:
-            excluding=excluding+'\( -path ./'+directory+'/'+f+' -prune \) -o '
+            excluding=excluding+r'\( -path ./'+directory+'/'+f+r' -prune \) -o '  #raw strings keep \( and \) literal
         command='find ./'+directory+excluding+' -ls > '+nameman
     else:
         command='find ./'+directory+' -ls > '+nameman
-    logfile.write("Listing files included in the backup\n")
-    logfile.write("Executing command: %s\n" %(command))
-    logfile.flush()
-    command_run=os.system(command)
-    msg="Listing of files "
-    check_shellcommand(command_run,msg)
-    os.system('cp '+nameman+' '+directory)
-    logfile.flush()
+    msg="Listing files included in the backup"
+    executer(command, msg)  #stops the run if the find command returns a non-zero status
+    command='cp '+nameman+' '+directory
+    msg='Copying manifest in directory'
+    executer(command, msg)  #the copy is now status-checked as well
 
 #Create list of files md5sum
 def createlist_md5sum(directory,namemd5,excluded):
-    print "creating md5sum for files included in the backup"
     if excluded:
         excluding=' '
         for f in excluded:
-            excluding=excluding+'\( -path ./'+directory+'/'+f+' -prune \) -o '
-        command='find ./'+directory+excluding+' -type f -exec md5sum {} \; > '+namemd5
+            excluding=excluding+r'\( -path ./'+directory+'/'+f+r' -prune \) -o '  #raw strings keep \( and \) literal
+        command='find ./'+directory+excluding+r' -type f -exec md5sum {} \; > '+namemd5
     else:
-        command='find ./'+directory+' -type f -exec md5sum {} \; > '+namemd5
-    logfile.write("Listing md5sum of files included in the backup\n")
-    logfile.write("Executing command: %s\n" %(command))
-    logfile.flush()
-    command_run=os.system(command)
-    msg="Creating md5sum of files "
-    check_shellcommand(command_run,msg)    
-    os.system('cp '+namemd5+' '+directory)
-    logfile.flush()
+        command='find ./'+directory+r' -type f -exec md5sum {} \; > '+namemd5
+    msg="Listing md5sum of files included in the backup"
+    executer(command, msg)  #stops the run if the find command returns a non-zero status
+    command='cp '+namemd5+' '+directory
+    msg="Copying md5sum of files in directory"
+    executer(command, msg)  #the copy is now status-checked as well
 
 #Create archive file tar
 def createarchive(directory,archivefile,excluded):
-    msg="Creating archive "
-    print msg+"%s" %(archivefile)
     if excluded:
         excluding=' '
         for f in excluded:
@@ -145,44 +150,20 @@ def createarchive(directory,archivefile,excluded):
         command='tar'+excluding+'-cf - '+directory+' | lbzip2 -n 4 > '+archivefile
     else:
         command='tar cf - '+directory+' | lbzip2 -n 4 > '+archivefile
-    logfile.write("Creating archive %s\n" %(archivefile))
-    logfile.write("Executing command: %s\n" %(command))
-    command_run=os.system(command)
-    check_shellcommand(command_run,msg)
-    logfile.flush()
+    msg="Creating archive %s " %(archivefile)
+    executer(command, msg)
 
 #Check compressed file integrity
 def checkintegrity(archivefile):
-    msg="Checking integrity"
-    print msg
+    msg="Archive %s created! Checking integrity" %(archivefile)
     command='lbzip2 -tv '+archivefile
-    logfile.write("Archive %s created! Checking integrity\n" %(archivefile))
-    logfile.write("Executing command: %s\n" %(command))
-    command_run=os.system(command)
-    check_shellcommand(command_run,msg)
-    logfile.flush()
+    executer(command, msg)  #stops the run when lbzip2 exits with a non-zero status
 
 #Rename original folder
 def renameoriginal(directory):
     msg="Renaming original directory "
-    print msg
     command='mv '+directory+" "+directory+".toberemoved"
-    logfile.write("Renaming original directory\n")
-    logfile.write("Executing command: %s\n" %(command))
-    command_run=os.system(command)
-    check_shellcommand(command_run,msg)
-    logfile.flush()
-
-#Store compressed file in the corresponding (groupPI) directory
-def store(archivefile,groupname,username):
-    msg="Moving archive %s to /scicore/archive/%s/%s\n" %(archivefile, groupname, username)
-    print msg
-    command='mv '+archivefile+' '+'/scicore/archive/'+groupname+'/'+username+'/'
-    logfile.write("Moving archive %s to /scicore/archive/%s/%s\n" %(archivefile, groupname, username))
-    logfile.write("Executing command: %s\n" %(command))
-    command_run=os.system(command)
-    check_shellcommand(command_run,msg)
-    logfile.flush()
+    executer(command, msg)  #prints and logs msg and the command; stops the run if mv fails
 
 #*************************************************************
 #Parsing arguments and options from command line
@@ -240,12 +221,12 @@ else:
 now=datetime.now()
 username=getpass.getuser()
 groupname=grp.getgrgid(os.getgid()).gr_name
-name=username+'_'+now.strftime('%Y%m%dT%H%M%S')+'_'+directory
-namelog=name+'.log'
-namemd5=name+'.md5sum'
-nameman=name+'.manifest'
-archivefile=name+'.tar.bz2'
-namejson=name+'.json'
+name = f"{username}_{now.strftime('%Y%m%dT%H%M%S')}_{directory}"  #<user>_<YYYYmmddTHHMMSS>_<directory>: stem shared by the five file names below
+namelog = f"{name}.log"
+namemd5 = f"{name}.md5sum"
+nameman = f"{name}.manifest"
+archivefile = f"{name}.tar.bz2"
+namejson = f"{name}.json"
 
 #*************************************************************
 #Open logfile
@@ -260,10 +241,9 @@ checkdirectory(directory,0)
 checkmetadata(directory,namejson)
 if args.exclude:
     for f in excluded:
-        checkdirectory(directory+'/'+f,1)
+        checkdirectory(os.path.join(directory, f), 1)  #out=1: the subdirectory is reported as "found and excluded"
 createlist(directory,nameman,excluded)
 createlist_md5sum(directory,namemd5,excluded)
 createarchive(directory,archivefile,excluded)
 checkintegrity(archivefile)
-#store(archivefile,groupname,username)
 renameoriginal(directory)
-- 
GitLab