Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
cjarchiver
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Container registry
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
scicore
cjarchiver
Commits
d2a05cd2
Commit
d2a05cd2
authored
8 years ago
by
Ruben
Browse files
Options
Downloads
Patches
Plain Diff
Cleaning deleted file
parent
3f4f8b0d
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
cjarchiver.py
+0
-267
0 additions, 267 deletions
cjarchiver.py
with
0 additions
and
267 deletions
cjarchiver.py
deleted
100644 → 0
+
0
−
267
View file @
3f4f8b0d
import
os
import
sys
import
getpass
import
grp
import
json
import
textwrap
from
os.path
import
*
from
argparse
import
*
from
datetime
import
datetime
#*************************************************************
#Functions
#*************************************************************
#Shell command output checker
def check_shellcommand(command_run, msg):
    """Stop the whole run when a shell command did not succeed.

    command_run -- status value obtained for the command; 0 means success
    msg         -- short description of the step, reused in the log line
                   and in the exit message

    When the status is non-zero the failure is recorded in the module-level
    ``logfile``, the log is closed and the interpreter exits via sys.exit.
    Returns None when the status is 0.
    """
    # Guard clause: nothing to report for a successful command.
    if command_run == 0:
        return
    failure_line = msg + " unsuccessful (Stopping):\n"
    exit_text = "Stopping: " + msg + " unsuccessful"
    logfile.write(failure_line)
    logfile.close()
    sys.exit(exit_text)
#Remove slash character if given with directory
#Checks that we are in the same level as the target directory
def checkcorrectname(directory):
    """Normalise and validate the target directory name.

    directory -- name given on the command line, optionally with one
                 trailing '/'

    Returns the name without the trailing slash.  Exits via sys.exit when
    the name still contains a '/' (the script must be run from the parent
    of the target directory) or when nothing is left after removing the
    slash.
    """
    # endswith() is safe on an empty string, unlike indexing the last char.
    if directory.endswith('/'):
        directory = directory[:-1]
    # "" or "/" would otherwise be returned as an empty name and the later
    # shell commands would operate on "./" instead of a real directory.
    if not directory:
        sys.exit("Stopping: Wrong directory name. Empty directory name given")
    if '/' in directory:
        sys.exit("Stopping: Wrong directory name. Present PATH should be at the same level of the target directory")
    return directory
#Checks that subdirectories do not contain / character
def checkcorrectnameexcluded(excluded):
    """Normalise and validate the names of the excluded subdirectories.

    excluded -- list of subdirectory names from the command line; each may
                carry one trailing '/'

    Each name is echoed, stripped of its trailing slash and written back
    into the same list, which is also returned.  Exits via sys.exit when a
    name still contains a '/' (only first-level subdirectories are allowed)
    or when a name is empty after removing the slash.
    """
    for i, f in enumerate(excluded):
        # Single-argument print() behaves the same on Python 2 and 3.
        print(f)
        # endswith() is safe on an empty string, unlike indexing the last char.
        if f.endswith('/'):
            f = f[:-1]
        # An empty name would later be turned into the path "DIRECTORY/".
        if not f:
            sys.exit("Stopping: Wrong subdirectory name. Empty subdirectory name given")
        if '/' in f:
            sys.exit("Stopping: Wrong subdirectory name %s. It should be maximum one level depth from the target directory" % (f))
        excluded[i] = f
    return excluded
#Checks if directory exists
def checkdirectory(directory, out):
    """Check that a directory exists below the current working directory.

    directory -- path relative to the current directory
    out       -- 0 for the directory being archived, any other value for a
                 subdirectory that is being excluded (only changes the
                 wording of the message)

    The result is printed and written to the module-level ``logfile``.
    When the directory does not exist the log is closed and the script
    exits via sys.exit.
    """
    # PWD is normally set by the shell but may be missing; fall back to the
    # real working directory instead of failing with KeyError.
    base = os.environ.get('PWD') or os.getcwd()
    pathfile = base + '/' + directory
    exists = os.path.isdir(pathfile)
    print("Searching %s" % (pathfile))
    logfile.write("Searching %s\n" % (pathfile))
    if exists:
        if out == 0:
            print("Directory %s found!" % (directory))
            logfile.write("Directory %s found!\n" % (directory))
        else:
            print("Directory %s found and excluded!" % (directory))
            logfile.write("Directory %s found and excluded!\n" % (directory))
    else:
        logfile.write("Stopping: directory not found\n")
        logfile.close()
        sys.exit("Stopping: directory not found")
    logfile.flush()
#Checks if metadata exists and has the correct JSON format
def checkmetadata(directory):
    """Check that DIRECTORY/ARCHIVE_METADATA.json exists and parses as JSON.

    directory -- name of the directory being archived, relative to the
                 current directory

    The outcome is printed and written to the module-level ``logfile``.
    When the metadata file is missing the log is closed and the script
    exits via sys.exit.
    """
    # PWD is normally set by the shell but may be missing; fall back to the
    # real working directory instead of failing with KeyError.
    base = os.environ.get('PWD') or os.getcwd()
    pathfile = base + '/' + directory + '/ARCHIVE_METADATA.json'
    exists = os.path.isfile(pathfile)
    print("Searching metadata %s" % (pathfile))
    logfile.write("Searching metadata %s\n" % (pathfile))
    if exists:
        print("Metadata found!")
        logfile.write("Metadata found!\n")
        correct = is_json(pathfile)
        if correct:
            print("Metadata format is correct")
            logfile.write("Metadata format is correct\n")
        else:
            # NOTE(review): a malformed metadata file is only reported here;
            # the run continues. Confirm whether it should stop instead.
            print("Metadata format is NOT correct. Use -h or --help for an example of JSON format")
            logfile.write("Metadata format is NOT correct\n")
    else:
        logfile.write("Stopping: metadata not found\n")
        logfile.close()
        sys.exit("Stopping: metadata not found")
    logfile.flush()
#JSON validator: Stolen from StackExchange
def is_json(jsonfile):
    """Tell whether the file at ``jsonfile`` contains valid JSON.

    jsonfile -- path of the file to check

    Returns True when the content parses as JSON, False when parsing
    raises ValueError.  Errors opening the file are not caught.
    """
    # The context manager closes the file even when parsing fails.
    with open(jsonfile, 'r') as f:
        try:
            json.load(f)
        except ValueError:
            # "except ValueError:" is valid on both Python 2 and 3.
            return False
    return True
#Create list of files (manifest)
def createlist(directory, nameman, excluded):
    """Write the manifest: a ``find -ls`` listing of the files to archive.

    directory -- directory being archived, relative to the current directory
    nameman   -- name of the manifest file to create
    excluded  -- first-level subdirectories to leave out (may be empty)

    The command is recorded in the module-level ``logfile``, run through
    os.system and its status handed to check_shellcommand.  The manifest is
    then copied into the archived directory.
    """
    print("Listing files included in the backup")
    # One pruning group per excluded subdirectory; empty when nothing is
    # excluded, which yields the plain "find ... -ls" command.
    prune_groups = ''.join(
        ' \\( -path ./' + directory + '/' + sub + ' -prune \\) -o'
        for sub in (excluded or [])
    )
    command = 'find ./' + directory + prune_groups + ' -ls > ' + nameman
    logfile.write("Listing files included in the backup\n")
    logfile.write("Executing command: %s\n" % (command))
    logfile.flush()
    status = os.system(command)
    check_shellcommand(status, "Listing of files")
    # Keep a copy of the manifest inside the directory that gets archived.
    os.system('cp ' + nameman + ' ' + directory)
    logfile.flush()
#Create list of files md5sum
def createlist_md5sum(directory, namemd5, excluded):
    """Write the md5sum of every regular file that will be archived.

    directory -- directory being archived, relative to the current directory
    namemd5   -- name of the checksum file to create
    excluded  -- first-level subdirectories to leave out (may be empty)

    The command is recorded in the module-level ``logfile``, run through
    os.system and its status handed to check_shellcommand.  The checksum
    file is then copied into the archived directory.
    """
    print("creating md5sum for files included in the backup")
    # One pruning group per excluded subdirectory; empty when nothing is
    # excluded, which yields the plain "find ... -type f" command.
    prune_groups = ''.join(
        ' \\( -path ./' + directory + '/' + sub + ' -prune \\) -o'
        for sub in (excluded or [])
    )
    command = ('find ./' + directory + prune_groups
               + ' -type f -exec md5sum {} \\; > ' + namemd5)
    logfile.write("Listing md5sum of files included in the backup\n")
    logfile.write("Executing command: %s\n" % (command))
    logfile.flush()
    status = os.system(command)
    check_shellcommand(status, "Creating md5sum of files")
    # Keep a copy of the checksums inside the directory that gets archived.
    os.system('cp ' + namemd5 + ' ' + directory)
    logfile.flush()
#Create archive file tar
def createarchive(directory, archivefile, excluded):
    """Create the compressed archive ``tar ... | lbzip2 -n 4 > archivefile``.

    directory   -- directory being archived, relative to the current directory
    archivefile -- name of the .tar.bz2 file to create
    excluded    -- first-level subdirectories to leave out (may be empty)

    The equivalent shell command is recorded in the module-level
    ``logfile``.  The pipeline itself is run with subprocess so that the
    exit status of BOTH tar and lbzip2 is checked: with os.system only the
    status of the last command of the pipe (lbzip2) was visible, so a
    failing tar went unnoticed.  Any failure is passed to
    check_shellcommand, which logs it and stops the script.
    """
    # Local import: the file's import block does not provide subprocess.
    import subprocess

    msg = "Creating archive"
    print(msg + " %s" % (archivefile))

    excluded = excluded or []
    if excluded:
        excluding = ''.join(
            " --exclude='" + directory + '/' + f + "'" for f in excluded
        )
        command = ('tar' + excluding + ' -cf - ' + directory
                   + ' | lbzip2 -n 4 > ' + archivefile)
        tar_args = (['tar']
                    + ['--exclude=' + directory + '/' + f for f in excluded]
                    + ['-cf', '-', directory])
    else:
        command = 'tar cf - ' + directory + ' | lbzip2 -n 4 > ' + archivefile
        tar_args = ['tar', 'cf', '-', directory]

    logfile.write("Creating archive %s\n" % (archivefile))
    logfile.write("Executing command: %s\n" % (command))
    logfile.flush()

    try:
        with open(archivefile, 'wb') as out:
            tar = subprocess.Popen(tar_args, stdout=subprocess.PIPE)
            try:
                packer = subprocess.Popen(['lbzip2', '-n', '4'],
                                          stdin=tar.stdout, stdout=out)
            except OSError:
                # lbzip2 could not be started: do not leave tar running.
                tar.stdout.close()
                tar.kill()
                tar.wait()
                raise
            # Drop our copy of the pipe so tar gets SIGPIPE if lbzip2
            # exits early instead of blocking forever.
            tar.stdout.close()
            packer_status = packer.wait()
            tar_status = tar.wait()
        # Non-zero when either stage of the pipeline failed.
        command_run = tar_status or packer_status
    except (OSError, IOError):
        # A missing executable or unwritable archive file is reported the
        # same way as a failing command.
        command_run = 1

    check_shellcommand(command_run, msg)
    logfile.flush()
#Check compressed file integrity
def checkintegrity(archivefile):
    """Test the compressed archive with ``lbzip2 -tv``.

    archivefile -- name of the .tar.bz2 file to test

    The command is recorded in the module-level ``logfile``, run through
    os.system and its status handed to check_shellcommand.
    """
    step = "Checking integrity"
    print(step)
    test_command = 'lbzip2 -tv ' + archivefile
    for line in ("Archive %s created! Checking integrity\n" % (archivefile),
                 "Executing command: %s\n" % (test_command)):
        logfile.write(line)
    check_shellcommand(os.system(test_command), step)
    logfile.flush()
#Rename original folder
def renameoriginal(directory):
    """Rename the archived directory to DIRECTORY.toberemoved.

    directory -- directory that has just been archived

    The ``mv`` command is recorded in the module-level ``logfile``, run
    through os.system and its status handed to check_shellcommand.
    """
    step = "Renaming original directory"
    print(step)
    renamed = directory + ".toberemoved"
    move_command = 'mv ' + directory + " " + renamed
    logfile.write("Renaming original directory\n")
    logfile.write("Executing command: %s\n" % (move_command))
    status = os.system(move_command)
    check_shellcommand(status, step)
    logfile.flush()
#Store compressed file in the corresponding (groupPI) directory
def store(archivefile, groupname, username):
    """Move the archive to /scicore/archive/GROUPNAME/USERNAME/.

    archivefile -- name of the .tar.bz2 file to move
    groupname   -- group name used as first directory level
    username    -- user name used as second directory level

    The ``mv`` command is recorded in the module-level ``logfile``, run
    through os.system and its status handed to check_shellcommand.
    """
    # The same text (including its trailing newline) is printed, logged and
    # passed on as the step description.
    msg = "Moving archive %s to /scicore/archive/%s/%s\n" % (archivefile, groupname, username)
    print(msg)
    destination = '/scicore/archive/' + groupname + '/' + username + '/'
    move_command = 'mv ' + archivefile + ' ' + destination
    logfile.write(msg)
    logfile.write("Executing command: %s\n" % (move_command))
    status = os.system(move_command)
    check_shellcommand(status, msg)
    logfile.flush()
#*************************************************************
#Parsing arguments and options from command line
#*************************************************************
# Help texts shown by argparse.  The archive name documented here matches
# the strftime pattern used below (date and time separated by a "T").
usage = "python %(prog)s [options] directory"
description = '''
description:
cjarchiver.py archives a folder and all its contents in a compressed file
with the name USER_YYYYMMDDTHHMMSS_DIRECTORY.tar.bz2. Requires that the
target folder contains a metadata file named ARCHIVE_METADATA.json in
JSON format (see below an example).

After the archive has been created it is moved to
/scicore/archive/<group>/<user>/

Developed by Ruben M. Cabezon - sciCORE (University of Basel)
ruben.cabezon@unibas.ch
'''
epilog = '''
JSON format example for the ARCHIVE_METADATA.json file:

{
    "name": "NAME OF INVESTIGATOR",
    "email": "EMAIL OF INVESTIGATOR",
    "pi_name": "NAME OF PI",
    "pi_email": "EMAIL OF PI",
    "project": "INSERT PROJECT NAME HERE",
    "project_start_date": "YYYY-MM-DD",
    "project_end_date": "YYYY-MM-DD",
    "description": "INSERT PROJECT DESCRIPTION HERE MULTILINE IS NOT OK",
    "collaborators":[
        {
            "name": "COLLABORATOR NAME",
            "email": "COLLABORATOR EMAIL"
        },
        {
            "name": "COLLABORATOR NAME",
            "email": "COLLABORATOR EMAIL"
        }
    ],
    "comments": "ADDITIONAL COMMENTS (E.G. LEGAL REQUIREMENTS REGARDING DURATION OF DATA PRESERVATION, ETC...)"
}
'''
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                        usage=usage,
                        description=textwrap.dedent(description),
                        epilog=textwrap.dedent(epilog))
parser.add_argument("directory",
                    help="specify directory, from current level, to be archived")
parser.add_argument("-x", "--exclude", action='append', metavar='subdirectory',
                    help="specify subdirectories to be excluded from archiving (only first level subdirectories names, not full path) Can be repeated for additional subdirectories.")
args = parser.parse_args()

#*************************************************************
#Initialization
#*************************************************************
directory = checkcorrectname(args.directory)
if args.exclude:
    excluded = checkcorrectnameexcluded(args.exclude)
else:
    excluded = []
now = datetime.now()
username = getpass.getuser()
groupname = grp.getgrgid(os.getgid()).gr_name
# Common stem of every file produced by this run: USER_TIMESTAMP_DIRECTORY
name = username + '_' + now.strftime('%Y%m%dT%H%M%S') + '_' + directory
namelog = name + '.log'
namemd5 = name + '.md5sum'
nameman = name + '.manifest'
archivefile = name + '.tar.bz2'

#*************************************************************
#Open logfile
#*************************************************************
# Module-level handle used by all the functions above.
logfile = open(namelog, 'a')

#*************************************************************
#Main code
#*************************************************************
checkdirectory(directory, 0)
checkmetadata(directory)
if args.exclude:
    for f in excluded:
        checkdirectory(directory + '/' + f, 1)
createlist(directory, nameman, excluded)
createlist_md5sum(directory, namemd5, excluded)
createarchive(directory, archivefile, excluded)
checkintegrity(archivefile)
renameoriginal(directory)
store(archivefile, groupname, username)
# Every failure path closes the log before exiting; close it on success too.
logfile.close()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment