From b4d4648a455ad35cd5707f8e63ed516e2fb3644d Mon Sep 17 00:00:00 2001
From: Gabriel Studer <gabriel.studer@unibas.ch>
Date: Thu, 4 May 2023 17:31:55 +0200
Subject: [PATCH] Replace older TMalign source code with USalign source code

Enables RNA support for simple chain-by-chain comparison.
This could later be extended to the multi-chain functionality
implemented in USalign.
---
 modules/bindings/doc/tmtools.rst              |   24 +-
 modules/bindings/pymod/export_tmalign.cc      |    6 +-
 .../src/{tmalign => USalign}/BLOSUM.h         |    0
 modules/bindings/src/USalign/Dockerfile       |   25 +
 .../src/{tmalign => USalign}/HwRMSD.cpp       |    0
 .../src/{tmalign => USalign}/HwRMSD.h         |    2 +-
 .../src/{tmalign => USalign}/Kabsch.h         |    0
 .../src/{tmalign => USalign}/MMalign.cpp      |  138 +-
 modules/bindings/src/USalign/MMalign.h        | 3040 ++++++++++++++++
 .../bindings/src/{tmalign => USalign}/NW.h    |   94 +
 .../src/{tmalign => USalign}/NWalign.cpp      |    0
 .../src/{tmalign => USalign}/NWalign.h        |  198 +-
 modules/bindings/src/USalign/OST_INFO         |    6 +
 .../src/{tmalign => USalign}/PDB1.pdb         |    0
 .../src/{tmalign => USalign}/PDB2.pdb         |    0
 modules/bindings/src/USalign/SOIalign.h       |  959 +++++
 .../src/{tmalign => USalign}/TMalign.cpp      |   30 +-
 .../src/{tmalign => USalign}/TMalign.h        |  356 +-
 .../src/{tmalign => USalign}/TMscore.cpp      |   36 +-
 .../src/{tmalign => USalign}/TMscore.h        |   17 +-
 modules/bindings/src/USalign/USalign.cpp      | 3137 +++++++++++++++++
 .../src/{tmalign => USalign}/align.txt        |    0
 .../src/{tmalign => USalign}/basic_fun.h      |  287 +-
 modules/bindings/src/USalign/cif2pdb.cpp      |  533 +++
 modules/bindings/src/USalign/flexalign.h      | 1826 ++++++++++
 .../src/{tmalign => USalign}/param_set.h      |    0
 .../src/{tmalign => USalign}/pdb2fasta.cpp    |   34 +-
 .../src/{tmalign => USalign}/pdb2ss.cpp       |    0
 .../src/{tmalign => USalign}/pdb2xyz.cpp      |    0
 modules/bindings/src/USalign/pdbAtomName.cpp  |  232 ++
 .../src/{tmalign => USalign}/pstream.h        |    7 +-
 modules/bindings/src/USalign/qTMclust.cpp     |  723 ++++
 .../src/{tmalign => USalign}/readme.txt       |   61 +-
 .../bindings/src/{tmalign => USalign}/se.cpp  |   18 +-
 .../bindings/src/{tmalign => USalign}/se.h    |   66 +-
 modules/bindings/src/USalign/usalign.py       |  132 +
 .../src/{tmalign => USalign}/xyz_sfetch.cpp   |   28 +-
 modules/bindings/src/tmalign/.gitignore       |   17 -
 modules/bindings/src/tmalign/MMalign.h        | 1194 -------
 modules/bindings/src/tmalign/OST_INFO         |    7 -
 modules/bindings/src/wrap_tmalign.cc          |  106 +-
 modules/bindings/src/wrap_tmalign.hh          |    3 +-
 42 files changed, 11687 insertions(+), 1655 deletions(-)
 rename modules/bindings/src/{tmalign => USalign}/BLOSUM.h (100%)
 create mode 100644 modules/bindings/src/USalign/Dockerfile
 rename modules/bindings/src/{tmalign => USalign}/HwRMSD.cpp (100%)
 rename modules/bindings/src/{tmalign => USalign}/HwRMSD.h (96%)
 rename modules/bindings/src/{tmalign => USalign}/Kabsch.h (100%)
 rename modules/bindings/src/{tmalign => USalign}/MMalign.cpp (81%)
 create mode 100644 modules/bindings/src/USalign/MMalign.h
 rename modules/bindings/src/{tmalign => USalign}/NW.h (80%)
 rename modules/bindings/src/{tmalign => USalign}/NWalign.cpp (100%)
 rename modules/bindings/src/{tmalign => USalign}/NWalign.h (72%)
 create mode 100644 modules/bindings/src/USalign/OST_INFO
 rename modules/bindings/src/{tmalign => USalign}/PDB1.pdb (100%)
 rename modules/bindings/src/{tmalign => USalign}/PDB2.pdb (100%)
 create mode 100644 modules/bindings/src/USalign/SOIalign.h
 rename modules/bindings/src/{tmalign => USalign}/TMalign.cpp (93%)
 rename modules/bindings/src/{tmalign => USalign}/TMalign.h (88%)
 rename modules/bindings/src/{tmalign => USalign}/TMscore.cpp (91%)
 rename modules/bindings/src/{tmalign => USalign}/TMscore.h (95%)
 create mode 100644 modules/bindings/src/USalign/USalign.cpp
 rename modules/bindings/src/{tmalign => USalign}/align.txt (100%)
 rename modules/bindings/src/{tmalign => USalign}/basic_fun.h (78%)
 create mode 100644 modules/bindings/src/USalign/cif2pdb.cpp
 create mode 100644 modules/bindings/src/USalign/flexalign.h
 rename modules/bindings/src/{tmalign => USalign}/param_set.h (100%)
 rename modules/bindings/src/{tmalign => USalign}/pdb2fasta.cpp (79%)
 rename modules/bindings/src/{tmalign => USalign}/pdb2ss.cpp (100%)
 rename modules/bindings/src/{tmalign => USalign}/pdb2xyz.cpp (100%)
 create mode 100644 modules/bindings/src/USalign/pdbAtomName.cpp
 rename modules/bindings/src/{tmalign => USalign}/pstream.h (99%)
 create mode 100644 modules/bindings/src/USalign/qTMclust.cpp
 rename modules/bindings/src/{tmalign => USalign}/readme.txt (66%)
 rename modules/bindings/src/{tmalign => USalign}/se.cpp (94%)
 rename modules/bindings/src/{tmalign => USalign}/se.h (73%)
 create mode 100644 modules/bindings/src/USalign/usalign.py
 rename modules/bindings/src/{tmalign => USalign}/xyz_sfetch.cpp (83%)
 delete mode 100644 modules/bindings/src/tmalign/.gitignore
 delete mode 100644 modules/bindings/src/tmalign/MMalign.h
 delete mode 100644 modules/bindings/src/tmalign/OST_INFO

diff --git a/modules/bindings/doc/tmtools.rst b/modules/bindings/doc/tmtools.rst
index 823a35eca..76e3f5d48 100644
--- a/modules/bindings/doc/tmtools.rst
+++ b/modules/bindings/doc/tmtools.rst
@@ -18,13 +18,14 @@ Citation:
   Y. Zhang and J. Skolnick, Nucl. Acids Res. 2005 33, 2302-9
 
 Besides using the standalone TM-align program, ost also provides a wrapper 
-around TM-align as published in:
+around USalign as published in:
 
-  Sha Gong, Chengxin Zhang, Yang Zhang, Bioinformatics 2019 
+  Chengxin Zhang, Morgan Shine, Anna Marie Pyle, Yang Zhang
+  (2022) Nat Methods
 
 The advantage is that no intermediate files must be generated, a wrapper on the
-c++ layer is used instead. However, only the basic TM-align superposition
-functionality is available.
+c++ layer is used instead. However, only the basic TM-align superposition between
+single chains is available.
 
 
 
@@ -122,9 +123,12 @@ generated in order to call the executable.
   The positions and sequences are directly extracted from the chain
   residues for every residue that fulfills:
   
-    * peptide linking
+    * peptide linking and valid CA atom OR nucleotide linking and valid C3'
+      atom
     * valid one letter code(no '?')
-    * valid CA atom
+
+  The function automatically identifies whether the chains consist of peptide
+  or RNA residues. An error is raised if the two types are mixed.
 
   :param chain1:        Chain from which position and sequence are extracted
                         to run TMalign.
@@ -137,20 +141,22 @@ generated in order to call the executable.
   :rtype:               :class:`ost.bindings.TMAlignResult`
 
 
-.. method:: WrappedTMAlign(pos1, pos2, seq1, seq2 [fast=False])
+.. method:: WrappedTMAlign(pos1, pos2, seq1, seq2 [fast=False, rna=False])
 
   Similar as described above, but directly feeding in raw data.
 
-  :param pos1:          CA positions of the first chain
-  :param pos2:          CA positions of the second chain, this is the reference.
+  :param pos1:          CA/C3' positions of the first chain
+  :param pos2:          CA/C3' positions of the second chain, this is the reference.
   :param seq1:          Sequence of first chain
   :param seq2:          Sequence of second chain
   :param fast:          Whether to apply the *fast* flag to TMAlign
+  :param rna:           Whether to treat as RNA
   :type pos1:           :class:`ost.geom.Vec3List`
   :type pos2:           :class:`ost.geom.Vec3List`
   :type seq1:           :class:`ost.seq.SequenceHandle`
   :type seq2:           :class:`ost.seq.SequenceHandle`
   :type fast:           :class:`bool`
+  :type rna:            :class:`bool`
   :rtype:               :class:`ost.bindings.TMAlignResult`
   :raises:              :class:`ost.Error` if pos1 and seq1, pos2 and seq2 
                         respectively are not consistent in size.
diff --git a/modules/bindings/pymod/export_tmalign.cc b/modules/bindings/pymod/export_tmalign.cc
index aefe33ec7..f6d94a2d7 100644
--- a/modules/bindings/pymod/export_tmalign.cc
+++ b/modules/bindings/pymod/export_tmalign.cc
@@ -26,9 +26,9 @@ ost::bindings::TMAlignResult WrapTMAlignPos(const geom::Vec3List& pos_one,
                                             const geom::Vec3List& pos_two, 
                                             const ost::seq::SequenceHandle& seq1,
                                             const ost::seq::SequenceHandle& seq2,
-                                            bool fast) {
+                                            bool fast, bool rna) {
 
-  return ost::bindings::WrappedTMAlign(pos_one, pos_two, seq1, seq2, fast);
+  return ost::bindings::WrappedTMAlign(pos_one, pos_two, seq1, seq2, fast, rna);
 }
 
 ost::bindings::TMAlignResult WrapTMAlignView(const ost::mol::ChainView& chain1,
@@ -51,7 +51,7 @@ void export_TMAlign() {
   ;
 
   def("WrappedTMAlign", &WrapTMAlignPos, (arg("pos1"), arg("pos2"), arg("seq1"), arg("seq2"),
-                                          arg("fast")=false));
+                                          arg("fast")=false, arg("rna")=false));
 
   def("WrappedTMAlign", &WrapTMAlignView, (arg("chain1"), arg("chain2"),
                                            arg("fast")=false));
diff --git a/modules/bindings/src/tmalign/BLOSUM.h b/modules/bindings/src/USalign/BLOSUM.h
similarity index 100%
rename from modules/bindings/src/tmalign/BLOSUM.h
rename to modules/bindings/src/USalign/BLOSUM.h
diff --git a/modules/bindings/src/USalign/Dockerfile b/modules/bindings/src/USalign/Dockerfile
new file mode 100644
index 000000000..26c4bf271
--- /dev/null
+++ b/modules/bindings/src/USalign/Dockerfile
@@ -0,0 +1,25 @@
+# Two-stage image: compile the USalign tool suite with gcc, then ship only the stripped binaries.
+FROM gcc:12.2 AS build
+COPY . /usr/src/usalign
+WORKDIR /usr/src/usalign
+RUN make -j
+RUN strip qTMclust USalign TMalign TMscore MMalign se pdb2xyz xyz_sfetch pdb2fasta pdb2ss NWalign HwRMSD cif2pdb
+
+# Runtime stage: ubuntu instead of alpine (upstream: "we need ubuntu's support"); WORKDIR creates the directory.
+FROM ubuntu:latest
+WORKDIR /usr/bin/usalign
+COPY --from=build /usr/src/usalign/qTMclust /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/USalign  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/TMalign  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/TMscore  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/MMalign  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/se  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/pdb2xyz  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/xyz_sfetch  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/pdb2fasta  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/pdb2ss  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/NWalign  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/HwRMSD  /usr/bin/usalign/
+COPY --from=build /usr/src/usalign/cif2pdb /usr/bin/usalign/
+
+CMD ["/bin/bash"]
diff --git a/modules/bindings/src/tmalign/HwRMSD.cpp b/modules/bindings/src/USalign/HwRMSD.cpp
similarity index 100%
rename from modules/bindings/src/tmalign/HwRMSD.cpp
rename to modules/bindings/src/USalign/HwRMSD.cpp
diff --git a/modules/bindings/src/tmalign/HwRMSD.h b/modules/bindings/src/USalign/HwRMSD.h
similarity index 96%
rename from modules/bindings/src/tmalign/HwRMSD.h
rename to modules/bindings/src/USalign/HwRMSD.h
index 8a29399cd..8e0d0b2e4 100644
--- a/modules/bindings/src/tmalign/HwRMSD.h
+++ b/modules/bindings/src/USalign/HwRMSD.h
@@ -140,7 +140,7 @@ int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
 
         if (n_ali8_tmp==0)
         {
-            cerr<<"WARNING! zero aligned residue in iteration "<<iter<<endl;
+            //cerr<<"WARNING! zero aligned residue in iteration "<<iter<<endl;
             if (xlen>=ylen) seqxA_tmp=(string)(seqx);
             if (xlen<=ylen) seqyA_tmp=(string)(seqy);
             if (xlen<ylen)
diff --git a/modules/bindings/src/tmalign/Kabsch.h b/modules/bindings/src/USalign/Kabsch.h
similarity index 100%
rename from modules/bindings/src/tmalign/Kabsch.h
rename to modules/bindings/src/USalign/Kabsch.h
diff --git a/modules/bindings/src/tmalign/MMalign.cpp b/modules/bindings/src/USalign/MMalign.cpp
similarity index 81%
rename from modules/bindings/src/tmalign/MMalign.cpp
rename to modules/bindings/src/USalign/MMalign.cpp
index 6cc485647..816798b24 100644
--- a/modules/bindings/src/tmalign/MMalign.cpp
+++ b/modules/bindings/src/USalign/MMalign.cpp
@@ -9,7 +9,7 @@ void print_version()
     cout << 
 "\n"
 " **********************************************************************\n"
-" * MM-align (Version 20200519): complex structure alignment           *\n"
+" * MM-align (Version 20220412): complex structure alignment           *\n"
 " * References: S Mukherjee, Y Zhang. Nucl Acids Res 37(11):e83 (2009) *\n"
 " * Please email comments and suggestions to yangzhanglab@umich.edu    *\n"
 " **********************************************************************"
@@ -440,36 +440,34 @@ int main(int argc, char *argv[])
 
         t2 = clock();
         float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
-        printf("Total CPU time is %5.2f seconds\n", diff);
+        printf("#Total CPU time is %5.2f seconds\n", diff);
         return 0;
     }
 
     /* declare TM-score tables */
     int chain1_num=xa_vec.size();
     int chain2_num=ya_vec.size();
-    double **TM1_mat;
-    double **TM2_mat;
+    vector<string> tmp_str_vec(chain2_num,"");
     double **TMave_mat;
     double **ut_mat; // rotation matrices for all-against-all alignment
     int ui,uj,ut_idx;
-    NewArray(&TM1_mat,chain1_num,chain2_num);
-    NewArray(&TM2_mat,chain1_num,chain2_num);
     NewArray(&TMave_mat,chain1_num,chain2_num);
     NewArray(&ut_mat,chain1_num*chain2_num,4*3);
-    vector<string> tmp_str_vec(chain2_num,"");
     vector<vector<string> >seqxA_mat(chain1_num,tmp_str_vec);
     vector<vector<string> > seqM_mat(chain1_num,tmp_str_vec);
     vector<vector<string> >seqyA_mat(chain1_num,tmp_str_vec);
-    tmp_str_vec.clear();
+
+    double maxTMmono=-1;
+    int maxTMmono_i,maxTMmono_j;
 
     /* get all-against-all alignment */
+    if (len_aa+len_na>500) fast_opt=true;
     for (i=0;i<chain1_num;i++)
     {
         xlen=xlen_vec[i];
         if (xlen<3)
         {
-            for (j=0;j<chain2_num;j++)
-                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+            for (j=0;j<chain2_num;j++) TMave_mat[i][j]=-1;
             continue;
         }
         seqx = new char[xlen+1];
@@ -489,14 +487,14 @@ int main(int argc, char *argv[])
 
             if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
             {
-                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+                TMave_mat[i][j]=-1;
                 continue;
             }
 
             ylen=ylen_vec[j];
             if (ylen<3)
             {
-                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+                TMave_mat[i][j]=-1;
                 continue;
             }
             seqy = new char[ylen+1];
@@ -530,18 +528,22 @@ int main(int argc, char *argv[])
                 seqM, seqxA, seqyA,
                 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
                 xlen, ylen, sequence, Lnorm_tmp, d0_scale,
-                0, false, true, false, true,
+                0, false, true, false, fast_opt,
                 mol_vec1[i]+mol_vec2[j],TMcut);
 
             /* store result */
             for (ui=0;ui<3;ui++)
                 for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=u0[ui][uj];
             for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=t0[uj];
-            TM1_mat[i][j]=TM2; // normalized by chain1
-            TM2_mat[i][j]=TM1; // normalized by chain2
             seqxA_mat[i][j]=seqxA;
             seqyA_mat[i][j]=seqyA;
             TMave_mat[i][j]=TM4*Lnorm_tmp;
+            if (TMave_mat[i][j]>maxTMmono)
+            {
+                maxTMmono=TMave_mat[i][j];
+                maxTMmono_i=i;
+                maxTMmono_j=j;
+            }
 
             /* clean up */
             seqM.clear();
@@ -568,8 +570,7 @@ int main(int argc, char *argv[])
     if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain");
 
     /* refine alignment for large oligomers */
-    int aln_chain_num=0;
-    for (i=0;i<chain1_num;i++) aln_chain_num+=(assign1_list[i]>=0);
+    int aln_chain_num=count_assign_pair(assign1_list,chain1_num);
     bool is_oligomer=(aln_chain_num>=3);
     if (aln_chain_num==2) // dimer alignment
     {
@@ -617,22 +618,90 @@ int main(int argc, char *argv[])
         DeleteArray(&xcentroids, chain1_num);
         DeleteArray(&ycentroids, chain2_num);
     }
-    if (len_aa+len_na>1000) fast_opt=true;
+
+    /* store initial assignment */
+    int init_pair_num=count_assign_pair(assign1_list,chain1_num);
+    int *assign1_init, *assign2_init;
+    assign1_init=new int[chain1_num];
+    assign2_init=new int[chain2_num];
+    double **TMave_init;
+    NewArray(&TMave_init,chain1_num,chain2_num);
+    vector<vector<string> >seqxA_init(chain1_num,tmp_str_vec);
+    vector<vector<string> >seqyA_init(chain1_num,tmp_str_vec);
+    vector<string> sequence_init;
+    copy_chain_assign_data(chain1_num, chain2_num, sequence_init,
+        seqxA_mat,  seqyA_mat,  assign1_list, assign2_list, TMave_mat,
+        seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init);
 
     /* perform iterative alignment */
-    for (int iter=0;iter<1;iter++)
+    double max_total_score=0; // ignore old total_score because previous
+                              // score was from monomeric chain superpositions
+    int max_iter=5-(int)((len_aa+len_na)/200);
+    if (max_iter<2) max_iter=2;
+    MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num,
+        TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence,
+        d0_scale, fast_opt);
+
+    /* sometime MMalign_iter is even worse than monomer alignment */
+    if (max_total_score<maxTMmono)
     {
-        total_score=MMalign_search(xa_vec, ya_vec, seqx_vec, seqy_vec,
+        copy_chain_assign_data(chain1_num, chain2_num, sequence,
+            seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init,
+            seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat);
+        for (i=0;i<chain1_num;i++)
+        {
+            if (i!=maxTMmono_i) assign1_list[i]=-1;
+            else assign1_list[i]=maxTMmono_j;
+        }
+        for (j=0;j<chain2_num;j++)
+        {
+            if (j!=maxTMmono_j) assign2_list[j]=-1;
+            else assign2_list[j]=maxTMmono_i;
+        }
+        sequence[0]=seqxA_mat[maxTMmono_i][maxTMmono_j];
+        sequence[1]=seqyA_mat[maxTMmono_i][maxTMmono_j];
+        max_total_score=maxTMmono;
+        MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec,
             secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
-            xa, ya, seqx, seqy, secx, secy, len_aa, len_na,
-            chain1_num, chain2_num, TM1_mat, TM2_mat, TMave_mat,
-            seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence,
-            d0_scale, true);
-        total_score=enhanced_greedy_search(TMave_mat, assign1_list,
-            assign2_list, chain1_num, chain2_num);
-        if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain");
+            xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num,
+            TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence,
+            d0_scale, fast_opt);
     }
 
+    /* perform cross chain alignment
+     * in some cases, this leads to dramatic improvement, esp for homodimer */
+    int iter_pair_num=count_assign_pair(assign1_list,chain1_num);
+    if (iter_pair_num>=init_pair_num) copy_chain_assign_data(
+        chain1_num, chain2_num, sequence_init,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat,
+        seqxA_init, seqyA_init, assign1_init,  assign2_init,  TMave_init);
+    double max_total_score_cross=max_total_score;
+
+    //if (init_pair_num!=2 && is_oligomer==false) MMalign_cross(
+        //max_total_score_cross, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec,
+        //secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        //xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num,
+        //TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init, sequence_init,
+        //d0_scale, true);
+    //else 
+    if (len_aa+len_na<10000)
+    {
+        MMalign_dimer(max_total_score_cross, xa_vec, ya_vec, seqx_vec, seqy_vec,
+            secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+            xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num,
+            TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init,
+            sequence_init, d0_scale, fast_opt);
+        if (max_total_score_cross>max_total_score) 
+        {
+            max_total_score=max_total_score_cross;
+            copy_chain_assign_data(chain1_num, chain2_num, sequence,
+                seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init,
+                seqxA_mat,  seqyA_mat,  assign1_list, assign2_list, TMave_mat);
+        }
+    } 
+
     /* final alignment */
     if (outfmt_opt==0) print_version();
     MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()),
@@ -641,7 +710,7 @@ int main(int argc, char *argv[])
         xa_vec, ya_vec, seqx_vec, seqy_vec,
         secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
         xa, ya, seqx, seqy, secx, secy, len_aa, len_na,
-        chain1_num, chain2_num, TM1_mat, TM2_mat, TMave_mat,
+        chain1_num, chain2_num, TMave_mat,
         seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence,
         d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt,
         a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2);
@@ -649,13 +718,18 @@ int main(int argc, char *argv[])
     /* clean up everything */
     delete [] assign1_list;
     delete [] assign2_list;
-    DeleteArray(&TM1_mat,  chain1_num);
-    DeleteArray(&TM2_mat,  chain1_num);
     DeleteArray(&TMave_mat,chain1_num);
     DeleteArray(&ut_mat,   chain1_num*chain2_num);
     vector<vector<string> >().swap(seqxA_mat);
     vector<vector<string> >().swap(seqM_mat);
     vector<vector<string> >().swap(seqyA_mat);
+    vector<string>().swap(tmp_str_vec);
+
+    delete [] assign1_init;
+    delete [] assign2_init;
+    DeleteArray(&TMave_init,chain1_num);
+    vector<vector<string> >().swap(seqxA_init);
+    vector<vector<string> >().swap(seqyA_init);
 
     vector<vector<vector<double> > >().swap(xa_vec); // structure of complex1
     vector<vector<vector<double> > >().swap(ya_vec); // structure of complex2
@@ -672,9 +746,11 @@ int main(int argc, char *argv[])
     vector<string>().swap(chain1_list);
     vector<string>().swap(chain2_list);
     vector<string>().swap(sequence);
+    vector<string>().swap(resi_vec1);  // residue index for chain1
+    vector<string>().swap(resi_vec2);  // residue index for chain2
 
     t2 = clock();
     float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
-    printf("Total CPU time is %5.2f seconds\n", diff);
+    printf("#Total CPU time is %5.2f seconds\n", diff);
     return 0;
 }
diff --git a/modules/bindings/src/USalign/MMalign.h b/modules/bindings/src/USalign/MMalign.h
new file mode 100644
index 000000000..4b480da62
--- /dev/null
+++ b/modules/bindings/src/USalign/MMalign.h
@@ -0,0 +1,3040 @@
+#include <cfloat>
+#include "se.h"
+
+/* Count nucleic acid chains (mol_vec[i]>0 -> na_chain_num) and protein chains
+ * (all other entries -> aa_chain_num) in a complex; returns the sum of both */
+int count_na_aa_chain_num(int &na_chain_num,int &aa_chain_num,
+    const vector<int>&mol_vec)
+{
+    na_chain_num=0;
+    aa_chain_num=0;
+    for (size_t i=0;i<mol_vec.size();i++)
+    {
+        if (mol_vec[i]>0) na_chain_num++; // positive entry: counted as nucleic acid
+        else              aa_chain_num++; // zero or negative: counted as protein
+    }
+    return na_chain_num+aa_chain_num; // every entry is counted once, so this is mol_vec.size()
+}
+
+/* For a dimer-dimer alignment, compare the current chain pairing (i1-j1, i2-j2) with the
+ * crossed one (i1-j2, i2-j1) and swap the assignment lists if crossed scores higher; returns true if swapped */
+bool adjust_dimer_assignment(
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<int>&xlen_vec, const vector<int>&ylen_vec,
+    const vector<int>&mol_vec1, const vector<int>&mol_vec2,
+    int *assign1_list, int *assign2_list,
+    const vector<vector<string> >&seqxA_mat,
+    const vector<vector<string> >&seqyA_mat)
+{
+    /* check currently assigned chains: first hit -> (i1,j1), later hits -> (i2,j2) */
+    int i1=-1,i2=-1,j1=-1,j2=-1;
+    int chain1_num=xa_vec.size();
+    int i,j;
+    for (i=0;i<chain1_num;i++)
+    {
+        if (assign1_list[i]>=0)
+        {
+            if (i1<0)
+            {
+                i1=i;
+                j1=assign1_list[i1];
+            }
+            else
+            {
+                i2=i;
+                j2=assign1_list[i2];
+            }
+        }
+    }
+    if (i1<0 || i2<0) return false; // fewer than two assigned pairs: nothing to swap
+
+    /* normalize d0 by L: combined length of the two chains on each side */
+    int xlen=xlen_vec[i1]+xlen_vec[i2];
+    int ylen=ylen_vec[j1]+ylen_vec[j2];
+    int mol_type=mol_vec1[i1]+mol_vec1[i2]+
+                 mol_vec2[j1]+mol_vec2[j2];
+    double D0_MIN, d0, d0_search;
+    double Lnorm=getmin(xlen,ylen);
+    parameter_set4final(getmin(xlen,ylen), D0_MIN, Lnorm, d0,
+        d0_search, mol_type);
+
+    double **xa,**ya, **xt;
+    NewArray(&xa, xlen, 3);
+    NewArray(&ya, ylen, 3);
+    NewArray(&xt, xlen, 3);
+
+    double RMSD = 0;
+    double dd   = 0;
+    double t[3];
+    double u[3][3];
+    size_t L_ali=0; // index of residue in aligned region
+    size_t r=0;     // index of residue in full alignment
+
+    /* total score using current assignment */
+    L_ali=0;
+    i=j=-1;
+    for (r=0;r<seqxA_mat[i1][j1].size();r++)
+    {
+        i+=(seqxA_mat[i1][j1][r]!='-'); // i: residue index in chain i1 (gaps do not advance it)
+        j+=(seqyA_mat[i1][j1][r]!='-'); // j: residue index in chain j1
+        if (seqxA_mat[i1][j1][r]=='-' || seqyA_mat[i1][j1][r]=='-') continue;
+        xa[L_ali][0]=xa_vec[i1][i][0];
+        xa[L_ali][1]=xa_vec[i1][i][1];
+        xa[L_ali][2]=xa_vec[i1][i][2];
+        ya[L_ali][0]=ya_vec[j1][j][0];
+        ya[L_ali][1]=ya_vec[j1][j][1];
+        ya[L_ali][2]=ya_vec[j1][j][2];
+        L_ali++;
+    }
+    i=j=-1;
+    for (r=0;r<seqxA_mat[i2][j2].size();r++)
+    {
+        i+=(seqxA_mat[i2][j2][r]!='-');
+        j+=(seqyA_mat[i2][j2][r]!='-');
+        if (seqxA_mat[i2][j2][r]=='-' || seqyA_mat[i2][j2][r]=='-') continue;
+        xa[L_ali][0]=xa_vec[i2][i][0];
+        xa[L_ali][1]=xa_vec[i2][i][1];
+        xa[L_ali][2]=xa_vec[i2][i][2];
+        ya[L_ali][0]=ya_vec[j2][j][0];
+        ya[L_ali][1]=ya_vec[j2][j][1];
+        ya[L_ali][2]=ya_vec[j2][j][2];
+        L_ali++;
+    }
+
+    Kabsch(xa, ya, L_ali, 1, &RMSD, t, u);
+    do_rotation(xa, xt, L_ali, t, u);
+
+    double total_score1=0;
+    for (r=0;r<L_ali;r++)
+    {
+        dd=dist(xt[r],ya[r]); // squared distance (dist() is fed to sqrt() in calculate_centroids)
+        total_score1+=1/(1+dd/(d0*d0)); // 1/(1+(d/d0)^2); "dd/d0*d0" cancelled d0 out
+    }
+    total_score1/=Lnorm;
+
+    /* total score using reversed assignment */
+    L_ali=0;
+    i=j=-1;
+    for (r=0;r<seqxA_mat[i1][j2].size();r++)
+    {
+        i+=(seqxA_mat[i1][j2][r]!='-');
+        j+=(seqyA_mat[i1][j2][r]!='-');
+        if (seqxA_mat[i1][j2][r]=='-' || seqyA_mat[i1][j2][r]=='-') continue;
+        xa[L_ali][0]=xa_vec[i1][i][0];
+        xa[L_ali][1]=xa_vec[i1][i][1];
+        xa[L_ali][2]=xa_vec[i1][i][2];
+        ya[L_ali][0]=ya_vec[j2][j][0];
+        ya[L_ali][1]=ya_vec[j2][j][1];
+        ya[L_ali][2]=ya_vec[j2][j][2];
+        L_ali++;
+    }
+    i=j=-1;
+    for (r=0;r<seqxA_mat[i2][j1].size();r++)
+    {
+        i+=(seqxA_mat[i2][j1][r]!='-');
+        j+=(seqyA_mat[i2][j1][r]!='-');
+        if (seqxA_mat[i2][j1][r]=='-' || seqyA_mat[i2][j1][r]=='-') continue;
+        xa[L_ali][0]=xa_vec[i2][i][0];
+        xa[L_ali][1]=xa_vec[i2][i][1];
+        xa[L_ali][2]=xa_vec[i2][i][2];
+        ya[L_ali][0]=ya_vec[j1][j][0];
+        ya[L_ali][1]=ya_vec[j1][j][1];
+        ya[L_ali][2]=ya_vec[j1][j][2];
+        L_ali++;
+    }
+
+    Kabsch(xa, ya, L_ali, 1, &RMSD, t, u);
+    do_rotation(xa, xt, L_ali, t, u);
+
+    double total_score2=0;
+    for (r=0;r<L_ali;r++)
+    {
+        dd=dist(xt[r],ya[r]); // squared distance, as above
+        total_score2+=1/(1+dd/(d0*d0)); // same corrected term as for total_score1
+    }
+    total_score2/=Lnorm;
+
+    /* swap chain assignment if the crossed pairing scores strictly higher */
+    if (total_score1<total_score2)
+    {
+        assign1_list[i1]=j2;
+        assign1_list[i2]=j1;
+        assign2_list[j1]=i2;
+        assign2_list[j2]=i1;
+    }
+
+    /* clean up */
+    DeleteArray(&xa, xlen);
+    DeleteArray(&ya, ylen);
+    DeleteArray(&xt, xlen);
+    return total_score1<total_score2;
+}
+
+/* count how many chains are paired, i.e. how many of assign1_list[0..chain1_num-1] are >=0 */
+int count_assign_pair(int *assign1_list,const int chain1_num)
+{
+    int pair_num=0;
+    int i;
+    for (i=0;i<chain1_num;i++) pair_num+=(assign1_list[i]>=0); // comparison adds 0 or 1
+    return pair_num;
+}
+
+
+/* Assign chain-chain correspondence between two complexes.
+ *
+ * TMave_mat    chain1_num x chain2_num score table; an entry <=0 is never
+ *              picked as a direct pairing candidate
+ * assign1_list output, size chain1_num: partner index in complex 2 for each
+ *              chain of complex 1, or -1 if unpaired
+ * assign2_list output, size chain2_num: the inverse mapping
+ *
+ * Both lists are reset to -1 before use. Step 1 greedily fixes the highest
+ * scoring unassigned pair until none is left. Step 2 re-pairs chains as long
+ * as that raises the summed score, for at most 5*min(chain1_num,chain2_num)
+ * rounds.
+ *
+ * Returns the accumulated score of the assignment; 0 means that no chain
+ * pair could be assigned and both lists are all -1.
+ */
+double enhanced_greedy_search(double **TMave_mat,int *assign1_list,
+    int *assign2_list, const int chain1_num, const int chain2_num)
+{
+    double total_score=0;
+    double tmp_score=0;
+    int i,j;
+    int maxi=0;
+    int maxj=0;
+
+    /* start with every chain unassigned */
+    for (i=0;i<chain1_num;i++) assign1_list[i]=-1;
+    for (j=0;j<chain2_num;j++) assign2_list[j]=-1;
+
+    /* greedy assignment: in each iteration, the highest chain pair is
+     * assigned, until no assignable chain is left */
+    while(1)
+    {
+        tmp_score=-1;
+        for (i=0;i<chain1_num;i++)
+        {
+            if (assign1_list[i]>=0) continue;
+            for (j=0;j<chain2_num;j++)
+            {
+                if (assign2_list[j]>=0 || TMave_mat[i][j]<=0) continue;
+                if (TMave_mat[i][j]>tmp_score)
+                {
+                    maxi=i;
+                    maxj=j;
+                    tmp_score=TMave_mat[i][j];
+                }
+            }
+        }
+        if (tmp_score<=0) break; // error: no assignable chain
+        assign1_list[maxi]=maxj;
+        assign2_list[maxj]=maxi;
+        total_score+=tmp_score;
+    }
+    if (total_score<=0) return total_score; // error: no assignable chain
+    //cout<<"assign1_list={";
+    //for (i=0;i<chain1_num;i++) cout<<assign1_list[i]<<","; cout<<"}"<<endl;
+    //cout<<"assign2_list={";
+    //for (j=0;j<chain2_num;j++) cout<<assign2_list[j]<<","; cout<<"}"<<endl;
+
+    /* iterative refinement: each round applies the first re-pairing with a
+     * positive score change; the change is computed from TMave_mat alone, so
+     * no scratch copy of the assignment lists is needed */
+    double delta_score;
+    int old_i=-1;
+    int old_j=-1;
+
+    for (int iter=0;iter<getmin(chain1_num,chain2_num)*5;iter++)
+    {
+        delta_score=-1;
+        for (i=0;i<chain1_num;i++)
+        {
+            old_j=assign1_list[i]; // current partner of i (may be -1)
+            for (j=0;j<chain2_num;j++)
+            {
+                // attempt to swap (i,old_j=assign1_list[i]) with (i,j)
+                if (j==assign1_list[i] || TMave_mat[i][j]<=0) continue;
+                old_i=assign2_list[j]; // current partner of j (may be -1)
+
+                /* score change: gain (i,j) and (old_i,old_j),
+                 * lose (i,old_j) and (old_i,j) */
+                delta_score=TMave_mat[i][j];
+                if (old_j>=0) delta_score-=TMave_mat[i][old_j];
+                if (old_i>=0) delta_score-=TMave_mat[old_i][j];
+                if (old_i>=0 && old_j>=0) delta_score+=TMave_mat[old_i][old_j];
+
+                if (delta_score>0) // successful swap
+                {
+                    assign1_list[i]=j;
+                    if (old_i>=0) assign1_list[old_i]=old_j;
+                    assign2_list[j]=i;
+                    if (old_j>=0) assign2_list[old_j]=old_i;
+                    total_score+=delta_score;
+                    break;
+                }
+            }
+            if (delta_score>0) break; // a swap was applied: start the next round
+        }
+        if (delta_score<=0) break; // cannot swap any chain pair
+    }
+
+    return total_score;
+}
+
+/* Compute the centroid of every chain and a centroid-spacing scale.
+ * centroids[c][0..2] receives the per-axis mean of the coordinates
+ * a_vec[c][r][0..2] over all residues r of chain c (an empty chain
+ * divides by zero). Returns the mean, over chains, of sqrt(dist())
+ * between each centroid and its nearest other centroid; with a single
+ * chain no separation exists and the initial -1 is returned. */
+double calculate_centroids(const vector<vector<vector<double> > >&a_vec,
+    const int chain_num, double ** centroids)
+{
+    int chain, res, axis; // chain, residue and coordinate-axis indices
+    for (chain=0; chain<chain_num; chain++)
+    {
+        const int nres=a_vec[chain].size();
+        for (axis=0; axis<3; axis++) centroids[chain][axis]=0;
+        for (res=0; res<nres; res++)
+        {
+            // running per-axis coordinate sum of this chain
+            for (axis=0; axis<3; axis++)
+                centroids[chain][axis]+=a_vec[chain][res][axis];
+        }
+        for (axis=0; axis<3; axis++) centroids[chain][axis]/=nres;
+    }
+
+    /* nearest[c]: smallest separation between centroid c and any other
+     * centroid; -1 means "not set yet" */
+    vector<double> nearest(chain_num,-1);
+    int other;
+    double sep;
+    for (chain=0; chain<chain_num; chain++)
+    {
+        for (other=0; other<chain_num; other++)
+        {
+            if (other==chain) continue;
+            sep=sqrt(dist(centroids[chain],centroids[other]));
+            // a stored value <=0 is treated as unset and simply overwritten
+            nearest[chain]=(nearest[chain]<=0)?sep:getmin(nearest[chain],sep);
+        }
+    }
+
+    /* average the nearest-centroid separations over all chains */
+    double d0MM=0;
+    for (chain=0; chain<chain_num; chain++) d0MM+=nearest[chain];
+    d0MM/=chain_num;
+    return d0MM;
+}
+
+/* calculate MMscore of the chain pairs (i,assign1_list[i]); j<0 is skipped
+ * MMscore = sum(TMave_mat[i][j]) * sum(1/(1+dij^2/d0MM^2))
+ *         / (L* getmin(chain1_num,chain2_num))
+ * dij is the centroid distance between chain pair i and j
+ * d0MM is scaling factor. TMave_mat[i][j] is the TM-score between
+ * chain pair i and j multiplied by getmin(Li*Lj) */
+double calMMscore(double **TMave_mat,int *assign1_list,
+    const int chain1_num, const int chain2_num, double **xcentroids,
+    double **ycentroids, const double d0MM, double **r1, double **r2, // r1,r2,xt: caller-owned scratch rows
+    double **xt, double t[3], double u[3][3], const int L) // t,u: handed to Kabsch()/do_rotation() when >=3 pairs
+{
+    int Nali=0; // number of aligned chain
+    int i,j;
+    double MMscore=0;
+    for (i=0;i<chain1_num;i++)
+    {
+        j=assign1_list[i];
+        if (j<0) continue; // chain i has no partner
+        // pack the centroids of this assigned pair into row Nali of r1/r2
+        r1[Nali][0]=xcentroids[i][0];
+        r1[Nali][1]=xcentroids[i][1];
+        r1[Nali][2]=xcentroids[i][2];
+
+        r2[Nali][0]=ycentroids[j][0];
+        r2[Nali][1]=ycentroids[j][1];
+        r2[Nali][2]=ycentroids[j][2];
+
+        Nali++;
+        MMscore+=TMave_mat[i][j];
+    }
+    MMscore/=L; // first factor: summed pair scores divided by L
+
+    double RMSD = 0;
+    double TMscore=0;
+    if (Nali>=3)
+    {
+        /* Kabsch superposition */
+        Kabsch(r1, r2, Nali, 1, &RMSD, t, u);
+        do_rotation(r1, xt, Nali, t, u);
+
+        /* calculate pseudo-TMscore */
+        double dd=0;
+        for (i=0;i<Nali;i++)
+        {
+            dd=dist(xt[i], r2[i]); // presumably a squared distance (dd is not squared below) -- confirm in dist()
+            TMscore+=1/(1+dd/(d0MM*d0MM));
+        }
+    }
+    else if (Nali==2)
+    {
+        double dd=dist(r1[0],r2[0]); // NOTE(review): only the first pair, without superposition, contributes here -- confirm intent
+        TMscore=1/(1+dd/(d0MM*d0MM));
+    }
+    else TMscore=1; // Nali<=1: at most one aligned chain.
+    TMscore/=getmin(chain1_num,chain2_num);
+    MMscore*=TMscore;
+    return MMscore;
+}
+
+/* check if this is alignment of heterooligomer or homooligomer
+ * return het_deg=(max-min)/max over all entries of TMave_mat, which
+ * ranges from 0 to 1 for non-negative scores.
+ * The larger the value, the more "hetero";
+ * the smaller the value, the more "homo".
+ * Returns 0 when no entry is positive (empty or all-zero matrix),
+ * instead of dividing by zero. */
+double check_heterooligomer(double **TMave_mat, const int chain1_num,
+    const int chain2_num)
+{
+    double min_TM=-1; // -1 == not yet set
+    double max_TM=-1;
+    int i,j;
+    for (i=0;i<chain1_num;i++)
+    {
+        for (j=0;j<chain2_num;j++)
+        {
+            if (min_TM<0 || TMave_mat[i][j] <min_TM) min_TM=TMave_mat[i][j];
+            if (max_TM<0 || TMave_mat[i][j]>=max_TM) max_TM=TMave_mat[i][j];
+        }
+    }
+    if (max_TM<=0) return 0; // nothing aligned: avoid 0/0 (NaN)
+    return (max_TM-min_TM)/max_TM;
+}
+
+/* reassign chain-chain correspondence, specific for homooligomer.
+ * For every chain pair (c1,c2) with TMave_mat[c1][c2]>0, the transform in
+ * ut_mat[c1*chain2_num+c2] (9 rotation values, then 3 translation values)
+ * is applied to xcentroids, (c1,c2) is paired first, and the remaining
+ * chains are paired greedily in descending order of
+ *     TMave_mat[i][j] / (1 + dist(xt[i],ycentroids[j]) / d0MM^2).
+ * The assignment with the highest
+ *     MMscore = (sum of TMave_mat / L) * (sum of centroid terms / chain_num)
+ * is written to assign1_list/assign2_list (-1 == unassigned); both lists
+ * are left untouched when no chain pair has a positive TMave_mat.
+ * Returns that highest MMscore (0 if no assignment was evaluated). */
+double homo_refined_greedy_search(double **TMave_mat,int *assign1_list,
+    int *assign2_list, const int chain1_num, const int chain2_num,
+    double **xcentroids, double **ycentroids, const double d0MM,
+    const int L, double **ut_mat)
+{
+    double MMscore_max=0;
+    double MMscore=0;
+    int i,j;
+    int c1,c2;
+    int max_i=-1; // the chain pair whose monomer u t yields highest MMscore
+    int max_j=-1;
+
+    int chain_num=getmin(chain1_num,chain2_num);
+    int *assign1_tmp=new int [chain1_num];
+    int *assign2_tmp=new int [chain2_num];
+    double **xt; // xcentroids after the transform of the current pair
+    NewArray(&xt, chain1_num, 3);
+    double t[3];
+    double u[3][3];
+    int ui,uj,ut_idx;
+    double TMscore=0; // pseudo TM-score
+    double TMsum  =0;
+    double dd=0;
+
+    size_t  total_pair=chain1_num*chain2_num; // total pair
+    double *ut_tmc_mat=new double [total_pair]; // chain level TM-score
+    vector<pair<double,int> > ut_tm_vec(total_pair,make_pair(0.0,0)); // product of both
+
+    for (c1=0;c1<chain1_num;c1++)
+    {
+        for (c2=0;c2<chain2_num;c2++)
+        {
+            if (TMave_mat[c1][c2]<=0) continue;
+            ut_idx=c1*chain2_num+c2;
+            for (ui=0;ui<3;ui++)
+                for (uj=0;uj<3;uj++) u[ui][uj]=ut_mat[ut_idx][ui*3+uj];
+            for (uj=0;uj<3;uj++) t[uj]=ut_mat[ut_idx][9+uj];
+
+            do_rotation(xcentroids, xt, chain1_num, t, u);
+
+            for (i=0;i<chain1_num;i++) assign1_tmp[i]=-1;
+            for (j=0;j<chain2_num;j++) assign2_tmp[j]=-1;
+
+            /* score every chain pair under this transform; pairs without
+             * a positive TMave_mat keep key -1 and sort to the front */
+            for (i=0;i<chain1_num;i++)
+            {
+                for (j=0;j<chain2_num;j++)
+                {
+                    ut_idx=i*chain2_num+j;
+                    ut_tmc_mat[ut_idx]=0;
+                    ut_tm_vec[ut_idx].first=-1;
+                    ut_tm_vec[ut_idx].second=ut_idx;
+                    if (TMave_mat[i][j]<=0) continue;
+                    dd=dist(xt[i],ycentroids[j]);
+                    ut_tmc_mat[ut_idx]=1/(1+dd/(d0MM*d0MM));
+                    ut_tm_vec[ut_idx].first=
+                        ut_tmc_mat[ut_idx]*TMave_mat[i][j];
+                }
+            }
+
+            /* initial assignment: the pair that supplied the transform
+             * is always matched */
+            assign1_tmp[c1]=c2;
+            assign2_tmp[c2]=c1;
+            TMsum=TMave_mat[c1][c2];
+            TMscore=ut_tmc_mat[c1*chain2_num+c2];
+
+            /* further assignment: walk the pairs from best to worst and
+             * match every pair whose two chains are both still free */
+            sort(ut_tm_vec.begin(), ut_tm_vec.end()); // sort in ascending order
+            for (ut_idx=total_pair-1;ut_idx>=0;ut_idx--)
+            {
+                j=ut_tm_vec[ut_idx].second % chain2_num;
+                i=int(ut_tm_vec[ut_idx].second / chain2_num);
+                if (TMave_mat[i][j]<=0) break;
+                if (assign1_tmp[i]>=0 || assign2_tmp[j]>=0) continue;
+                assign1_tmp[i]=j;
+                assign2_tmp[j]=i;
+                TMsum+=TMave_mat[i][j];
+                TMscore+=ut_tmc_mat[i*chain2_num+j];
+            }
+
+            /* final MMscore */
+            MMscore=(TMsum/L)*(TMscore/chain_num);
+            if (max_i<0 || max_j<0 || MMscore>MMscore_max)
+            {
+                max_i=c1;
+                max_j=c2;
+                MMscore_max=MMscore;
+                for (i=0;i<chain1_num;i++) assign1_list[i]=assign1_tmp[i];
+                for (j=0;j<chain2_num;j++) assign2_list[j]=assign2_tmp[j];
+            }
+        }
+    }
+
+    /* clean up */
+    delete[]assign1_tmp;
+    delete[]assign2_tmp;
+    delete[]ut_tmc_mat;
+    ut_tm_vec.clear();
+    DeleteArray(&xt, chain1_num);
+    /* return the score of the assignment left in assign1_list/assign2_list;
+     * MMscore only holds the score of the last transform that was tried */
+    return MMscore_max;
+}
+
+/* reassign chain-chain correspondence, specific for heterooligomer.
+ * Starting from the assignment already in assign1_list/assign2_list,
+ * repeatedly try to re-pair chain i with another chain j (j's previous
+ * partner, if any, takes over i's previous partner) and accept the move
+ * whenever calMMscore() of the trial assignment exceeds the current
+ * score. Stops after a full pass without an accepted move, or after
+ * chain1_num*chain2_num passes. assign1_list/assign2_list are updated
+ * in place; returns the MMscore of the final assignment. */
+double hetero_refined_greedy_search(double **TMave_mat,int *assign1_list,
+    int *assign2_list, const int chain1_num, const int chain2_num,
+    double **xcentroids, double **ycentroids, const double d0MM, const int L)
+{
+    double MMscore_old=0;
+    double MMscore=0;
+    int i,j;
+
+    double **r1; // r1, r2, xt: scratch buffers handed to calMMscore()
+    double **r2;
+    double **xt;
+    int chain_num=getmin(chain1_num,chain2_num);
+    NewArray(&r1, chain_num, 3);
+    NewArray(&r2, chain_num, 3);
+    NewArray(&xt, chain_num, 3);
+    double t[3];
+    double u[3][3];
+
+    /* calculate MMscore */
+    MMscore=MMscore_old=calMMscore(TMave_mat, assign1_list, chain1_num,
+        chain2_num, xcentroids, ycentroids, d0MM, r1, r2, xt, t, u, L);
+    /* MMscore_old always holds the score of the assignment currently
+     * stored in assign1_list/assign2_list; MMscore is overwritten by
+     * every trial below. */
+
+    /* iteratively refine chain assignment. in each iteration, attempt
+     * to swap (i,old_j=assign1_list[i]) with (i,j) */
+    double delta_score=-1;
+    int *assign1_tmp=new int [chain1_num]; // trial copies of the two assignment lists
+    int *assign2_tmp=new int [chain2_num];
+    for (i=0;i<chain1_num;i++) assign1_tmp[i]=assign1_list[i];
+    for (j=0;j<chain2_num;j++) assign2_tmp[j]=assign2_list[j];
+    int old_i=-1;
+    int old_j=-1;
+
+    //cout<<"assign1_list={";
+    //for (i=0;i<chain1_num;i++) cout<<assign1_list[i]<<","; cout<<"}"<<endl;
+    //cout<<"assign2_list={";
+    //for (j=0;j<chain2_num;j++) cout<<assign2_list[j]<<","; cout<<"}"<<endl;
+
+    for (int iter=0;iter<chain1_num*chain2_num;iter++)
+    {
+        delta_score=-1; // stays <=0 unless some move is accepted in this pass
+        for (i=0;i<chain1_num;i++)
+        {
+            old_j=assign1_list[i]; // current partner of i (-1 if none)
+            for (j=0;j<chain2_num;j++)
+            {
+                if (j==assign1_list[i] || TMave_mat[i][j]<=0) continue;
+                old_i=assign2_list[j]; // current partner of j (-1 if none)
+
+                assign1_tmp[i]=j;
+                if (old_i>=0) assign1_tmp[old_i]=old_j;
+                assign2_tmp[j]=i;
+                if (old_j>=0) assign2_tmp[old_j]=old_i;
+                // score the trial assignment
+                MMscore=calMMscore(TMave_mat, assign1_tmp, chain1_num,
+                    chain2_num, xcentroids, ycentroids, d0MM,
+                    r1, r2, xt, t, u, L);
+
+                //cout<<"(i,j,old_i,old_j,MMscore)=("<<i<<","<<j<<","
+                    //<<old_i<<","<<old_j<<","<<MMscore<<")"<<endl;
+
+                if (MMscore>MMscore_old) // successful swap
+                {
+                    assign1_list[i]=j;
+                    if (old_i>=0) assign1_list[old_i]=old_j;
+                    assign2_list[j]=i;
+                    if (old_j>=0) assign2_list[old_j]=old_i;
+                    delta_score=(MMscore-MMscore_old);
+                    MMscore_old=MMscore;
+                    //cout<<"MMscore="<<MMscore<<endl;
+                    break; // leaves the j loop only; the pass continues with the next i
+                }
+                else
+                {
+                    // rejected: restore the four trial entries from the accepted lists
+                    assign1_tmp[i]=assign1_list[i];
+                    if (old_i>=0) assign1_tmp[old_i]=assign1_list[old_i];
+                    assign2_tmp[j]=assign2_list[j];
+                    if (old_j>=0) assign2_tmp[old_j]=assign2_list[old_j];
+                }
+            }
+        }
+        //cout<<"iter="<<iter<<endl;
+        //cout<<"assign1_list={";
+        //for (i=0;i<chain1_num;i++) cout<<assign1_list[i]<<","; cout<<"}"<<endl;
+        //cout<<"assign2_list={";
+        //for (j=0;j<chain2_num;j++) cout<<assign2_list[j]<<","; cout<<"}"<<endl;
+        if (delta_score<=0) break; // cannot swap any chain pair
+    }
+    MMscore=MMscore_old; // report the accepted score, not the last trial
+    //cout<<"MMscore="<<MMscore<<endl;
+
+    /* clean up */
+    delete[]assign1_tmp;
+    delete[]assign2_tmp;
+    DeleteArray(&r1, chain_num);
+    DeleteArray(&r2, chain_num);
+    DeleteArray(&xt, chain_num);
+    return MMscore;
+}
+
+/* Copy the first len residues of one chain (coordinates, sequence and
+ * secondary structure) into a, seq and sec; seq and sec are then
+ * NUL-terminated at index len, so they need room for len+1 chars. */
+void copy_chain_data(const vector<vector<double> >&a_vec_i,
+    const vector<char>&seq_vec_i,const vector<char>&sec_vec_i,
+    const int len,double **a,char *seq,char *sec)
+{
+    for (int res=0; res<len; ++res)
+    {
+        const vector<double> &xyz=a_vec_i[res];
+        for (int axis=0; axis<3; ++axis) a[res][axis]=xyz[axis];
+        seq[res]=seq_vec_i[res];
+        sec[res]=sec_vec_i[res];
+    }
+    seq[len]=sec[len]=0;
+}
+
+/* clear chains with L<3, where L is the number of lines of a chain whose
+ * atom name (columns 13-16) is the representative atom: atom_opt, or for
+ * atom_opt=="auto" " C3'" when columns 18-19 look like a nucleotide residue
+ * name and " CA " otherwise. PDB_lines is taken by reference so that the
+ * caller actually sees the cleared chains (chain indices are unchanged). */
+void clear_full_PDB_lines(vector<vector<string> >&PDB_lines,const string atom_opt)
+{
+    size_t chain_i;
+    size_t a;
+    int Lch; // number of representative atoms found in the current chain
+    bool select_atom;
+    for (chain_i=0;chain_i<PDB_lines.size();chain_i++)
+    {
+        Lch=0;
+        for (a=0;a<PDB_lines[chain_i].size();a++)
+        {
+            const string &line=PDB_lines[chain_i][a]; // no per-atom copy
+            if (atom_opt=="auto")
+            {
+                if (line[17]==' ' && (line[18]=='D'||line[18]==' '))
+                     select_atom=(line.compare(12,4," C3'")==0);
+                else select_atom=(line.compare(12,4," CA ")==0);
+            }
+            else     select_atom=(line.compare(12,4,atom_opt)==0);
+            Lch+=select_atom;
+        }
+        if (Lch<3)
+        {
+            PDB_lines[chain_i].clear(); // drops every line of the chain
+        }
+    }
+}
+
+/* Read every ATOM (and, depending on het_opt, HETATM) record of filename
+ * into PDB_lines as PDB-formatted text, one vector<string> per chain.
+ * infmt_opt: 0 PDB, -1 PDB but switch to mmCIF when a "loop_" line is
+ * seen, 1 SPICKER, 2 xyz, 3 PDBx/mmCIF. ter_opt and split_opt control
+ * where reading stops and where a new chain is started. het_opt: 0 no
+ * HETATM, 1 all HETATM, 2 only MSE HETATM. Only altLoc ' '/'A' (PDB) or
+ * '.'/'A' (mmCIF) atoms are kept. ".gz"/".bz2" files are decompressed
+ * through gunzip/bzcat when pstream support is compiled in.
+ * Returns PDB_lines.size(), i.e. chains accumulated so far in PDB_lines. */
+size_t get_full_PDB_lines(const string filename,
+    vector<vector<string> >&PDB_lines, const int ter_opt,
+    const int infmt_opt, const int split_opt, const int het_opt)
+{
+    size_t i=0; // resi i.e. atom index
+    string line;
+    char chainID=0;
+    vector<string> tmp_str_vec;
+    int compress_type=0; // uncompressed file
+    ifstream fin;
+#ifndef REDI_PSTREAM_H_SEEN
+    ifstream fin_gz;
+#else
+    redi::ipstream fin_gz; // if file is compressed
+    if (filename.size()>=3 && 
+        filename.substr(filename.size()-3,3)==".gz")
+    {
+        fin_gz.open("gunzip -c '"+filename+"'"); // NOTE(review): filename goes into a shell command; a name containing ' escapes the quoting
+        compress_type=1;
+    }
+    else if (filename.size()>=4 && 
+        filename.substr(filename.size()-4,4)==".bz2")
+    {
+        fin_gz.open("bzcat '"+filename+"'"); // NOTE(review): same shell-quoting caveat as above
+        compress_type=2;
+    }
+    else
+#endif
+        fin.open(filename.c_str());
+
+    if (infmt_opt==0||infmt_opt==-1) // PDB format
+    {
+        while (compress_type?fin_gz.good():fin.good())
+        {
+            if (compress_type) getline(fin_gz, line);
+            else               getline(fin, line);
+            if (infmt_opt==-1 && line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+                return get_full_PDB_lines(filename,PDB_lines,
+                    ter_opt, 3, split_opt,het_opt);
+            if (i > 0)
+            {
+                if      (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+                else if (ter_opt>=3 && line.compare(0,3,"TER")==0) break;
+            }
+            if (split_opt && line.compare(0,3,"END")==0) chainID=0;
+            if (line.size()>=54 && (line[16]==' ' || line[16]=='A') && (
+                (line.compare(0, 6, "ATOM  ")==0) || 
+                (line.compare(0, 6, "HETATM")==0 && het_opt==1) ||
+                (line.compare(0, 6, "HETATM")==0 && het_opt==2 && 
+                 line.compare(17,3, "MSE")==0)))
+            {
+                if (!chainID)
+                {
+                    chainID=line[21];
+                    PDB_lines.push_back(tmp_str_vec);
+                }
+                else if (ter_opt>=2 && chainID!=line[21]) break;
+                if (split_opt==2 && chainID!=line[21])
+                {
+                    chainID=line[21];
+                    PDB_lines.push_back(tmp_str_vec);
+                } 
+                PDB_lines.back().push_back(line);
+                i++;
+            }
+        }
+    }
+    else if (infmt_opt==1) // SPICKER format
+    {
+        size_t L=0;
+        float x,y,z;
+        stringstream i8_stream;
+        while (compress_type?fin_gz.good():fin.good())
+        {
+            if (compress_type) fin_gz>>L>>x>>y>>z;
+            else               fin   >>L>>x>>y>>z;
+            if (compress_type) getline(fin_gz, line);
+            else               getline(fin, line);
+            if (!(compress_type?fin_gz.good():fin.good())) break;
+            PDB_lines.push_back(tmp_str_vec); // one chain per model; back() below needs it
+            for (i=0;i<L;i++)
+            {
+                if (compress_type) fin_gz>>x>>y>>z;
+                else               fin   >>x>>y>>z;
+                i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  UNK  "<<setw(4)
+                    <<i+1<<"    "<<setiosflags(ios::fixed)<<setprecision(3)
+                    <<setw(8)<<x<<setw(8)<<y<<setw(8)<<z;
+                line=i8_stream.str();
+                i8_stream.str(string());
+                PDB_lines.back().push_back(line);
+            }
+            if (compress_type) getline(fin_gz, line);
+            else               getline(fin, line);
+        }
+    }
+    else if (infmt_opt==2) // xyz format
+    {
+        size_t L=0;
+        stringstream i8_stream;
+        while (compress_type?fin_gz.good():fin.good())
+        {
+            if (compress_type) getline(fin_gz, line);
+            else               getline(fin, line);
+            L=atoi(line.c_str());
+            if (compress_type) getline(fin_gz, line);
+            else               getline(fin, line);
+            for (i=0;i<line.size();i++) // NOTE(review): the scan result (i) is not used afterwards
+                if (line[i]==' '||line[i]=='\t') break;
+            if (!(compress_type?fin_gz.good():fin.good())) break;
+            PDB_lines.push_back(tmp_str_vec);
+            for (i=0;i<L;i++)
+            {
+                if (compress_type) getline(fin_gz, line);
+                else               getline(fin, line);
+                i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  "
+                    <<AAmap(line[0])<<"  "<<setw(4)<<i+1<<"    "
+                    <<line.substr(2,8)<<line.substr(11,8)<<line.substr(20,8); // NOTE(review): substr() throws on a line shorter than 20 chars
+                line=i8_stream.str();
+                i8_stream.str(string());
+                PDB_lines.back().push_back(line);
+            }
+        }
+    }
+    else if (infmt_opt==3) // PDBx/mmCIF format
+    {
+        bool loop_ = false; // not reading following content
+        map<string,int> _atom_site;
+        int atom_site_pos;
+        vector<string> line_vec;
+        string alt_id=".";  // alternative location indicator
+        string asym_id="."; // this is similar to chainID, except that
+                            // chainID is char while asym_id is a string
+                            // with possibly multiple char
+        string prev_asym_id="";
+        string AA="";       // residue name
+        string atom="";
+        string resi="";
+        string model_index=""; // the same as model_idx but type is string
+        stringstream i8_stream;
+        while (compress_type?fin_gz.good():fin.good())
+        {
+            if (compress_type) getline(fin_gz, line);
+            else               getline(fin, line);
+            if (line.size()==0) continue;
+            if (loop_) loop_ = (line.size()>=2)?(line.compare(0,2,"# ")):(line.compare(0,1,"#"));
+            if (!loop_)
+            {
+                if (line.compare(0,5,"loop_")) continue;
+                while(1)
+                {
+                    if (compress_type)
+                    {
+                        if (fin_gz.good()) getline(fin_gz, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                    }
+                    else
+                    {
+                        if (fin.good()) getline(fin, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                    }
+                    if (line.size()) break;
+                }
+                if (line.compare(0,11,"_atom_site.")) continue;
+
+                loop_=true;
+                _atom_site.clear();
+                atom_site_pos=0;
+                _atom_site[Trim(line.substr(11))]=atom_site_pos;
+
+                while(1)
+                {
+                    if (!(compress_type?fin_gz.good():fin.good())) // file ends inside the column list
+                        PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                    if (compress_type) getline(fin_gz, line);
+                    else               getline(fin, line);
+                    if (line.size()==0) continue;
+                    if (line.compare(0,11,"_atom_site.")) break;
+                    _atom_site[Trim(line.substr(11))]=++atom_site_pos;
+                }
+                if (_atom_site.count("group_PDB")*
+                    _atom_site.count("label_atom_id")*
+                    _atom_site.count("label_comp_id")*
+                   (_atom_site.count("auth_asym_id")+
+                    _atom_site.count("label_asym_id"))*
+                   (_atom_site.count("auth_seq_id")+
+                    _atom_site.count("label_seq_id"))*
+                    _atom_site.count("Cartn_x")*
+                    _atom_site.count("Cartn_y")*
+                    _atom_site.count("Cartn_z")==0)
+                {
+                    loop_ = false;
+                    cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_comp_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl;
+                    continue;
+                }
+            }
+
+            line_vec.clear();
+            split(line,line_vec); // NOTE(review): line_vec is indexed below without checking that the row has every column
+            if ((line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                 line_vec[_atom_site["group_PDB"]]!="HETATM") ||
+                (line_vec[_atom_site["group_PDB"]]=="HETATM" &&
+                 (het_opt==0 || 
+                 (het_opt==2 && line_vec[_atom_site["label_comp_id"]]!="MSE")))
+                ) continue;
+            alt_id=".";
+            if (_atom_site.count("label_alt_id")) // in 39.4 % of entries
+                alt_id=line_vec[_atom_site["label_alt_id"]];
+            if (alt_id!="." && alt_id!="A") continue;
+
+            atom=line_vec[_atom_site["label_atom_id"]];
+            if (atom[0]=='"') atom=atom.substr(1);
+            if (atom.size() && atom[atom.size()-1]=='"')
+                atom=atom.substr(0,atom.size()-1);
+            if (atom.size()==0) continue;
+            if      (atom.size()==1) atom=" "+atom+"  ";
+            else if (atom.size()==2) atom=" "+atom+" "; // wrong for sidechain H
+            else if (atom.size()==3) atom=" "+atom;
+            else if (atom.size()>=5) continue;
+
+            AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+            if      (AA.size()==1) AA="  "+AA;
+            else if (AA.size()==2) AA=" " +AA;
+            else if (AA.size()>=4) continue;
+
+            if (_atom_site.count("auth_asym_id"))
+                 asym_id=line_vec[_atom_site["auth_asym_id"]];
+            else asym_id=line_vec[_atom_site["label_asym_id"]];
+            if (asym_id==".") asym_id=" ";
+            if (_atom_site.count("pdbx_PDB_model_num") && 
+                model_index!=line_vec[_atom_site["pdbx_PDB_model_num"]])
+            {
+                model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                if (PDB_lines.size() && ter_opt>=1) break;
+                if (PDB_lines.size()==0 || split_opt>=1)
+                {
+                    PDB_lines.push_back(tmp_str_vec);
+                    prev_asym_id=asym_id;
+                }
+            }
+
+            if (prev_asym_id!=asym_id)
+            {
+                if (prev_asym_id!="" && ter_opt>=2) break;
+                if (split_opt>=2) PDB_lines.push_back(tmp_str_vec);
+            }
+            if (prev_asym_id!=asym_id) prev_asym_id=asym_id;
+            if (PDB_lines.size()==0) PDB_lines.push_back(tmp_str_vec); // no model column and split_opt<2: open the single chain
+            if (_atom_site.count("auth_seq_id"))
+                 resi=line_vec[_atom_site["auth_seq_id"]];
+            else resi=line_vec[_atom_site["label_seq_id"]];
+            if (_atom_site.count("pdbx_PDB_ins_code") && 
+                line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+            else resi+=" ";
+
+            i++;
+            i8_stream<<"ATOM  "
+                <<setw(5)<<i<<" "<<atom<<" "<<AA<<setw(2)<<asym_id.substr(0,2)
+                <<setw(5)<<resi.substr(0,5)<<"   "
+                <<setw(8)<<line_vec[_atom_site["Cartn_x"]].substr(0,8)
+                <<setw(8)<<line_vec[_atom_site["Cartn_y"]].substr(0,8)
+                <<setw(8)<<line_vec[_atom_site["Cartn_z"]].substr(0,8);
+            PDB_lines.back().push_back(i8_stream.str());
+            i8_stream.str(string());
+        }
+    }
+
+    if (compress_type) fin_gz.close();
+    else               fin.close();
+    return PDB_lines.size();
+}
+
+/* Write the superposed full-atom structure of every chain to its own file.
+ * Each file of chain_list is re-read with get_full_PDB_lines() (HETATM
+ * included; ter_opt, infmt_opt and split_opt are passed through) and
+ * handed to clear_full_PDB_lines() with atom_opt. Every chain that then
+ * holds at least 3 lines is transformed with row m of ut_mat (u = values
+ * 0-8 row by row, t = values 9-11) and written, followed by "TER", to
+ * "<fname_super>.<m>.pdb", where m counts the chains written so far.
+ * With mirror_opt, z is negated before the transform. */
+void output_dock(const vector<string>&chain_list, const int ter_opt,
+    const int split_opt, const int infmt_opt, const string atom_opt,
+    const int mirror_opt, double **ut_mat, const string&fname_super)
+{
+    const int het_opt=1;  // keep HETATM records
+    int out_idx=0;        // next output file number == row of ut_mat
+    double xyz_in[3];     // before transform
+    double xyz_out[3];    // after transform
+    double t[3];
+    double u[3][3];
+    vector<vector<string> >PDB_lines;
+    stringstream buf;
+
+    for (size_t f=0;f<chain_list.size();f++)
+    {
+        const int chainnum=get_full_PDB_lines(chain_list[f], PDB_lines,
+            ter_opt, infmt_opt, split_opt, het_opt);
+        if (!chainnum) continue;
+        clear_full_PDB_lines(PDB_lines, atom_opt); // clear chains with <3 residue
+        for (int chain_i=0;chain_i<chainnum;chain_i++)
+        {
+            const vector<string> &atoms=PDB_lines[chain_i];
+            if (atoms.size()<3) continue;
+            buf<<fname_super<<'.'<<out_idx<<".pdb";
+            const string filename=buf.str();
+            buf.str(string());
+            // unpack row out_idx of ut_mat into u (3x3) and t (3)
+            for (int k=0;k<9;k++) u[k/3][k%3]=ut_mat[out_idx][k];
+            for (int k=0;k<3;k++) t[k]=ut_mat[out_idx][9+k];
+            // rewrite columns 31-54 (x,y,z) of every line of the chain
+            for (size_t a=0;a<atoms.size();a++)
+            {
+                const string &line=atoms[a];
+                for (int k=0;k<3;k++)
+                    xyz_in[k]=atof(line.substr(30+8*k,8).c_str());
+                if (mirror_opt) xyz_in[2]=-xyz_in[2];
+                transform(t, u, xyz_in, xyz_out);
+                buf<<line.substr(0,30)<<setiosflags(ios::fixed)
+                   <<setprecision(3)
+                   <<setw(8)<<xyz_out[0]<<setw(8)<<xyz_out[1]<<setw(8)<<xyz_out[2]
+                   <<line.substr(54)<<'\n';
+            }
+            buf<<"TER"<<endl;
+            // write the buffered chain to its file, then reset the buffer
+            ofstream fp(filename.c_str());
+            fp<<buf.str();
+            fp.close();
+            buf.str(string());
+            PDB_lines[chain_i].clear();
+            out_idx++;
+        } // chain_i
+        PDB_lines.clear();
+    } // f
+}
+
+/* Parse every file of chain_list with get_PDB_lines() and append, for each
+ * chain with at least 3 lines, its coordinates to a_vec, its sequence to
+ * seq_vec, its secondary structure to sec_vec and its length to len_vec.
+ * mol_vec is rebuilt at the end from mol_opt or, failing that, from the
+ * letter case of each sequence; len_na sums chains with mol_vec>0, len_aa the rest. */
+void parse_chain_list(const vector<string>&chain_list,
+    vector<vector<vector<double> > >&a_vec, vector<vector<char> >&seq_vec,
+    vector<vector<char> >&sec_vec, vector<int>&mol_vec, vector<int>&len_vec,
+    vector<string>&chainID_list, const int ter_opt, const int split_opt,
+    const string mol_opt, const int infmt_opt, const string atom_opt,
+    const int mirror_opt, const int het_opt, int &len_aa, int &len_na,  
+    const int o_opt, vector<string>&resi_vec) // o_opt is not referenced in this function
+{
+    size_t i;
+    int chain_i,r;
+    string name;
+    int chainnum;
+    double **xa;
+    int len;
+    char *seq,*sec;
+    vector<vector<string> >PDB_lines;
+    vector<double> tmp_atom_array(3,0);
+    vector<vector<double> > tmp_chain_array;
+    vector<char>tmp_seq_array;
+    vector<char>tmp_sec_array;
+    int read_resi=2; // passed through to read_PDB()
+    for (i=0;i<chain_list.size();i++)
+    {
+        name=chain_list[i];
+        chainnum=get_PDB_lines(name, PDB_lines, chainID_list,
+            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt, het_opt);
+        if (!chainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<name
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        for (chain_i=0;chain_i<chainnum;chain_i++)
+        {
+            len=PDB_lines[chain_i].size();
+            if (!len)
+            {
+                cerr<<"Warning! Cannot parse file: "<<name
+                    <<". Chain length 0."<<endl;
+                continue;
+            }
+            else if (len<3)
+            {
+                cerr<<"Sequence is too short <3!: "<<name<<endl;
+                continue; // NOTE(review): presumably chainID_list already holds an entry for this skipped chain -- verify it stays aligned with a_vec
+            }
+            NewArray(&xa, len, 3);
+            seq = new char[len + 1];
+            sec = new char[len + 1];
+            len = read_PDB(PDB_lines[chain_i], xa, seq, resi_vec, read_resi); // len is replaced by read_PDB()'s return value
+            if (mirror_opt) for (r=0;r<len;r++) xa[r][2]=-xa[r][2];
+            if (mol_vec[chain_i]>0 || mol_opt=="RNA")
+                make_sec(seq, xa, len, sec,atom_opt);
+            else make_sec(xa, len, sec); // secondary structure assignment
+            /* store in vector */
+            tmp_chain_array.assign(len,tmp_atom_array);
+            vector<char>tmp_seq_array(len+1,0); // shadows the function-scope tmp_seq_array
+            vector<char>tmp_sec_array(len+1,0); // shadows the function-scope tmp_sec_array
+            for (r=0;r<len;r++)
+            {
+                tmp_chain_array[r][0]=xa[r][0];
+                tmp_chain_array[r][1]=xa[r][1];
+                tmp_chain_array[r][2]=xa[r][2];
+                tmp_seq_array[r]=seq[r];
+                tmp_sec_array[r]=sec[r];
+            }
+            a_vec.push_back(tmp_chain_array);
+            seq_vec.push_back(tmp_seq_array);
+            sec_vec.push_back(tmp_sec_array);
+            len_vec.push_back(len);
+
+            /* clean up */
+            tmp_chain_array.clear();
+            tmp_seq_array.clear();
+            tmp_sec_array.clear();
+            PDB_lines[chain_i].clear();
+            DeleteArray(&xa, len); // uses the length returned by read_PDB(), not the allocated one
+            delete [] seq;
+            delete [] sec;
+        } // chain_i
+        name.clear();
+        PDB_lines.clear();
+        mol_vec.clear(); // presumably get_PDB_lines() appends one entry per chain, so mol_vec[chain_i] must restart at 0 for the next file -- confirm
+    } // i
+    tmp_atom_array.clear();
+    if (mol_opt=="RNA") mol_vec.assign(a_vec.size(),1);
+    else if (mol_opt=="protein") mol_vec.assign(a_vec.size(),-1);
+    else
+    {
+        mol_vec.assign(a_vec.size(),0);
+        for (i=0;i<a_vec.size();i++)
+        {
+            for (r=0;r<len_vec[i];r++)
+            {
+                if (seq_vec[i][r]>='a' && seq_vec[i][r]<='z') mol_vec[i]++; // lowercase code: vote towards len_na
+                else mol_vec[i]--;
+            }
+        }
+    }
+
+    len_aa=0;
+    len_na=0;
+    for (i=0;i<a_vec.size();i++)
+    {
+        if (mol_vec[i]>0) len_na+=len_vec[i];
+        else              len_aa+=len_vec[i];
+    }
+}
+
+/* Concatenate every assigned chain pair into flat, complex-level buffers.
+ * For each chain i of complex 1 with partner j=assign1_list[i]>=0, the
+ * residues of chain i (sequence, secondary structure, coordinates) are
+ * appended to seqx/secx/xa and those of chain j to seqy/secy/ya; the four
+ * char buffers are NUL-terminated at the end. sequence is reset to two
+ * strings: the concatenated seqxA_mat[i][j] and seqyA_mat[i][j].
+ * chain2_num and assign2_list are accepted but not read.
+ * Returns the sum of mol_vec1[i]+mol_vec2[j] over the assigned pairs. */
+int copy_chain_pair_data(
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int chain1_num, int chain2_num,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, vector<string>&sequence)
+{
+    sequence.assign(2, string()); // [0]: rows of complex 1, [1]: complex 2
+    int mol_sum=0;
+    int nx=0; // residues written to the x buffers so far
+    int ny=0; // residues written to the y buffers so far
+    for (int c1=0; c1<chain1_num; ++c1)
+    {
+        const int c2=assign1_list[c1];
+        if (c2<0) continue; // chain c1 has no partner
+        /* chain c1 of complex 1 */
+        for (int k=0; k<xlen_vec[c1]; ++k, ++nx)
+        {
+            seqx[nx]=seqx_vec[c1][k];
+            secx[nx]=secx_vec[c1][k];
+            for (int d=0; d<3; ++d) xa[nx][d]=xa_vec[c1][k][d];
+        }
+        sequence[0]+=seqxA_mat[c1][c2];
+        /* partner chain c2 of complex 2 */
+        for (int k=0; k<ylen_vec[c2]; ++k, ++ny)
+        {
+            seqy[ny]=seqy_vec[c2][k];
+            secy[ny]=secy_vec[c2][k];
+            for (int d=0; d<3; ++d) ya[ny][d]=ya_vec[c2][k][d];
+        }
+        sequence[1]+=seqyA_mat[c1][c2];
+        mol_sum+=mol_vec1[c1]+mol_vec2[c2];
+    }
+    seqx[nx]=0;
+    secx[nx]=0;
+    seqy[ny]=0;
+    secy[ny]=0;
+    return mol_sum;
+}
+
+/* Superpose the currently assigned chain pairs as one complex, then score
+ * every chain pair in that frame. TMalign_main is run on the concatenated
+ * assigned chains to obtain t0, u0; each chain i of complex 1 is rotated
+ * with them and compared with each chain j of complex 2 by se_main, which
+ * refreshes seqxA_mat, seqyA_mat and TMave_mat (-1 for skipped pairs).
+ * Returns the sum of TMave_mat[i][j] over pairs with assign1_list[i]==j. */
+double MMalign_search(
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int len_aa, int len_na, int chain1_num, int chain2_num, double **TMave_mat,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, vector<string>&sequence,
+    double d0_scale, bool fast_opt, const int i_opt=3)
+{
+    double total_score=0;
+    int i,j;
+    int xlen=0;
+    int ylen=0;
+    for (i=0;i<chain1_num;i++)
+    {
+        if (assign1_list[i]<0) continue;
+        xlen+=xlen_vec[i];
+        ylen+=ylen_vec[assign1_list[i]];
+    }
+    if (xlen<=3 || ylen<=3) return total_score;
+    /* scratch buffers for the concatenated assigned chains; the pointer
+     * parameters are passed by value, so the caller's copies are unchanged */
+    seqx = new char[xlen+1];
+    secx = new char[xlen+1];
+    NewArray(&xa, xlen, 3);
+    seqy = new char[ylen+1];
+    secy = new char[ylen+1];
+    NewArray(&ya, ylen, 3);
+    int mol_type=copy_chain_pair_data(xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, chain1_num, chain2_num,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence);
+    /* declare variable specific to this pair of TMalign */
+    double t0[3], u0[3][3];
+    double TM1, TM2;
+    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+    double d0_0, TM_0;
+    double d0A, d0B, d0u, d0a;
+    double d0_out=5.0;
+    string seqM, seqxA, seqyA;// for output alignment
+    double rmsd0 = 0.0;
+    int L_ali;                // Aligned length in standard_TMscore
+    double Liden=0;
+    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+    int n_ali=0;
+    int n_ali8=0;
+    double Lnorm_ass=len_aa+len_na;
+
+    /* entry function for structure alignment */
+    TMalign_main(xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        i_opt, false, true, false, fast_opt, mol_type, -1);
+
+    /* clean up */
+    delete [] seqx;
+    delete [] seqy;
+    delete [] secx;
+    delete [] secy;
+    DeleteArray(&xa,xlen);
+    DeleteArray(&ya,ylen);
+
+    /* re-score every chain pair in the complex frame given by t0, u0 */
+    for (i=0;i<chain1_num;i++)
+    {
+        xlen=xlen_vec[i];
+        if (xlen<3)
+        {
+            for (j=0;j<chain2_num;j++) TMave_mat[i][j]=-1;
+            continue;
+        }
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+        /* xt: chain i after applying the complex-level t0, u0 */
+        double **xt;
+        NewArray(&xt, xlen, 3);
+        do_rotation(xa, xt, xlen, t0, u0);
+
+        for (j=0;j<chain2_num;j++)
+        {
+            if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+            ylen=ylen_vec[j];
+            if (ylen<3)
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+                ylen,ya,seqy,secy);
+            /* reset the per-pair outputs reused from the complex-level run */
+            d0_out=5.0;
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+            rmsd0 = 0.0;
+            Liden=0;
+            int *invmap = new int[ylen+1];
+
+            double Lnorm_ass=len_aa; // shadows the complex-level Lnorm_ass
+            if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_ass=len_na;
+            /* entry function for structure alignment */
+            se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                0, false, 2, false, mol_vec1[i]+mol_vec2[j], 1, invmap);
+
+            /* store result (nothing is printed here) */
+            seqxA_mat[i][j]=seqxA;
+            seqyA_mat[i][j]=seqyA;
+
+            TMave_mat[i][j]=TM4*Lnorm_ass;
+            if (assign1_list[i]==j) total_score+=TMave_mat[i][j];
+            /* clean up */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+            delete[]invmap;
+        }
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&xt,xlen);
+    }
+    return total_score;
+}
+
+/* Final alignment of the whole complex for the chosen chain assignment.
+ * Concatenates the assigned chain pairs, runs TMalign_main on them with the
+ * stored pair alignments as input (i_opt=3), assembles '*'-separated rows
+ * for assigned and unassigned chains and prints them via output_results.
+ * With full_opt, each assigned pair is then re-scored by se_main in the
+ * complex frame (t0, u0) and printed. Returns early if xlen<=3 or ylen<=3. */
+void MMalign_final(
+    const string xname, const string yname,
+    const vector<string> chainID_list1, const vector<string> chainID_list2,
+    string fname_super, string fname_lign, string fname_matrix,
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int len_aa, int len_na, int chain1_num, int chain2_num,
+    double **TMave_mat,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqM_mat,
+    vector<vector<string> >&seqyA_mat, int *assign1_list, int *assign2_list,
+    vector<string>&sequence, const double d0_scale, const bool m_opt,
+    const int o_opt, const int outfmt_opt, const int ter_opt,
+    const int split_opt, const bool a_opt, const bool d_opt,
+    const bool fast_opt, const bool full_opt, const int mirror_opt,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
+{
+    int i,j;
+    int xlen=0;
+    int ylen=0;
+    for (i=0;i<chain1_num;i++) xlen+=xlen_vec[i];
+    for (j=0;j<chain2_num;j++) ylen+=ylen_vec[j];
+    if (xlen<=3 || ylen<=3) return;
+
+    seqx = new char[xlen+1];
+    secx = new char[xlen+1];
+    NewArray(&xa, xlen, 3);
+    seqy = new char[ylen+1];
+    secy = new char[ylen+1];
+    NewArray(&ya, ylen, 3);
+    // NOTE(review): xlen/ylen count all chains but only assigned ones are copied
+    int mol_type=copy_chain_pair_data(xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, chain1_num, chain2_num,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence);
+
+    /* declare variable specific to this pair of TMalign */
+    double t0[3], u0[3][3];
+    double TM1, TM2;
+    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+    double d0_0, TM_0;
+    double d0A, d0B, d0u, d0a;
+    double d0_out=5.0;
+    string seqM, seqxA, seqyA;// for output alignment
+    double rmsd0 = 0.0;
+    int L_ali;                // Aligned length in standard_TMscore
+    double Liden=0;
+    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+    int n_ali=0;
+    int n_ali8=0;
+
+    double Lnorm_ass=len_aa+len_na;
+
+    /* entry function for structure alignment */
+    TMalign_main(xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        3, a_opt, false, d_opt, fast_opt, mol_type, -1);
+
+    /* prepare full complex alignment */
+    string chainID1="";
+    string chainID2="";
+    sequence.clear();
+    sequence.push_back(""); // seqxA
+    sequence.push_back(""); // seqyA
+    sequence.push_back(""); // seqM
+    int aln_start=0;
+    int aln_end=0;
+    for (i=0;i<chain1_num;i++)
+    {
+        j=assign1_list[i];
+        if (j<0) continue;
+        chainID1+=chainID_list1[i];
+        chainID2+=chainID_list2[j];
+        sequence[0]+=seqxA_mat[i][j]+'*';
+        sequence[1]+=seqyA_mat[i][j]+'*';
+
+        aln_end+=seqxA_mat[i][j].size();
+        seqM_mat[i][j]=seqM.substr(aln_start,aln_end-aln_start);
+        sequence[2]+=seqM_mat[i][j]+'*';
+        aln_start=aln_end;
+    }
+
+    /* prepare unaligned region */
+    for (i=0;i<chain1_num;i++)
+    {
+        if (assign1_list[i]>=0) continue;
+        chainID1+=chainID_list1[i];
+        chainID2+=':';
+        string s(seqx_vec[i].begin(),seqx_vec[i].end());
+        sequence[0]+=s.substr(0,xlen_vec[i])+'*';
+        sequence[1]+=string(xlen_vec[i],'-')+'*';
+        s.clear();
+        sequence[2]+=string(xlen_vec[i],' ')+'*';
+    }
+    for (j=0;j<chain2_num;j++)
+    {
+        if (assign2_list[j]>=0) continue;
+        chainID1+=':';
+        chainID2+=chainID_list2[j];
+        string s(seqy_vec[j].begin(),seqy_vec[j].end());
+        sequence[0]+=string(ylen_vec[j],'-')+'*';
+        sequence[1]+=s.substr(0,ylen_vec[j])+'*';
+        s.clear();
+        sequence[2]+=string(ylen_vec[j],' ')+'*';
+    }
+
+    /* print alignment */
+    output_results(xname, yname, chainID1.c_str(), chainID2.c_str(),
+        xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+        sequence[2].c_str(), sequence[0].c_str(), sequence[1].c_str(),
+        Liden, n_ali8, L_ali, TM_ali, rmsd_ali,
+        TM_0, d0_0, d0A, d0B, 0, d0_scale, d0a, d0u, 
+        (m_opt?fname_matrix:"").c_str(), outfmt_opt, ter_opt, true,
+        split_opt, o_opt, fname_super,
+        false, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2);
+
+    /* clean up */
+    seqM.clear();
+    seqxA.clear();
+    seqyA.clear();
+    delete [] seqx;
+    delete [] seqy;
+    delete [] secx;
+    delete [] secy;
+    DeleteArray(&xa,xlen);
+    DeleteArray(&ya,ylen);
+    sequence[0].clear();
+    sequence[1].clear();
+    sequence[2].clear();
+
+    if (!full_opt) return;
+
+    cout<<"# End of alignment for full complex. The following blocks list alignments for individual chains."<<endl;
+
+    /* re-compute chain level alignment */
+    for (i=0;i<chain1_num;i++)
+    {
+        j=assign1_list[i];
+        if (j<0) continue;
+        xlen=xlen_vec[i];
+        ylen=ylen_vec[j];
+        if (ylen<3) // tested before any allocation so that skipping leaks nothing
+        {
+            TMave_mat[i][j]=-1;
+            continue;
+        }
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+
+        double **xt; // chain i after applying the complex-level t0, u0
+        NewArray(&xt, xlen, 3);
+        do_rotation(xa, xt, xlen, t0, u0);
+
+        seqy = new char[ylen+1];
+        secy = new char[ylen+1];
+        NewArray(&ya, ylen, 3);
+        copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+            ylen,ya,seqy,secy);
+
+        /* declare variable specific to this pair of TMalign */
+        d0_out=5.0;
+        rmsd0 = 0.0;
+        Liden=0;
+        int *invmap = new int[ylen+1];
+        seqM="";
+        seqxA="";
+        seqyA="";
+        double Lnorm_ass=len_aa; // shadows the complex-level Lnorm_ass
+        if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_ass=len_na;
+        sequence[0]=seqxA_mat[i][j];
+        sequence[1]=seqyA_mat[i][j];
+
+        /* entry function for structure alignment */
+        se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, Lnorm_ass, d0_scale,
+            1, a_opt, 2, d_opt, mol_vec1[i]+mol_vec2[j], 1, invmap);
+
+        /* print result */
+        output_results(xname, yname,
+            chainID_list1[i].c_str(), chainID_list2[j].c_str(),
+            xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+            seqM_mat[i][j].c_str(), seqxA_mat[i][j].c_str(),
+            seqyA_mat[i][j].c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali,
+            TM_0, d0_0, d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, 
+            "", outfmt_opt, ter_opt, false, split_opt, 0,
+            "", false, a_opt, false, d_opt, 0, resi_vec1, resi_vec2);
+
+        /* clean up */
+        seqxA.clear();
+        seqM.clear();
+        seqyA.clear();
+        sequence[0].clear();
+        sequence[1].clear();
+        delete[]seqy;
+        delete[]secy;
+        DeleteArray(&ya,ylen);
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&xt,xlen);
+        delete[]invmap;
+    }
+    sequence.clear();
+}
+
+/* Snapshot one chain-assignment state into another: assign1_list and
+ * assign2_list go to assign1_tmp and assign2_tmp, and the chain1_num x
+ * chain2_num matrices seqxA_mat, seqyA_mat and TMave_mat go to their *_tmp
+ * counterparts. sequence is reset to two strings holding, in chain order,
+ * seqxA_mat[i][j] and seqyA_mat[i][j] of every pair with assign1_list[i]==j. */
+void copy_chain_assign_data(int chain1_num, int chain2_num, 
+    vector<string> &sequence,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, double **TMave_mat,
+    vector<vector<string> >&seqxA_tmp, vector<vector<string> >&seqyA_tmp,
+    int *assign1_tmp,  int *assign2_tmp,  double **TMave_tmp)
+{
+    /* start from two empty alignment rows */
+    sequence.assign(2, string());
+    for (int c1=0; c1<chain1_num; ++c1) assign1_tmp[c1]=assign1_list[c1];
+    for (int c2=0; c2<chain2_num; ++c2) assign2_tmp[c2]=assign2_list[c2];
+    // copy the per-pair matrices and rebuild the concatenated alignment
+    for (int c1=0; c1<chain1_num; ++c1)
+    {
+        for (int c2=0; c2<chain2_num; ++c2)
+        {
+            seqxA_tmp[c1][c2]=seqxA_mat[c1][c2];
+            seqyA_tmp[c1][c2]=seqyA_mat[c1][c2];
+            TMave_tmp[c1][c2]=TMave_mat[c1][c2];
+            if (assign1_list[c1]!=c2) continue; // not an assigned pair
+            sequence[0]+=seqxA_mat[c1][c2];
+            sequence[1]+=seqyA_mat[c1][c2];
+        }
+    }
+}
+
+/* Hill-climb on the chain assignment for at most max_iter rounds. Each round
+ * runs MMalign_search on a scratch copy of the assignment, re-derives the
+ * assignment with enhanced_greedy_search, and stops as soon as the new total
+ * score does not exceed max_total_score. An improving round updates
+ * max_total_score and is copied back to the caller's arrays and sequence. */
+void MMalign_iter(double & max_total_score, const int max_iter,
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int len_aa, int len_na, int chain1_num, int chain2_num, double **TMave_mat,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, vector<string>&sequence,
+    double d0_scale, bool fast_opt)
+{
+    int *assign1_tmp=new int[chain1_num];
+    int *assign2_tmp=new int[chain2_num];
+    double **TMave_tmp;
+    NewArray(&TMave_tmp,chain1_num,chain2_num);
+    vector<string> tmp_str_vec(chain2_num,"");
+    vector<vector<string> >seqxA_tmp(chain1_num,tmp_str_vec);
+    vector<vector<string> >seqyA_tmp(chain1_num,tmp_str_vec);
+    vector<string> sequence_tmp; // filled by the copy below, never read
+    copy_chain_assign_data(chain1_num, chain2_num, sequence_tmp,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat,
+        seqxA_tmp, seqyA_tmp, assign1_tmp,  assign2_tmp,  TMave_tmp);
+    for (int round=max_iter; round>0; --round)
+    {
+        // called for its updates to TMave_tmp/seqxA_tmp/seqyA_tmp; the
+        // score it returns is superseded by the greedy search below
+        MMalign_search(xa_vec, ya_vec, seqx_vec, seqy_vec,
+            secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+            xa, ya, seqx, seqy, secx, secy, len_aa, len_na,
+            chain1_num, chain2_num,
+            TMave_tmp, seqxA_tmp, seqyA_tmp, assign1_tmp, assign2_tmp,
+            sequence, d0_scale, fast_opt);
+        const double total_score=enhanced_greedy_search(TMave_tmp,
+            assign1_tmp, assign2_tmp, chain1_num, chain2_num);
+        if (total_score<=max_total_score) break; // no improvement: stop
+        max_total_score=total_score;
+        copy_chain_assign_data(chain1_num, chain2_num, sequence,
+            seqxA_tmp, seqyA_tmp, assign1_tmp,  assign2_tmp,  TMave_tmp,
+            seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat);
+    }
+    vector<vector<string> >().swap(seqyA_tmp);
+    vector<vector<string> >().swap(seqxA_tmp);
+    vector<string>().swap(tmp_str_vec);
+    DeleteArray(&TMave_tmp,chain1_num);
+    delete [] assign2_tmp;
+    delete [] assign1_tmp;
+}
+
+
+/* Needleman-Wunsch DP on TM-score-like similarities, restricted by mask.
+ * Input: vectors x, y, rotation matrix t, u, scale factor d02, gap_open,
+ *   and mask, where mask[i][j] (1-based) tells whether residue i-1 of x
+ *   may be paired with residue j-1 of y
+ * Output: j2i[0:len2-1] \in {0:len1-1} U {-1}; j2i[len2] is also reset
+ *   to -1, so j2i needs len2+1 entries
+ * path[0:len1, 0:len2] is true where the cell was reached diagonally;
+ * val[0:len1, 0:len2] is the accumulated score */
+void NWDP_TM_dimer(bool **path, double **val, double **x, double **y,
+    int len1, int len2, bool **mask,
+    double t[3], double u[3][3], double d02, double gap_open, int j2i[])
+{
+    int i, j;
+    double h, v, d;
+
+    //initialization
+    for(i=0; i<=len1; i++)
+    {
+        val[i][0]=i*gap_open;
+        path[i][0]=false; //not from diagonal
+    }
+    for(j=0; j<=len2; j++)
+    {
+        val[0][j]=j*gap_open;
+        path[0][j]=false; //not from diagonal
+        j2i[j]=-1;    //all are not aligned
+    }
+    double xx[3], dij;
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        transform(t, u, &x[i-1][0], xx);
+        for(j=1; j<=len2; j++)
+        {
+            d=-FLT_MAX; //masked-out pair must never win; FLT_MIN is >0
+            if (mask[i][j])
+            {
+                dij=dist(xx, &y[j-1][0]);
+                d=val[i-1][j-1] +  1.0/(1+dij/d02);
+            }
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+            if(d>=h && d>=v)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else
+            {
+                path[i][j]=false; //from horizontal or vertical
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for j
+    } //for i
+
+    //trace back to extract the alignment
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1;
+            i--;
+            j--;
+        }
+        else
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--;
+            else i--;
+        }
+    }
+}
+
+/* +ss
+ * Needleman-Wunsch DP scoring 1 for identical secondary structure, 0 else,
+ * restricted by mask (mask[i][j], 1-based: may residue i-1 pair with j-1?)
+ * Input: secondary structure secx, secy, mask, and gap_open
+ * Output: j2i[0:len2-1] \in {0:len1-1} U {-1}; j2i[len2] is also reset
+ *   to -1, so j2i needs len2+1 entries
+ * path[0:len1, 0:len2] is true where the cell was reached diagonally */
+void NWDP_TM_dimer(bool **path, double **val, const char *secx, const char *secy,
+    const int len1, const int len2, bool **mask, const double gap_open, int j2i[])
+{
+    int i, j;
+    double h, v, d;
+
+    //initialization
+    for(i=0; i<=len1; i++)
+    {
+        val[i][0]=i*gap_open;
+        path[i][0]=false; //not from diagonal
+    }
+
+    for(j=0; j<=len2; j++)
+    {
+        val[0][j]=j*gap_open;
+        path[0][j]=false; //not from diagonal
+        j2i[j]=-1;    //all are not aligned
+    }
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        for(j=1; j<=len2; j++)
+        {
+            d=-FLT_MAX; //masked-out pair must never win; FLT_MIN is >0
+            if (mask[i][j])
+                d=val[i-1][j-1] + 1.0*(secx[i-1]==secy[j-1]);
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+            if(d>=h && d>=v)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else
+            {
+                path[i][j]=false; //from horizontal or vertical
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for j
+    } //for i
+
+    //trace back to extract the alignment
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1;
+            i--;
+            j--;
+        }
+        else
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--;
+            else i--;
+        }
+    }
+}
+
+//Heuristic run of masked dynamic programming, iterated to find the best
+//alignment: for every gap penalty gap_open[g], g in [g1,g2), alternate
+//NWDP_TM_dimer and TMscore8_search until the score changes by less than
+//1e-6 between consecutive iterations or iteration_max is reached.
+//input: initial rotation matrix t, u
+//       vectors x and y, d0
+//       mask of residue pairs that may be aligned
+//       r1, r2, xtm, ytm, xt, path, val: caller-provided work arrays
+//       D0_MIN is accepted but not read here
+//output: best alignment that maximizes the TMscore, will be stored in
+//        invmap0[0:ylen-1]
+//return: the best TMscore found, or -1 if no iteration was run
+double DP_iter_dimer(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, bool **path, double **val, double **x, double **y,
+    int xlen, int ylen, bool **mask, double t[3], double u[3][3], int invmap0[],
+    int g1, int g2, int iteration_max, double local_d0_search,
+    double D0_MIN, double Lnorm, double d0, double score_d8)
+{
+    double gap_open[2]={-0.6, 0};
+    int score_sum_method=8, simplify_step=40;
+    const double d02=d0*d0;
+    double rmsd;
+
+    double best_tm=-1;
+    double prev_tm=0; // carried over from one gap penalty to the next
+    int *cur_map=new int[ylen+1];
+
+    for(int g=g1; g<g2; g++)
+    {
+        for(int iter=0; iter<iteration_max; iter++)
+        {
+            // align under the current transform
+            NWDP_TM_dimer(path, val, x, y, xlen, ylen, mask,
+                t, u, d02, gap_open[g], cur_map);
+
+            // collect the coordinates of the aligned residue pairs
+            int n_pair=0;
+            for(int jy=0; jy<ylen; jy++)
+            {
+                const int ix=cur_map[jy];
+                if(ix<0) continue; // jy is unaligned
+                for(int c=0; c<3; c++)
+                {
+                    xtm[n_pair][c]=x[ix][c];
+                    ytm[n_pair][c]=y[jy][c];
+                }
+                n_pair++;
+            }
+
+            // score the aligned pairs with the TM-score search
+            const double tm=TMscore8_search(r1, r2, xtm, ytm, xt, n_pair,
+                t, u, simplify_step, score_sum_method, &rmsd,
+                local_d0_search, Lnorm, score_d8, d0);
+
+            // remember the best-scoring alignment seen so far
+            if(tm>best_tm)
+            {
+                best_tm=tm;
+                for(int jy=0; jy<ylen; jy++) invmap0[jy]=cur_map[jy];
+            }
+
+            // converged: the score no longer changes between iterations
+            if(iter>0 && fabs(prev_tm-tm)<0.000001) break;
+            prev_tm=tm;
+        }// for iter
+    }// for g
+
+    delete []cur_map;
+    return best_tm;
+}
+
+// Initial alignment from secondary structure alone: masked DP, gap_open -1.0.
+void get_initial_ss_dimer(bool **path, double **val, const char *secx,
+    const char *secy, int xlen, int ylen, bool **mask, int *y2x)
+{
+    NWDP_TM_dimer(path, val, secx, secy, xlen, ylen, mask, -1.0, y2x);
+}
+
+/* Seed alignment from local fragment superposition, restricted by mask.
+ * Fragments of two lengths (at most 20 and 100 residues, capped at a third
+ * and a half of the shorter chain) are slid along x and y in steps of
+ * n_jump1 and n_jump2. Each fragment pair is superposed with Kabsch, the
+ * resulting transform drives the masked DP, and the alignment with the
+ * highest get_score_fast value is copied to y2x[0:ylen-1].
+ * fast_opt makes the scan five times coarser.
+ * Returns true if an alignment with a positive score was stored in y2x;
+ * returns false without touching y2x if either chain has <3 residues. */
+bool get_initial5_dimer( double **r1, double **r2, double **xtm, double **ytm,
+    bool **path, double **val, double **x, double **y, int xlen, int ylen,
+    bool **mask, int *y2x,
+    double d0, double d0_search, const bool fast_opt, const double D0_MIN)
+{
+    int aL = getmin(xlen, ylen);
+    // Below 3 residues the step len/3 is 0 and the scan loops would never
+    // advance; bail out before anything is allocated.
+    if (aL < 3) return false;
+
+    double GL, rmsd;
+    double t[3];
+    double u[3][3];
+
+    // distance scale used by the DP: (d0+1.5)^2, floored at D0_MIN^2
+    double d01 = d0 + 1.5;
+    if (d01 < D0_MIN) d01 = D0_MIN;
+    double d02 = d01*d01;
+
+    double GLmax = 0;
+    int *invmap = new int[ylen + 1];
+
+    // jump on sequence1-------------->
+    int n_jump1 = 15;
+    if (xlen > 250)      n_jump1 = 45;
+    else if (xlen > 200) n_jump1 = 35;
+    else if (xlen > 150) n_jump1 = 25;
+    if (n_jump1 > (xlen / 3)) n_jump1 = xlen / 3;
+
+    // jump on sequence2-------------->
+    int n_jump2 = 15;
+    if (ylen > 250)      n_jump2 = 45;
+    else if (ylen > 200) n_jump2 = 35;
+    else if (ylen > 150) n_jump2 = 25;
+    if (n_jump2 > (ylen / 3)) n_jump2 = ylen / 3;
+
+    // fragment to superimpose-------------->
+    int n_frag[2] = { 20, 100 };
+    if (n_frag[0] > (aL / 3)) n_frag[0] = aL / 3;
+    if (n_frag[1] > (aL / 2)) n_frag[1] = aL / 2;
+
+    // start superimpose search-------------->
+    if (fast_opt)
+    {
+        n_jump1*=5;
+        n_jump2*=5;
+    }
+    bool flag = false;
+    for (int i_frag = 0; i_frag < 2; i_frag++)
+    {
+        int m1 = xlen - n_frag[i_frag] + 1;
+        int m2 = ylen - n_frag[i_frag] + 1;
+
+        for (int i = 0; i<m1; i = i + n_jump1) //index starts from 0, different from FORTRAN
+        {
+            for (int j = 0; j<m2; j = j + n_jump2)
+            {
+                for (int k = 0; k<n_frag[i_frag]; k++) //copy the fragment pair
+                {
+                    r1[k][0] = x[k + i][0];
+                    r1[k][1] = x[k + i][1];
+                    r1[k][2] = x[k + i][2];
+
+                    r2[k][0] = y[k + j][0];
+                    r2[k][1] = y[k + j][1];
+                    r2[k][2] = y[k + j][2];
+                }
+
+                // superpose the two structures and rotate it
+                Kabsch(r1, r2, n_frag[i_frag], 1, &rmsd, t, u);
+
+                double gap_open = 0.0;
+                NWDP_TM_dimer(path, val, x, y, xlen, ylen, mask,
+                    t, u, d02, gap_open, invmap);
+                GL = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen,
+                    invmap, d0, d0_search, t, u);
+                if (GL>GLmax)
+                {
+                    GLmax = GL;
+                    for (int ii = 0; ii<ylen; ii++) y2x[ii] = invmap[ii];
+                    flag = true;
+                }
+            }
+        }
+    }
+
+    delete[] invmap;
+    return flag;
+}
+
+/* Initial alignment by NWDP_TM on the matrix built by score_matrix_rmsd_sec,
+ * with pairs excluded by mask (indexed 1..xlen, 1..ylen) made unselectable.
+ * (Formerly every cell was overwritten with FLT_MIN, a tiny positive value.) */
+void get_initial_ssplus_dimer(double **r1, double **r2, double **score,
+    bool **path, double **val, const char *secx, const char *secy,
+    double **x, double **y, int xlen, int ylen, bool **mask,
+    int *y2x0, int *y2x, const double D0_MIN, double d0)
+{
+    score_matrix_rmsd_sec(r1, r2, score, secx, secy, x, y, xlen, ylen,
+        y2x0, D0_MIN,d0);
+    for (int i=1;i<=xlen;i++) for (int j=1;j<=ylen;j++)
+        if (!mask[i][j]) score[i][j]=-FLT_MAX; // so the DP should never pick it
+    double gap_open=-1.0;
+    NWDP_TM(score, path, val, xlen, ylen, gap_open, y2x);
+}
+
+/* Entry function for TM-align. Return TM-score calculation status:
+ * 0   - full TM-score calculation 
+ * 1   - terminated due to exception
+ * 2-7 - pre-terminated due to low TM-score */
+int TMalign_dimer_main(double **xa, double **ya,
+    const char *seqx, const char *seqy, const char *secx, const char *secy,
+    double t0[3], double u0[3][3],
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    bool **mask,
+    const vector<string> sequence, const double Lnorm_ass,
+    const double d0_scale, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const bool fast_opt,
+    const int mol_type, const double TMcut=-1)
+{
+    double D0_MIN;        //for d0
+    double Lnorm;         //normalization length
+    double score_d8,d0,d0_search,dcu0;//for TMscore search
+    double t[3], u[3][3]; //Kabsch translation vector and rotation matrix
+    double **score;       // Input score table for dynamic programming
+    bool   **path;        // for dynamic programming  
+    double **val;         // for dynamic programming  
+    double **xtm, **ytm;  // for TMscore search engine
+    double **xt;          //for saving the superposed version of r_1 or xtm
+    double **r1, **r2;    // for Kabsch rotation
+
+    /***********************/
+    /* allocate memory     */
+    /***********************/
+    int minlen = min(xlen, ylen);
+    NewArray(&score, xlen+1, ylen+1);
+    NewArray(&path, xlen+1, ylen+1);
+    NewArray(&val, xlen+1, ylen+1);
+    NewArray(&xtm, minlen, 3);
+    NewArray(&ytm, minlen, 3);
+    NewArray(&xt, xlen, 3);
+    NewArray(&r1, minlen, 3);
+    NewArray(&r2, minlen, 3);
+
+    /***********************/
+    /*    parameter set    */
+    /***********************/
+    parameter_set4search(xlen, ylen, D0_MIN, Lnorm, 
+        score_d8, d0, d0_search, dcu0);
+    int simplify_step    = 40; //for simplified search engine
+    int score_sum_method = 8;  //for scoring method, whether only sum over pairs with dis<score_d8
+
+    int i;
+    int *invmap0         = new int[ylen+1];
+    int *invmap          = new int[ylen+1];
+    double TM, TMmax=-1;
+    for(i=0; i<ylen; i++) invmap0[i]=-1;
+
+    double ddcc=0.4;
+    if (Lnorm <= 40) ddcc=0.1;   //Lnorm was setted in parameter_set4search
+    double local_d0_search = d0_search;
+
+    //************************************************//
+    //    get initial alignment from user's input:    //
+    //    Stick to the initial alignment              //
+    //************************************************//
+    bool bAlignStick = false;
+    if (i_opt==3)// if input has set parameter for "-I"
+    {
+        // In the original code, this loop starts from 1, which is
+        // incorrect. Fortran starts from 1 but C++ should starts from 0.
+        for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
+            invmap[j] = -1;
+
+        int i1 = -1;// in C version, index starts from zero, not from one
+        int i2 = -1;
+        int L1 = sequence[0].size();
+        int L2 = sequence[1].size();
+        int L = min(L1, L2);// Get positions for aligned residues
+        for (int kk1 = 0; kk1 < L; kk1++)
+        {
+            if (sequence[0][kk1] != '-') i1++;
+            if (sequence[1][kk1] != '-')
+            {
+                i2++;
+                if (i2 >= ylen || i1 >= xlen) kk1 = L;
+                else if (sequence[0][kk1] != '-') invmap[i2] = i1;
+            }
+        }
+
+        //--------------- 2. Align proteins from original alignment
+        double prevD0_MIN = D0_MIN;// stored for later use
+        int prevLnorm = Lnorm;
+        double prevd0 = d0;
+        TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+            invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8,
+            t, u, mol_type);
+        D0_MIN = prevD0_MIN;
+        Lnorm = prevLnorm;
+        d0 = prevd0;
+        TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+            invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0);
+        if (TM > TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+        }
+        bAlignStick = true;
+    }
+
+    /******************************************************/
+    /*    get initial alignment with gapless threading    */
+    /******************************************************/
+    if (!bAlignStick)
+    {
+        get_initial(r1, r2, xtm, ytm, xa, ya, xlen, ylen, invmap0, d0,
+            d0_search, fast_opt, t, u);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap0,
+            t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+            score_d8, d0);
+        if (TM>TMmax) TMmax = TM;
+        if (TMcut>0) copy_t_u(t, u, t0, u0);
+        //run dynamic programing iteratively to find the best alignment
+        TM = DP_iter_dimer(r1, r2, xtm, ytm, xt, path, val, xa, ya, xlen, ylen,
+             mask, t, u, invmap, 0, 2, (fast_opt)?2:30,
+             local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.5*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 2;
+            }
+        }
+
+        /************************************************************/
+        /*    get initial alignment based on secondary structure    */
+        /************************************************************/
+        get_initial_ss_dimer(path, val, secx, secy, xlen, ylen, mask, invmap);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
+            t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+            score_d8, d0);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+        if (TM > TMmax*0.2)
+        {
+            TM = DP_iter_dimer(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, mask, t, u, invmap, 0, 2,
+                (fast_opt)?2:30, local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.52*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 3;
+            }
+        }
+
+        /************************************************************/
+        /*    get initial alignment based on local superposition    */
+        /************************************************************/
+        //=initial5 in original TM-align
+        if (get_initial5_dimer( r1, r2, xtm, ytm, path, val, xa, ya,
+            xlen, ylen, mask, invmap, d0, d0_search, fast_opt, D0_MIN))
+        {
+            TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+                invmap, t, u, simplify_step, score_sum_method,
+                local_d0_search, Lnorm, score_d8, d0);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+            if (TM > TMmax*ddcc)
+            {
+                TM = DP_iter_dimer(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                    xlen, ylen, mask, t, u, invmap, 0, 2, 2,
+                    local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+                if (TM>TMmax)
+                {
+                    TMmax = TM;
+                    for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                    if (TMcut>0) copy_t_u(t, u, t0, u0);
+                }
+            }
+        }
+        else
+            cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" << endl;
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.54*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 4;
+            }
+        }
+
+        /********************************************************************/
+        /* get initial alignment by local superposition+secondary structure */
+        /********************************************************************/
+        //=initial3 in original TM-align
+        get_initial_ssplus_dimer(r1, r2, score, path, val, secx, secy, xa, ya,
+            xlen, ylen, mask, invmap0, invmap, D0_MIN, d0);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
+             t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+             score_d8, d0);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+        if (TM > TMmax*ddcc)
+        {
+            TM = DP_iter_dimer(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, mask, t, u, invmap, 0, 2,
+                (fast_opt)?2:30, local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.56*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 5;
+            }
+        }
+
+        /*******************************************************************/
+        /*    get initial alignment based on fragment gapless threading    */
+        /*******************************************************************/
+        //=initial4 in original TM-align
+        get_initial_fgt(r1, r2, xtm, ytm, xa, ya, xlen, ylen,
+            invmap, d0, d0_search, dcu0, fast_opt, t, u);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
+            t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+            score_d8, d0);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+        if (TM > TMmax*ddcc)
+        {
+            TM = DP_iter_dimer(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, mask, t, u, invmap, 1, 2, 2,
+                local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.58*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 6;
+            }
+        }
+
+        //************************************************//
+        //    get initial alignment from user's input:    //
+        //************************************************//
+        if (i_opt==1)// if input has set parameter for "-i"
+        {
+            for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
+                invmap[j] = -1;
+
+            int i1 = -1;// in C version, index starts from zero, not from one
+            int i2 = -1;
+            int L1 = sequence[0].size();
+            int L2 = sequence[1].size();
+            int L = min(L1, L2);// Get positions for aligned residues
+            for (int kk1 = 0; kk1 < L; kk1++)
+            {
+                if (sequence[0][kk1] != '-')
+                    i1++;
+                if (sequence[1][kk1] != '-')
+                {
+                    i2++;
+                    if (i2 >= ylen || i1 >= xlen) kk1 = L;
+                    else if (sequence[0][kk1] != '-') invmap[i2] = i1;
+                }
+            }
+
+            //--------------- 2. Align proteins from original alignment
+            double prevD0_MIN = D0_MIN;// stored for later use
+            int prevLnorm = Lnorm;
+            double prevd0 = d0;
+            TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya,
+                xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0,
+                d0_search, score_d8, t, u, mol_type);
+            D0_MIN = prevD0_MIN;
+            Lnorm = prevLnorm;
+            d0 = prevd0;
+
+            TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya,
+                xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm,
+                score_d8, d0);
+            if (TM > TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            }
+            // Different from get_initial, get_initial_ss and get_initial_ssplus
+            TM = DP_iter_dimer(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, mask, t, u, invmap, 0, 2,
+                (fast_opt)?2:30, local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            }
+        }
+    }
+
+
+
+    //*******************************************************************//
+    //    The alignment will not be changed any more in the following    //
+    //*******************************************************************//
+    //check if the initial alignment is generated appropriately
+    bool flag=false;
+    for(i=0; i<ylen; i++)
+    {
+        if(invmap0[i]>=0)
+        {
+            flag=true;
+            break;
+        }
+    }
+    if(!flag)
+    {
+        cout << "There is no alignment between the two structures! "
+             << "Program stop with no result!" << endl;
+        TM1=TM2=TM3=TM4=TM5=0;
+        return 1;
+    }
+
+    /* last TM-score pre-termination */
+    if (TMcut>0)
+    {
+        double TMtmp=approx_TM(xlen, ylen, a_opt,
+            xa, ya, t0, u0, invmap0, mol_type);
+
+        if (TMtmp<0.6*TMcut)
+        {
+            TM1=TM2=TM3=TM4=TM5=TMtmp;
+            clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                xtm, ytm, xt, r1, r2, xlen, minlen);
+            return 7;
+        }
+    }
+
+    //********************************************************************//
+    //    Detailed TMscore search engine --> prepare for final TMscore    //
+    //********************************************************************//
+    //run detailed TMscore search engine for the best alignment, and
+    //extract the best rotation matrix (t, u) for the best alignment
+    simplify_step=1;
+    if (fast_opt) simplify_step=40;
+    score_sum_method=8;
+    TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+        invmap0, t, u, simplify_step, score_sum_method, local_d0_search,
+        false, Lnorm, score_d8, d0);
+
+    //select pairs with dis<d8 for final TMscore computation and output alignment
+    int k=0;
+    int *m1, *m2;
+    double d;
+    m1=new int[xlen]; //alignd index in x
+    m2=new int[ylen]; //alignd index in y
+    do_rotation(xa, xt, xlen, t, u);
+    k=0;
+    for(int j=0; j<ylen; j++)
+    {
+        i=invmap0[j];
+        if(i>=0)//aligned
+        {
+            n_ali++;
+            d=sqrt(dist(&xt[i][0], &ya[j][0]));
+            if (d <= score_d8 || (i_opt == 3))
+            {
+                m1[k]=i;
+                m2[k]=j;
+
+                xtm[k][0]=xa[i][0];
+                xtm[k][1]=xa[i][1];
+                xtm[k][2]=xa[i][2];
+
+                ytm[k][0]=ya[j][0];
+                ytm[k][1]=ya[j][1];
+                ytm[k][2]=ya[j][2];
+
+                r1[k][0] = xt[i][0];
+                r1[k][1] = xt[i][1];
+                r1[k][2] = xt[i][2];
+                r2[k][0] = ya[j][0];
+                r2[k][1] = ya[j][1];
+                r2[k][2] = ya[j][2];
+
+                k++;
+            }
+        }
+    }
+    n_ali8=k;
+
+    Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u
+    rmsd0 = sqrt(rmsd0 / n_ali8);
+
+
+    //****************************************//
+    //              Final TMscore             //
+    //    Please set parameters for output    //
+    //****************************************//
+    double rmsd;
+    simplify_step=1;
+    score_sum_method=0;
+    double Lnorm_0=ylen;
+
+
+    //normalized by length of structure A
+    parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    d0A=d0;
+    d0_0=d0A;
+    local_d0_search = d0_search;
+    TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+    TM_0 = TM1;
+
+    //normalized by length of structure B
+    parameter_set4final(xlen+0.0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    d0B=d0;
+    local_d0_search = d0_search;
+    TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+
+    double Lnorm_d0;
+    if (a_opt>0)
+    {
+        //normalized by average length of structures A, B
+        Lnorm_0=(xlen+ylen)*0.5;
+        parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+        d0a=d0;
+        d0_0=d0a;
+        local_d0_search = d0_search;
+
+        TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM3;
+    }
+    if (u_opt)
+    {
+        //normalized by user assigned length
+        parameter_set4final(Lnorm_ass, D0_MIN, Lnorm,
+            d0, d0_search, mol_type);
+        d0u=d0;
+        d0_0=d0u;
+        Lnorm_0=Lnorm_ass;
+        local_d0_search = d0_search;
+        TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM4;
+    }
+    if (d_opt)
+    {
+        //scaled by user assigned d0
+        parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search);
+        d0_out=d0_scale;
+        d0_0=d0_scale;
+        //Lnorm_0=ylen;
+        Lnorm_d0=Lnorm_0;
+        local_d0_search = d0_search;
+        TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM5;
+    }
+
+    /* derive alignment from superposition */
+    int ali_len=xlen+ylen; //maximum length of alignment
+    seqxA.assign(ali_len,'-');
+    seqM.assign( ali_len,' ');
+    seqyA.assign(ali_len,'-');
+    
+    //do_rotation(xa, xt, xlen, t, u);
+    do_rotation(xa, xt, xlen, t0, u0);
+
+    int kk=0, i_old=0, j_old=0;
+    d=0;
+    for(int k=0; k<n_ali8; k++)
+    {
+        for(int i=i_old; i<m1[k]; i++)
+        {
+            //align x to gap
+            seqxA[kk]=seqx[i];
+            seqyA[kk]='-';
+            seqM[kk]=' ';                    
+            kk++;
+        }
+
+        for(int j=j_old; j<m2[k]; j++)
+        {
+            //align y to gap
+            seqxA[kk]='-';
+            seqyA[kk]=seqy[j];
+            seqM[kk]=' ';
+            kk++;
+        }
+
+        seqxA[kk]=seqx[m1[k]];
+        seqyA[kk]=seqy[m2[k]];
+        Liden+=(seqxA[kk]==seqyA[kk]);
+        d=sqrt(dist(&xt[m1[k]][0], &ya[m2[k]][0]));
+        if(d<d0_out) seqM[kk]=':';
+        else         seqM[kk]='.';
+        kk++;  
+        i_old=m1[k]+1;
+        j_old=m2[k]+1;
+    }
+
+    //tail
+    for(int i=i_old; i<xlen; i++)
+    {
+        //align x to gap
+        seqxA[kk]=seqx[i];
+        seqyA[kk]='-';
+        seqM[kk]=' ';
+        kk++;
+    }    
+    for(int j=j_old; j<ylen; j++)
+    {
+        //align y to gap
+        seqxA[kk]='-';
+        seqyA[kk]=seqy[j];
+        seqM[kk]=' ';
+        kk++;
+    }
+    seqxA=seqxA.substr(0,kk);
+    seqyA=seqyA.substr(0,kk);
+    seqM =seqM.substr(0,kk);
+
+    /* free memory */
+    clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+        xtm, ytm, xt, r1, r2, xlen, minlen);
+    delete [] m1;
+    delete [] m2;
+    return 0; // zero for no exception
+}
+
+/* Refine the chain-level alignments from one rigid-body superposition of
+ * all currently assigned chain pairs.
+ *
+ * Step 1: every chain i of complex 1 with assign1_list[i]>=0 and its partner
+ *         assign1_list[i] in complex 2 are concatenated and aligned together
+ *         by TMalign_dimer_main, which yields one transform (t0, u0). A
+ *         boolean mask is passed along that is true only inside the blocks
+ *         belonging to the same assigned chain pair (presumably so that the
+ *         dynamic programming cannot pair residues across different chain
+ *         pairs -- confirm against DP_iter_dimer).
+ * Step 2: each chain of complex 1 is rotated by (t0, u0) and re-aligned to
+ *         each chain of complex 2 with se_main. TMave_mat[i][j],
+ *         seqxA_mat[i][j] and seqyA_mat[i][j] are overwritten; TMave_mat is
+ *         set to -1 for pairs that are skipped (chain shorter than 3
+ *         residues, or mol_vec1[i]*mol_vec2[j]<0).
+ *
+ * total_score receives the sum of TMave_mat[i][j] over assigned pairs.
+ * Assigned pairs whose TM4 is <=0 are un-assigned in both assign lists.
+ * If the concatenated length of either complex is <=3 the function returns
+ * immediately and leaves every output, including total_score, untouched.
+ *
+ * xa, ya, seqx, seqy, secx and secy are received by value and only serve
+ * as scratch pointers: they are allocated and freed inside this function. */
+void MMalign_dimer(double & total_score, 
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int len_aa, int len_na, int chain1_num, int chain2_num, double **TMave_mat,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, vector<string>&sequence,
+    double d0_scale, bool fast_opt)
+{
+    int i,j;
+    int xlen=0;
+    int ylen=0;
+    vector<int> xlen_dimer; // lengths of the assigned chains of complex 1
+    vector<int> ylen_dimer; // lengths of their partners in complex 2
+    for (i=0;i<chain1_num;i++)
+    {
+        j=assign1_list[i];
+        if (j<0) continue;
+        xlen+=xlen_vec[i];
+        ylen+=ylen_vec[j];
+        xlen_dimer.push_back(xlen_vec[i]);
+        ylen_dimer.push_back(ylen_vec[j]);
+    }
+    if (xlen<=3 || ylen<=3) return;
+
+    bool **mask; // mask out inter-chain region
+    NewArray(&mask, xlen+1, ylen+1);
+    for (i=0;i<xlen+1;i++) for (j=0;j<ylen+1;j++) mask[i][j]=false;
+    for (i=0;i<xlen_dimer[0]+1;i++) mask[i][0]=true;
+    for (j=0;j<ylen_dimer[0]+1;j++) mask[0][j]=true;
+    // mask is indexed from 1, hence the running offsets start at 1
+    int prev_xlen=1;
+    int prev_ylen=1;
+    for (size_t c=0;c<xlen_dimer.size();c++)
+    {
+        for (i=prev_xlen;i<prev_xlen+xlen_dimer[c];i++)
+            for (j=prev_ylen;j<prev_ylen+ylen_dimer[c];j++) mask[i][j]=true;
+        prev_xlen+=xlen_dimer[c];
+        prev_ylen+=ylen_dimer[c];
+    }
+    vector<int>().swap(xlen_dimer);
+    vector<int>().swap(ylen_dimer);
+
+    seqx = new char[xlen+1];
+    secx = new char[xlen+1];
+    NewArray(&xa, xlen, 3);
+    seqy = new char[ylen+1];
+    secy = new char[ylen+1];
+    NewArray(&ya, ylen, 3);
+
+    int mol_type=copy_chain_pair_data(xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, chain1_num, chain2_num,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence);
+
+    /* declare variable specific to this pair of TMalign */
+    // Start from the identity transform: TMalign_dimer_main can return
+    // before it ever writes t0/u0 (no residue pair aligned), and the
+    // per-chain do_rotation below must not read indeterminate values.
+    double t0[3]={0,0,0};
+    double u0[3][3]={{1,0,0},{0,1,0},{0,0,1}};
+    double TM1, TM2;
+    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+    double d0_0, TM_0;
+    double d0A, d0B, d0u, d0a;
+    double d0_out=5.0;
+    string seqM, seqxA, seqyA;// for output alignment
+    double rmsd0 = 0.0;
+    int L_ali;                // Aligned length in standard_TMscore
+    double Liden=0;
+    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+    int n_ali=0;
+    int n_ali8=0;
+
+    double Lnorm_ass=len_aa+len_na;
+
+    // trailing flags: ..., u_opt=true, d_opt=false, fast_opt, mol_type,
+    // TMcut=-1 (no early termination on low TM-score)
+    TMalign_dimer_main(xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, mask, sequence, Lnorm_ass, d0_scale,
+        1, false, true, false, fast_opt, mol_type, -1);
+
+    /* clean up TM-align */
+    delete [] seqx;
+    delete [] seqy;
+    delete [] secx;
+    delete [] secy;
+    DeleteArray(&xa,xlen);
+    DeleteArray(&ya,ylen);
+    DeleteArray(&mask,xlen+1);
+
+    /* re-compute chain level alignment */
+    total_score=0;
+    for (i=0;i<chain1_num;i++)
+    {
+        xlen=xlen_vec[i];
+        if (xlen<3)
+        {
+            for (j=0;j<chain2_num;j++) TMave_mat[i][j]=-1;
+            continue;
+        }
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+
+        // chain i placed in the frame of complex 2 by the global transform
+        double **xt;
+        NewArray(&xt, xlen, 3);
+        do_rotation(xa, xt, xlen, t0, u0);
+
+        for (j=0;j<chain2_num;j++)
+        {
+            if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+
+            ylen=ylen_vec[j];
+            if (ylen<3)
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+                ylen,ya,seqy,secy);
+
+            /* declare variable specific to this pair of TMalign */
+            d0_out=5.0;
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+            rmsd0 = 0.0;
+            Liden=0;
+            int *invmap = new int[ylen+1];
+
+            // normalise by the total protein length, or by the total
+            // nucleic acid length when the pair is flagged as such
+            double Lnorm_ass=len_aa;
+            if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_ass=len_na;
+
+            /* entry function for structure alignment */
+            se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                0, false, 2, false, mol_vec1[i]+mol_vec2[j], 1, invmap);
+
+            /* print result */
+            seqxA_mat[i][j]=seqxA;
+            seqyA_mat[i][j]=seqyA;
+
+            TMave_mat[i][j]=TM4*Lnorm_ass;
+            if (assign1_list[i]==j)
+            {
+                // an assigned pair without any score is dropped
+                if (TM4<=0) assign1_list[i]=assign2_list[j]=-1;
+                else        total_score+=TMave_mat[i][j];
+            }
+
+            /* clean up */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+            delete[]invmap;
+        }
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&xt,xlen);
+    }
+    return;
+}
+
+/* Run one extra chain-assignment search on scratch copies of the current
+ * assignment state and adopt its result only when it beats
+ * max_total_score. Afterwards, if max_iter is non-zero, hand the (possibly
+ * updated) state to MMalign_iter for further refinement.
+ *
+ * max_total_score, TMave_mat, seqxA_mat, seqyA_mat, assign1_list,
+ * assign2_list and sequence are updated in place. */
+void MMalign_cross(double & max_total_score, const int max_iter,
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int len_aa, int len_na, int chain1_num, int chain2_num, double **TMave_mat,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, vector<string>&sequence,
+    double d0_scale, bool fast_opt)
+{
+    /* scratch copies of the assignment state */
+    vector<string> sequence_tmp;
+    vector<vector<string> >seqxA_tmp(chain1_num,
+        vector<string>(chain2_num,""));
+    vector<vector<string> >seqyA_tmp(chain1_num,
+        vector<string>(chain2_num,""));
+    int *assign1_tmp=new int[chain1_num];
+    int *assign2_tmp=new int[chain2_num];
+    double **TMave_tmp;
+    NewArray(&TMave_tmp,chain1_num,chain2_num);
+
+    /* current state -> scratch */
+    copy_chain_assign_data(chain1_num, chain2_num, sequence_tmp,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat,
+        seqxA_tmp, seqyA_tmp, assign1_tmp,  assign2_tmp,  TMave_tmp);
+
+    /* search on the scratch state only */
+    const double total_score=MMalign_search(xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num,
+        TMave_tmp, seqxA_tmp, seqyA_tmp, assign1_tmp, assign2_tmp, sequence_tmp,
+        d0_scale, fast_opt, 1);
+
+    /* scratch -> current state, but only for a strictly better score */
+    const bool improved=(total_score>max_total_score);
+    if (improved)
+    {
+        copy_chain_assign_data(chain1_num, chain2_num, sequence,
+            seqxA_tmp, seqyA_tmp, assign1_tmp,  assign2_tmp,  TMave_tmp,
+            seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat);
+        max_total_score=total_score;
+    }
+
+    if (max_iter!=0)
+    {
+        MMalign_iter(
+            max_total_score, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec,
+            secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+            xa, ya, seqx, seqy, secx, secy, len_aa, len_na,
+            chain1_num, chain2_num,
+            TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list,
+            sequence, d0_scale, fast_opt);
+    }
+
+    /* release the raw scratch buffers; the vectors free themselves */
+    DeleteArray(&TMave_tmp,chain1_num);
+    delete [] assign2_tmp;
+    delete [] assign1_tmp;
+}
+
+/* Copy a complex chain by chain, shortening over-long chains to the
+ * residues that lie closest to any other chain.
+ *
+ * A chain is kept unchanged when its length does not exceed
+ * 2*Lchain_na_max (mol_vec[i]>0) or 2*Lchain_aa_max (otherwise), with a
+ * floor of 3. A longer chain is cut down to exactly that many residues:
+ * for every residue the squared distance to the nearest residue of any
+ * other chain is computed, and the residues with the smallest values are
+ * retained in their original order.
+ *
+ * One entry per input chain is appended to a_trim_vec, seq_trim_vec,
+ * sec_trim_vec and len_trim_vec.
+ *
+ * return the number of chains that are trimmed */
+int trimComplex(vector<vector<vector<double> > >&a_trim_vec,
+    vector<vector<char> >&seq_trim_vec, vector<vector<char> >&sec_trim_vec,
+    vector<int>&len_trim_vec,
+    const vector<vector<vector<double> > >&a_vec,
+    const vector<vector<char> >&seq_vec, const vector<vector<char> >&sec_vec,
+    const vector<int> &len_vec, const vector<int> &mol_vec,
+    const int Lchain_aa_max, const int Lchain_na_max)
+{
+    int trim_chain_count=0;
+    const int chain_num=a_vec.size();
+    const double expand=2;
+    int i,j;
+    int r1,r2;
+    // (squared distance to the nearest other-chain residue, residue index)
+    vector<pair<double,int> >dinter_vec;
+    vector<bool> include_vec;
+    for (i=0;i<chain_num;i++)
+    {
+        const int xlen=len_vec[i];
+        int Lchain_max;
+        if (mol_vec[i]>0) Lchain_max=Lchain_na_max*expand;
+        else              Lchain_max=Lchain_aa_max*expand;
+        if (Lchain_max<3) Lchain_max=3;
+        if (xlen<=Lchain_max || xlen<=3)
+        {
+            // short enough: copy the chain as it is
+            a_trim_vec.push_back(a_vec[i]);
+            seq_trim_vec.push_back(seq_vec[i]);
+            sec_trim_vec.push_back(sec_vec[i]);
+            len_trim_vec.push_back(xlen);
+            continue;
+        }
+        trim_chain_count++;
+
+        dinter_vec.clear();
+        for (r1=0;r1<xlen;r1++)
+        {
+            const vector<double> &xcoor=a_vec[i][r1];
+            // stays at FLT_MAX when there is no other chain to compare to
+            double dinter_min=FLT_MAX;
+            for (j=0;j<chain_num;j++)
+            {
+                if (i==j) continue;
+                const int ylen=len_vec[j];
+                for (r2=0;r2<ylen;r2++)
+                {
+                    const vector<double> &ycoor=a_vec[j][r2];
+                    const double dinter=
+                           (xcoor[0]-ycoor[0])*(xcoor[0]-ycoor[0])+
+                           (xcoor[1]-ycoor[1])*(xcoor[1]-ycoor[1])+
+                           (xcoor[2]-ycoor[2])*(xcoor[2]-ycoor[2]);
+                    if (dinter<dinter_min) dinter_min=dinter;
+                }
+            }
+            // rank by the minimum, not by the last distance evaluated
+            dinter_vec.push_back(make_pair(dinter_min,r1));
+        }
+        sort(dinter_vec.begin(),dinter_vec.end());
+        include_vec.assign(xlen,false);
+        // Lchain_max<xlen is guaranteed here, so the index is in range
+        for (r1=0;r1<Lchain_max;r1++)
+            include_vec[dinter_vec[r1].second]=true;
+
+        a_trim_vec.push_back(vector<vector<double> >());
+        seq_trim_vec.push_back(vector<char>());
+        sec_trim_vec.push_back(vector<char>());
+        len_trim_vec.push_back(Lchain_max);
+        // fill the entries just appended; do not rely on the outputs
+        // having been empty when the function was entered
+        vector<vector<double> > &a_trim=a_trim_vec.back();
+        vector<char> &seq_trim=seq_trim_vec.back();
+        vector<char> &sec_trim=sec_trim_vec.back();
+        for (r1=0;r1<xlen;r1++)
+        {
+            if (include_vec[r1]==false) continue;
+            a_trim.push_back(a_vec[i][r1]);
+            seq_trim.push_back(seq_vec[i][r1]);
+            sec_trim.push_back(sec_vec[i][r1]);
+        }
+    }
+    return trim_chain_count;
+}
+
+/* Write one ATOM line per residue of every chain to filename.
+ *
+ * Each line carries a running atom serial number, the atom name, the
+ * residue name returned by AAmap for the sequence character, a two
+ * character chain ID, the 1-based residue index within the chain and the
+ * three coordinates with 3 decimals.
+ *
+ * atom_opt is used as atom name verbatim unless it is "auto", in which
+ * case " C3'" is written for chains with mol_vec[c]>0 and " CA " for all
+ * others. Chain IDs are reduced to their last two characters, a single
+ * character is left-padded with a blank, and a leading ':' is replaced
+ * by a blank.
+ *
+ * A warning is printed to stderr and nothing is written when the file
+ * cannot be opened. */
+void writeTrimComplex(vector<vector<vector<double> > >&a_trim_vec,
+    vector<vector<char> >&seq_trim_vec, vector<int>&len_trim_vec,
+    vector<string>&chainID_list, vector<int>&mol_vec,
+    const string &atom_opt, string filename)
+{
+    ofstream fp(filename.c_str());
+    if (!fp)
+    {
+        cerr<<"Warning! Cannot open "<<filename
+            <<" to write trimmed complex."<<endl;
+        return;
+    }
+
+    int a=0; // running atom serial number over all chains
+    string chainID;
+    string atom;
+    // stream formatting flags are sticky, so set them once up front
+    fp<<resetiosflags(ios::right)<<setiosflags(ios::fixed)<<setprecision(3);
+    for (size_t c=0;c<chainID_list.size();c++)
+    {
+        chainID=chainID_list[c];
+        if (chainID.size()==1) chainID=" "+chainID;
+        else if (chainID.size()>2) chainID=chainID.substr(chainID.size()-2,2);
+        if (!chainID.empty() && chainID[0]==':')
+            chainID=" "+chainID.substr(1);
+        atom=atom_opt;
+        if (atom_opt=="auto")
+        {
+            if (mol_vec[c]>0) atom=" C3'";
+            else              atom=" CA ";
+        }
+
+        // '\n' instead of endl: no flush after every single atom line,
+        // the stream is flushed once when it is closed
+        for (int r=0;r<len_trim_vec[c];r++)
+            fp<<"ATOM  "<<setw(5)<<++a<<' '
+              <<atom<<' '<<AAmap(seq_trim_vec[c][r])<<chainID
+              <<setw(4)<<r+1<<"    "
+              <<setw(8)<<a_trim_vec[c][r][0]
+              <<setw(8)<<a_trim_vec[c][r][1]
+              <<setw(8)<<a_trim_vec[c][r][2]<<'\n';
+    }
+    fp.close();
+    return;
+}
+
+/* Write the transform of every assigned chain to the text file
+ * fname_matrix (truncated if it exists), followed by a short code snippet
+ * that shows how to apply such a transform.
+ *
+ * Chains with assign1_list[i]<0 are skipped. For the others, ut_mat[i]
+ * is read as 12 numbers: entries 0..8 are the 3x3 rotation matrix in
+ * row-major order and entries 9..11 the translation vector.
+ *
+ * If the file cannot be opened a message is printed to stdout and
+ * nothing else happens. */
+void output_dock_rotation_matrix(const char* fname_matrix,
+    const vector<string>&xname_vec, const vector<string>&yname_vec,
+    double ** ut_mat, int *assign1_list)
+{
+    fstream fout;
+    fout.open(fname_matrix, ios::out | ios::trunc);
+    if (!fout)
+    {
+        cout << "Open file to output rotation matrix fail.\n";
+        return;
+    }
+
+    const int chain_count=xname_vec.size();
+    for (int c=0; c<chain_count; c++)
+    {
+        if (assign1_list[c]<0) continue; // unassigned chain
+
+        // NOTE(review): the target name is taken at the same index as the
+        // source name, not at assign1_list[c] -- confirm that the caller
+        // passes yname_vec already ordered to match xname_vec.
+        fout << "------ The rotation matrix to rotate "
+             <<xname_vec[c]<<" to "<<yname_vec[c]<<" ------\n"
+             << "m               t[m]        u[m][0]        u[m][1]        u[m][2]\n";
+
+        const double *ut=ut_mat[c];
+        for (int row=0; row<3; row++)
+        {
+            fout<<row<<setiosflags(ios::fixed)<<setprecision(10);
+            fout<<' '<<setw(18)<<ut[9+row];
+            for (int col=0; col<3; col++)
+                fout<<' '<<setw(14)<<ut[3*row+col];
+            fout<<'\n';
+        }
+    }
+
+    fout << "\nCode for rotating Structure 1 from (x,y,z) to (X,Y,Z):\n"
+            "for(i=0; i<L; i++)\n"
+            "{\n"
+            "   X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
+            "   Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
+            "   Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
+            "}"<<endl;
+    fout.close();
+}
diff --git a/modules/bindings/src/tmalign/NW.h b/modules/bindings/src/USalign/NW.h
similarity index 80%
rename from modules/bindings/src/tmalign/NW.h
rename to modules/bindings/src/USalign/NW.h
index 4c9984853..66e7e94f7 100644
--- a/modules/bindings/src/tmalign/NW.h
+++ b/modules/bindings/src/USalign/NW.h
@@ -259,6 +259,100 @@ void NWDP_SE(bool **path, double **val, double **x, double **y,
     }
 }
 
+void NWDP_SE(bool **path, double **val, double **x, double **y,
+    int len1, int len2, double d02, double gap_open, int j2i[],
+    const int hinge)
+{
+    /* Hinge-aware variant of the 9-argument NWDP_SE: pairs already listed in
+     * j2i on entry are seeded as aligned and contribute zero distance, the
+     * DP table is then refilled and j2i is overwritten by the traceback.
+     * hinge==0 simply delegates to the 9-argument overload. */
+    if (hinge==0)
+    {
+        NWDP_SE(path, val, x, y, len1, len2, d02, gap_open, j2i);
+        return;
+    }
+    int i, j;
+    double h, v, d;
+    for (i=0; i<=len1; i++)
+    {
+        for (j=0; j<=len2; j++)
+        {
+            val[i][j]=0;
+            path[i][j]=false;
+        }
+    }
+
+    /* seed the table with the incoming alignment: j2i[j]=i (i>=0) marks
+     * cell (i+1,j+1) as aligned; negative entries are unaligned */
+    for (j=0; j<len2; j++)
+    {
+        i=j2i[j];
+        if (i<0) continue;
+        path[i+1][j+1]=true;
+        val[i+1][j+1]=0;
+    }
+
+    double dij; // distance term of the current cell; stays 0 for seeded pairs
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        for(j=1; j<=len2; j++)
+        {
+            dij=0;
+            if (path[i][j]==false) dij=dist(&x[i-1][0], &y[j-1][0]);
+            d=val[i-1][j-1] +  1.0/(1+dij/d02);
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+            // val[i][j] still holds the 0 written during initialisation here
+            if(d>=h && d>=v && val[i][j]==0)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else
+            {
+                path[i][j]=false; //from horizontal or vertical
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for j
+    } //for i
+
+    //trace back to extract the alignment
+    for (j=0;j<=len2;j++) j2i[j]=-1; // also writes j2i[len2]: needs len2+1 slots
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1;
+            i--;
+            j--;
+        }
+        else
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--;
+            else i--;
+        }
+    }
+}
+
 /* +ss
  * Input: secondary structure secx, secy, and gap_open
  * Output: j2i[1:len2] \in {1:len1} U {-1}
diff --git a/modules/bindings/src/tmalign/NWalign.cpp b/modules/bindings/src/USalign/NWalign.cpp
similarity index 100%
rename from modules/bindings/src/tmalign/NWalign.cpp
rename to modules/bindings/src/USalign/NWalign.cpp
diff --git a/modules/bindings/src/tmalign/NWalign.h b/modules/bindings/src/USalign/NWalign.h
similarity index 72%
rename from modules/bindings/src/tmalign/NWalign.h
rename to modules/bindings/src/USalign/NWalign.h
index 2c7e36a11..7d6856b98 100644
--- a/modules/bindings/src/tmalign/NWalign.h
+++ b/modules/bindings/src/USalign/NWalign.h
@@ -502,7 +502,7 @@ void output_NWalign_results(
         printf(">%s%s\tL=%d\tseqID=%.3f\n",
             yname.c_str(), chainID2, ylen, Liden/ylen);
         printf("%s\n", seqyA);
-        printf("# Lali=%d\tseqID_ali=%.3f\n", L_ali, Liden/L_ali);
+        printf("#score=%d\tLali=%d\tseqID_ali=%.3f\n", aln_score, L_ali, Liden/L_ali);
         printf("$$$$\n");
     }
     else if (outfmt_opt==2)
@@ -515,4 +515,200 @@ void output_NWalign_results(
     cout << endl;
 }
 
+/* Build a pairwise alignment of seqx/seqy from residue index vectors whose
+ * entries hold the residue number in characters 0-3 and the chain ID from
+ * character 5 on. "sequence" is reset to the two aligned rows; returns the
+ * alignment length, gaps included. byresi_opt: 4/5 = global/glocal sequence
+ * alignment, 3 = chains by order, 2 = chains by ID, else number only. */
+int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
+    const vector<string> resi_vec1, const vector<string> resi_vec2,
+    const int byresi_opt)
+{
+    sequence.clear();
+    sequence.push_back("");
+    sequence.push_back("");
+
+    int i1=0; // positions in resi_vec1
+    int i2=0; // positions in resi_vec2
+    int xlen=resi_vec1.size();
+    int ylen=resi_vec2.size();
+    if (byresi_opt==4 || byresi_opt==5) // global or glocal sequence alignment
+    {
+        int *invmap=NULL; // never allocated here; do not pass it indeterminate
+        int glocal=0;
+        if (byresi_opt==5) glocal=2;
+        /* both sequences vote on mol_type: +1 per character strictly between
+         * 'a' and 'z', else -1. NOTE(review): 'a' and 'z' count as -1. */
+        int mol_type=0;
+        for (i1=0;i1<xlen;i1++)
+            if ('a'<seqx[i1] && seqx[i1]<'z') mol_type++;
+            else mol_type--;
+        for (i2=0;i2<ylen;i2++) // seqy, not seqx: i2 ranges over ylen residues
+            if ('a'<seqy[i2] && seqy[i2]<'z') mol_type++;
+            else mol_type--;
+        NWalign_main(seqx, seqy, xlen, ylen, sequence[0],sequence[1],
+            mol_type, invmap, 0, glocal);
+        // i1==xlen and i2==ylen now, so the merge loop below does not run
+    }
+
+    map<string,string> chainID_map1;
+    map<string,string> chainID_map2;
+    if (byresi_opt==3)
+    {
+        // number the chains 1,2,3.. per structure, in order of appearance
+        vector<string> chainID_vec;
+        string chainID;
+        stringstream ss;
+        int i;
+        for (i=0;i<xlen;i++)
+        {
+            chainID=resi_vec1[i].substr(5);
+            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
+            {
+                chainID_vec.push_back(chainID);
+                ss<<chainID_vec.size();
+                chainID_map1[chainID]=ss.str();
+                ss.str("");
+            }
+        }
+        chainID_vec.clear();
+        for (i=0;i<ylen;i++)
+        {
+            chainID=resi_vec2[i].substr(5);
+            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
+            {
+                chainID_vec.push_back(chainID);
+                ss<<chainID_vec.size();
+                chainID_map2[chainID]=ss.str();
+                ss.str("");
+            }
+        }
+    }
+    string chainID1="";
+    string chainID2="";
+    string chainID1_prev="";
+    string chainID2_prev="";
+    /* merge by (chain, residue number). NOTE(review): stops once either
+     * list is exhausted; the other list's remaining residues are dropped. */
+    while(i1<xlen && i2<ylen)
+    {
+        if (byresi_opt==2)
+        {
+            chainID1=resi_vec1[i1].substr(5);
+            chainID2=resi_vec2[i2].substr(5);
+        }
+        else if (byresi_opt==3)
+        {
+            chainID1=chainID_map1[resi_vec1[i1].substr(5)];
+            chainID2=chainID_map2[resi_vec2[i2].substr(5)];
+        }
+
+        if (chainID1==chainID2)
+        {
+            if (atoi(resi_vec1[i1].substr(0,4).c_str())<
+                atoi(resi_vec2[i2].substr(0,4).c_str()))
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+='-';
+            }
+            else if (atoi(resi_vec1[i1].substr(0,4).c_str())>
+                     atoi(resi_vec2[i2].substr(0,4).c_str()))
+            {
+                sequence[0]+='-';
+                sequence[1]+=seqy[i2++];
+            }
+            else
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+=seqy[i2++];
+            }
+            chainID1_prev=chainID1;
+            chainID2_prev=chainID2;
+        }
+        else
+        {
+            // chains differ: advance the side that is still in its previous chain
+            if (chainID1_prev==chainID1 && chainID2_prev!=chainID2)
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+='-';
+                chainID1_prev=chainID1;
+            }
+            else if (chainID1_prev!=chainID1 && chainID2_prev==chainID2)
+            {
+                sequence[0]+='-';
+                sequence[1]+=seqy[i2++];
+                chainID2_prev=chainID2;
+            }
+            else
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+=seqy[i2++];
+                chainID1_prev=chainID1;
+                chainID2_prev=chainID2;
+            }
+        }
+    }
+    return sequence[0].size();
+}
+
+/* Align chain chain_i of structure 1 with chain chain_j of structure 2 by
+ * residue number; returns the alignment length, gaps included. */
+int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
+    const vector<string> resi_vec1, const vector<string> resi_vec2,
+    const vector<int> xlen_vec, const vector<int> ylen_vec,
+    const int chain_i, const int chain_j)
+{
+    sequence.assign(2, string()); // [0]: row for seqx, [1]: row for seqy
+    string &rowx=sequence[0];
+    string &rowy=sequence[1];
+    // offsets of the two chains inside the concatenated resi vectors
+    int off1=0, off2=0, c;
+    for (c=0;c<chain_i;c++) off1+=xlen_vec[c];
+    for (c=0;c<chain_j;c++) off2+=ylen_vec[c];
+    const int xlen=xlen_vec[chain_i];
+    const int ylen=ylen_vec[chain_j];
+
+    int px=0, py=0; // cursors into seqx / seqy
+    while (px<xlen && py<ylen)
+    {
+        // characters 0-3 of a resi entry are parsed as the residue number
+        const int num1=atoi(resi_vec1[px+off1].substr(0,4).c_str());
+        const int num2=atoi(resi_vec2[py+off2].substr(0,4).c_str());
+        if (num1<num2)      // lower number in chain_i: emit it against a gap
+        {
+            rowx+=seqx[px++];
+            rowy+='-';
+        }
+        else if (num1>num2) // lower number in chain_j: emit it against a gap
+        {
+            rowx+='-';
+            rowy+=seqy[py++];
+        }
+        else                // same residue number: aligned column
+        {
+            rowx+=seqx[px++];
+            rowy+=seqy[py++];
+        }
+    }
+    // emit the tail of whichever chain is not exhausted against gaps
+    if (px<xlen && py==ylen)
+    {
+        for (;px<xlen;px++)
+        {
+            rowx+=seqx[px];
+            rowy+='-';
+        }
+    }
+    else if (px==xlen && py<ylen)
+    {
+        for (;py<ylen;py++)
+        {
+            rowx+='-';
+            rowy+=seqy[py];
+        }
+    }
+    return sequence[0].size();
+}
+
 #endif
diff --git a/modules/bindings/src/USalign/OST_INFO b/modules/bindings/src/USalign/OST_INFO
new file mode 100644
index 000000000..42124da83
--- /dev/null
+++ b/modules/bindings/src/USalign/OST_INFO
@@ -0,0 +1,6 @@
+Source code has been cloned May 4 2023 from:
+
+https://github.com/pylelab/USalign
+
+last commit:
+8d968e0111ca275958f209d76b1cd10598864a34
diff --git a/modules/bindings/src/tmalign/PDB1.pdb b/modules/bindings/src/USalign/PDB1.pdb
similarity index 100%
rename from modules/bindings/src/tmalign/PDB1.pdb
rename to modules/bindings/src/USalign/PDB1.pdb
diff --git a/modules/bindings/src/tmalign/PDB2.pdb b/modules/bindings/src/USalign/PDB2.pdb
similarity index 100%
rename from modules/bindings/src/tmalign/PDB2.pdb
rename to modules/bindings/src/USalign/PDB2.pdb
diff --git a/modules/bindings/src/USalign/SOIalign.h b/modules/bindings/src/USalign/SOIalign.h
new file mode 100644
index 000000000..716afbaf8
--- /dev/null
+++ b/modules/bindings/src/USalign/SOIalign.h
@@ -0,0 +1,959 @@
+#ifndef SOIalign_h
+#define SOIalign_h 1
+
+#include "TMalign.h"
+
+void print_invmap(int *invmap, const int ylen)
+{
+    // print every mapped pair as " (i,j)" on a single line of stdout
+    for (int col=0; col<ylen; ++col)
+    {
+        if (invmap[col]<0) continue; // col has no partner
+        cout<<" ("<<invmap[col]<<","<<col<<")";
+    }
+    cout<<endl;
+}
+
+void assign_sec_bond(int **secx_bond, const char *secx, const int xlen)
+{
+    /* For every residue of a run of 'H', 'E', '<' or '>' longer than one
+     * residue, store the run as [start,end) in secx_bond[r][0..1]; all other
+     * residues get (-1,-1). A C<->T change is not treated as a boundary. */
+    int run_start=-1; // first residue of the open run, -1 when none is open
+    char last_ss=0;   // code of the previous residue (0 before the first)
+    int pos=0;
+    int fill;
+    while (pos<xlen)
+    {
+        const char cur_ss=secx[pos];
+        secx_bond[pos][0]=secx_bond[pos][1]=-1;
+        const bool c_t_swap=(cur_ss=='C' && last_ss=='T') ||
+                            (cur_ss=='T' && last_ss=='C');
+        if (cur_ss!=last_ss && !c_t_swap)
+        {
+            // close the open run, if any: label [run_start,pos)
+            for (fill=run_start; run_start>=0 && fill<pos; fill++)
+            {
+                secx_bond[fill][0]=run_start;
+                secx_bond[fill][1]=pos;
+            }
+            const bool opens=(cur_ss=='H' || cur_ss=='E' ||
+                              cur_ss=='<' || cur_ss=='>');
+            run_start=opens?pos:-1;
+        }
+        last_ss=cur_ss;
+        pos++;
+    }
+    // a run that reaches the end of the chain is closed here (pos==xlen)
+    for (fill=run_start; run_start>=0 && fill<pos; fill++)
+    {
+        secx_bond[fill][0]=run_start;
+        secx_bond[fill][1]=pos;
+    }
+    for (pos=0; pos<xlen; pos++) // discard runs that are one residue long
+        if (secx_bond[pos][1]-secx_bond[pos][0]==1)
+            secx_bond[pos][0]=secx_bond[pos][1]=-1;
+}
+
+void getCloseK(double **xa, const int xlen, const int closeK_opt, double **xk)
+{
+    /* For each residue r, copy the coordinates of the closeK_opt entries that
+     * sort first by (dist() to r, index) into rows r*closeK_opt.. of xk. */
+    double **dmat; // 1-based symmetric table of dist() values
+    NewArray(&dmat, xlen+1, xlen+1);
+    int r, s, rank, axis;
+    for (r=0; r<xlen; r++)
+    {
+        dmat[r+1][r+1]=0;
+        for (s=r+1; s<xlen; s++)
+        {
+            dmat[r+1][s+1]=dist(xa[r], xa[s]);
+            dmat[s+1][r+1]=dmat[r+1][s+1];
+        }
+    }
+    vector<pair<double,int> > ranked(xlen, make_pair(0,0));
+    for (r=0; r<xlen; r++)
+    {
+        for (s=0; s<xlen; s++) ranked[s]=make_pair(dmat[r+1][s+1], s);
+        sort(ranked.begin(), ranked.end());
+        for (rank=0; rank<closeK_opt; rank++)
+        {
+            // rank wraps around when closeK_opt exceeds xlen
+            const double *src=xa[ranked[rank % xlen].second];
+            for (axis=0; axis<3; axis++) xk[r*closeK_opt+rank][axis]=src[axis];
+        }
+    }
+    /* clean up */
+    vector<pair<double,int> >().swap(ranked);
+    DeleteArray(&dmat, xlen+1);
+}
+
+/* true unless pairing (i,j) breaks sequential order inside an SSE of x or y */
+inline bool sec2sq(const int i, const int j,
+    int **secx_bond, int **secy_bond, int *fwdmap, int *invmap)
+{
+    if (i<0 || j<0) return true; // an unassigned side cannot conflict
+    int pi,pj; // an already assigned pair: pi in x, pj in y
+    // pairs whose x residue lies in the same SSE of x as i
+    if (secx_bond[i][0]>=0)
+        for (pi=secx_bond[i][0]; pi<secx_bond[i][1]; pi++)
+        {
+            pj=fwdmap[pi];
+            if (pj<0) continue;
+            if ((i-pi)*(j-pj)<=0) return false;
+        }
+    // pairs whose y residue lies in the same SSE of y as j
+    if (secy_bond[j][0]>=0)
+        for (pj=secy_bond[j][0]; pj<secy_bond[j][1]; pj++)
+        {
+            pi=invmap[pj];
+            if (pi<0) continue;
+            if ((i-pi)*(j-pj)<=0) return false;
+        }
+    return true;
+}
+
+void soi_egs(double **score, const int xlen, const int ylen, int *invmap,
+    int **secx_bond, int **secy_bond, const int mm_opt)
+{   /* extend invmap greedily on score[1..xlen][1..ylen], then refine by swaps */
+    int i,j;
+    int *fwdmap=new int[xlen]; // j=fwdmap[i]; inverse view of invmap
+    for (i=0; i<xlen; i++) fwdmap[i]=-1;
+    for (j=0; j<ylen; j++)
+    {
+        i=invmap[j];
+        if (i>=0) fwdmap[i]=j; // mirror the pairs present on entry
+    }
+
+    /* stage 1 - make initial assignment, starting from the highest score pair */
+    double max_score;
+    int maxi,maxj;
+    while(1)
+    {
+        max_score=0; // only strictly positive scores can be picked
+        maxi=maxj=-1;
+        for (i=0;i<xlen;i++)
+        {
+            if (fwdmap[i]>=0) continue; // i is already paired
+            for (j=0;j<ylen;j++)
+            {
+                if (invmap[j]>=0 || score[i+1][j+1]<=max_score) continue;
+                if (mm_opt==6 && !sec2sq(i,j,secx_bond,secy_bond,
+                    fwdmap,invmap)) continue; // mm_opt 6: pair must pass sec2sq
+                maxi=i;
+                maxj=j;
+                max_score=score[i+1][j+1];
+            }
+        }
+        if (maxi<0) break; // no assignment;
+        invmap[maxj]=maxi;
+        fwdmap[maxi]=maxj;
+    }
+
+    double total_score=0; // sum of assigned scores; updated but never read below
+    for (j=0;j<ylen;j++)
+    {
+        i=invmap[j];
+        if (i>=0) total_score+=score[i+1][j+1];
+    }
+
+    /* stage 2 - swap assignment until total score cannot be improved */
+    int iter;
+    int oldi,oldj;
+    double delta_score;
+    for (iter=0; iter<getmin(xlen,ylen)*5; iter++) // bounded number of passes
+    {
+        //cout<<"total_score="<<total_score<<".iter="<<iter<<endl;
+        //print_invmap(invmap,ylen);
+        delta_score=-1; // stays -1 if no candidate pair is evaluated this pass
+        for (i=0;i<xlen;i++)
+        {
+            oldj=fwdmap[i]; // current partner of i, or -1
+            for (j=0;j<ylen;j++)
+            {
+                oldi=invmap[j]; // current partner of j, or -1
+                if (score[i+1][j+1]<=0 || oldi==i) continue;
+                if (mm_opt==6 && (!sec2sq(i,j,secx_bond,secy_bond,fwdmap,invmap) ||
+                            !sec2sq(oldi,oldj,secx_bond,secy_bond,fwdmap,invmap)))
+                    continue;
+                delta_score=score[i+1][j+1]; // gain: new pair (i,j)
+                if (oldi>=0 && oldj>=0) delta_score+=score[oldi+1][oldj+1]; // gain: (oldi,oldj)
+                if (oldi>=0) delta_score-=score[oldi+1][j+1]; // loss: (oldi,j)
+                if (oldj>=0) delta_score-=score[i+1][oldj+1]; // loss: (i,oldj)
+
+                if (delta_score>0) // successful swap
+                {
+                    fwdmap[i]=j;
+                    if (oldi>=0) fwdmap[oldi]=oldj; // oldj may be -1: oldi ends unpaired
+                    invmap[j]=i;
+                    if (oldj>=0) invmap[oldj]=oldi; // oldi may be -1: oldj ends unpaired
+                    total_score+=delta_score;
+                    break; // continue with the next i
+                }
+            }
+        }
+        if (delta_score<=0) break; // judged on the last candidate evaluated in this pass
+    }
+
+    /* clean up */
+    delete[]fwdmap;
+}
+
+/* Score xa against ya as given (no superposition is computed here): build
+ * the pair score table, assign pairs with soi_egs (seeded by NWDP_TM when
+ * mm_opt==6) and report TM1..TM5, rmsd0, n_ali, n_ali8 and the aligned rows.
+ * u_opt corresponds to option -L; u_opt==2 derives d0 from Lnorm_ass. */
+int soi_se_main(
+    double **xa, double **ya, const char *seqx, const char *seqy,
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    const double Lnorm_ass, const double d0_scale, const bool i_opt,
+    const bool a_opt, const int u_opt, const bool d_opt, const int mol_type,
+    const int outfmt_opt, int *invmap, double *dist_list,
+    int **secx_bond, int **secy_bond, const int mm_opt)
+{
+    double D0_MIN;        //for d0
+    double Lnorm;         //normalization length
+    double score_d8,d0,d0_search,dcu0;//for TMscore search
+    double **score;       // score for aligning a residue pair
+    bool   **path;        // for dynamic programming
+    double **val;         // for dynamic programming
+
+    int *m1=NULL;
+    int *m2=NULL;
+    int i,j;
+    double d;
+    if (outfmt_opt<2) // m1/m2 are filled below but not read in this function
+    {
+        m1=new int[xlen]; //alignd index in x
+        m2=new int[ylen]; //alignd index in y
+    }
+
+    /***********************/
+    /* allocate memory     */
+    /***********************/
+    NewArray(&score, xlen+1, ylen+1);
+    NewArray(&path,  xlen+1, ylen+1);
+    NewArray(&val,   xlen+1, ylen+1);
+    // invmap comes from the caller (the retired local allocation was ylen+1 long)
+
+    /* set d0 */
+    parameter_set4search(xlen, ylen, D0_MIN, Lnorm,
+        score_d8, d0, d0_search, dcu0); // set score_d8
+    parameter_set4final(xlen, D0_MIN, Lnorm,
+        d0B, d0_search, mol_type); // set d0B
+    parameter_set4final(ylen, D0_MIN, Lnorm,
+        d0A, d0_search, mol_type); // set d0A
+    if (a_opt)
+        parameter_set4final((xlen+ylen)*0.5, D0_MIN, Lnorm,
+            d0a, d0_search, mol_type); // set d0a
+    if (u_opt)
+    {
+        parameter_set4final(Lnorm_ass, D0_MIN, Lnorm,
+            d0u, d0_search, mol_type); // set d0u
+        if (u_opt==2)
+        {
+            parameter_set4search(Lnorm_ass, Lnorm_ass, D0_MIN, Lnorm,
+                score_d8, d0, d0_search, dcu0); // set score_d8
+        }
+    }
+
+    /* perform alignment */
+    for(j=0; j<ylen; j++) invmap[j]=-1;
+    double d02=d0*d0;
+    double score_d82=score_d8*score_d8;
+    double d2;
+    for(i=0; i<xlen; i++)
+    {
+        for(j=0; j<ylen; j++)
+        {
+            d2=dist(xa[i], ya[j]);
+            if (d2>score_d82) score[i+1][j+1]=0; // beyond the score_d8 cutoff
+            else score[i+1][j+1]=1./(1+ d2/d02);
+        }
+    }
+    if (mm_opt==6) NWDP_TM(score, path, val, xlen, ylen, -0.6, invmap);
+    soi_egs(score, xlen, ylen, invmap, secx_bond, secy_bond, mm_opt);
+
+    rmsd0=TM1=TM2=TM3=TM4=TM5=0;
+    int k=0; // number of assigned pairs with a positive score
+    n_ali=0;
+    n_ali8=0;
+    for(j=0; j<ylen; j++)
+    {
+        i=invmap[j];
+        dist_list[j]=-1;
+        if(i>=0)//aligned
+        {
+            n_ali++;
+            d=sqrt(dist(&xa[i][0], &ya[j][0]));
+            dist_list[j]=d;
+            if (score[i+1][j+1]>0)
+            {
+                if (outfmt_opt<2)
+                {
+                    m1[k]=i;
+                    m2[k]=j;
+                }
+                k++;
+                TM2+=1/(1+(d/d0B)*(d/d0B)); // chain_1
+                TM1+=1/(1+(d/d0A)*(d/d0A)); // chain_2
+                if (a_opt) TM3+=1/(1+(d/d0a)*(d/d0a)); // -a
+                if (u_opt) TM4+=1/(1+(d/d0u)*(d/d0u)); // -u
+                if (d_opt) TM5+=1/(1+(d/d0_scale)*(d/d0_scale)); // -d
+                rmsd0+=d*d;
+            }
+        }
+    }
+    n_ali8=k;
+    TM2/=xlen;
+    TM1/=ylen;
+    TM3/=(xlen+ylen)*0.5;
+    TM4/=Lnorm_ass;
+    TM5/=ylen;
+    if (n_ali8) rmsd0=sqrt(rmsd0/n_ali8);
+    if (outfmt_opt>=2) // no aligned rows wanted: release every table and stop
+    {
+        DeleteArray(&score, xlen+1);
+        DeleteArray(&path, xlen+1); // path and val used to leak on this path
+        DeleteArray(&val, xlen+1);
+        return 0;
+    }
+    /* extract aligned sequence */
+    int ali_len=xlen+ylen;
+    for (j=0;j<ylen;j++) ali_len-=(invmap[j]>=0);
+    seqxA.assign(ali_len,'-');
+    seqM.assign( ali_len,' ');
+    seqyA.assign(ali_len,'-');
+
+    int *fwdmap = new int [xlen+1];
+    for (i=0;i<xlen;i++) fwdmap[i]=-1;
+    // columns 0..ylen-1 follow the order of y; unpaired x residues come after
+    for (j=0;j<ylen;j++)
+    {
+        seqyA[j]=seqy[j];
+        i=invmap[j];
+        if (i<0) continue;
+        if (sqrt(dist(xa[i], ya[j]))<d0_out) seqM[j]=':';
+        else seqM[j]='.';
+        fwdmap[i]=j;
+        seqxA[j]=seqx[i];
+        Liden+=(seqxA[j]==seqyA[j]); // compare column j (was the stale counter k)
+    }
+    k=0;
+    for (i=0;i<xlen;i++)
+    {
+        j=fwdmap[i];
+        if (j>=0) continue;
+        seqxA[ylen+k]=seqx[i];
+        k++;
+    }
+
+    /* free memory */
+    delete [] fwdmap;
+    delete [] m1;
+    delete [] m2;
+    DeleteArray(&score, xlen+1);
+    DeleteArray(&path, xlen+1);
+    DeleteArray(&val, xlen+1);
+    return 0; // zero for no exception
+}
+
+inline void SOI_super2score(double **xt, double **ya, const int xlen,
+    const int ylen, double **score, double d0, double score_d8)
+{
+    /* score[r+1][c+1] = 1/(1+d2/d0^2), where d2=dist(xt[r],ya[c]); pairs
+     * with d2 above score_d8^2 are scored 0 */
+    const double d0_sq=d0*d0;
+    const double cutoff_sq=score_d8*score_d8;
+    for (int r=0; r<xlen; r++)
+    {
+        double *row=score[r+1];
+        for (int c=0; c<ylen; c++)
+        {
+            const double d2=dist(xt[r], ya[c]);
+            row[c+1]=(d2>cutoff_sq)?0:1./(1+ d2/d0_sq);
+        }
+    }
+}
+
+// Alternate between assigning pairs on the current score table (soi_egs,
+// seeded by NWDP_TM when mm_opt==6) and re-superposing the assigned pairs
+// (TMscore8_search), for at most iteration_max rounds or until the score
+// stalls. Returns the best score; its assignment is left in invmap0.
+double SOI_iter(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, double **score, bool **path, double **val, double **xa, double **ya,
+    int xlen, int ylen, double t[3], double u[3][3], int *invmap0,
+    int iteration_max, double local_d0_search,
+    double Lnorm, double d0, double score_d8,
+    int **secx_bond, int **secy_bond, const int mm_opt, const bool init_invmap=false)
+{
+    double rmsd;
+    int *invmap=new int[ylen+1];
+
+    int iteration, i, j, k;
+    double tmscore, tmscore_max, tmscore_old=0;
+    tmscore_max=-1; // returned unchanged when no round is executed
+
+    /* score must already hold the pair scores of the incoming superposition;
+     * it is recomputed from t,u at the end of every round. With init_invmap
+     * the first round starts from invmap0 instead of an empty assignment.
+     * t,u are handed to TMscore8_search and then applied to xa. */
+    for (iteration=0; iteration<iteration_max; iteration++)
+    {
+        if (iteration==0 && init_invmap)
+            for (j=0;j<ylen;j++) invmap[j]=invmap0[j];
+        else
+        {
+            for (j=0; j<ylen; j++) invmap[j]=-1;
+            if (mm_opt==6) NWDP_TM(score, path, val, xlen, ylen, -0.6, invmap);
+        }
+        soi_egs(score, xlen, ylen, invmap, secx_bond, secy_bond, mm_opt);
+
+        k=0; // number of assigned pairs copied into xtm/ytm
+        for (j=0; j<ylen; j++)
+        {
+            i=invmap[j];
+            if (i<0) continue;
+
+            xtm[k][0]=xa[i][0];
+            xtm[k][1]=xa[i][1];
+            xtm[k][2]=xa[i][2];
+
+            ytm[k][0]=ya[j][0];
+            ytm[k][1]=ya[j][1];
+            ytm[k][2]=ya[j][2];
+            k++;
+        }
+
+        tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u,
+            40, 8, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+
+        if (tmscore>tmscore_max) // keep the best assignment seen so far
+        {
+            tmscore_max=tmscore;
+            for (j=0; j<ylen; j++) invmap0[j]=invmap[j];
+        }
+
+        if (iteration>0 && fabs(tmscore_old-tmscore)<0.000001) break;
+        tmscore_old=tmscore;
+        do_rotation(xa, xt, xlen, t, u);
+        SOI_super2score(xt, ya, xlen, ylen, score, d0, score_d8);
+    }// for iteration
+
+    delete []invmap;
+    return tmscore_max;
+}
+
+void get_SOI_initial_assign(double **xk, double **yk, const int closeK_opt,
+    double **score, bool **path, double **val, const int xlen, const int ylen,
+    double t[3], double u[3][3], int invmap[],
+    double local_d0_search, double d0, double score_d8,
+    int **secx_bond, int **secy_bond, const int mm_opt)
+{
+    /* Seed a sequence-order-independent assignment: every residue pair (i,j)
+     * is scored by superposing the closeK_opt rows of xk that belong to i onto
+     * the closeK_opt rows of yk that belong to j; invmap is then assigned from
+     * the resulting table. t and u receive the per-pair Kabsch transform. */
+    int i,j,k;
+    double **xfrag;
+    double **xtran;
+    double **yfrag;
+    NewArray(&xfrag, closeK_opt, 3);
+    NewArray(&xtran, closeK_opt, 3);
+    NewArray(&yfrag, closeK_opt, 3);
+    double rmsd;
+    double d02=d0*d0;
+    double score_d82=score_d8*score_d8;
+    double d2;
+
+    /* fill in score */
+    for (i=0;i<xlen;i++)
+    {
+        for (k=0;k<closeK_opt;k++)
+        {
+            xfrag[k][0]=xk[i*closeK_opt+k][0];
+            xfrag[k][1]=xk[i*closeK_opt+k][1];
+            xfrag[k][2]=xk[i*closeK_opt+k][2];
+        }
+
+        for (j=0;j<ylen;j++)
+        {
+            for (k=0;k<closeK_opt;k++)
+            {
+                yfrag[k][0]=yk[j*closeK_opt+k][0];
+                yfrag[k][1]=yk[j*closeK_opt+k][1];
+                yfrag[k][2]=yk[j*closeK_opt+k][2];
+            }
+            // superpose fragment i onto fragment j; the return value is not checked
+            Kabsch(xfrag, yfrag, closeK_opt, 1, &rmsd, t, u);
+            do_rotation(xfrag, xtran, closeK_opt, t, u);
+            /* the pair score uses only the last fragment point after the
+             * superposition (k=closeK_opt-1), so closeK_opt must be >=1 */
+            k=closeK_opt-1;
+            d2=dist(xtran[k], yfrag[k]);
+            if (d2>score_d82) score[i+1][j+1]=0;
+            else score[i+1][j+1]=1./(1+d2/d02);
+        }
+    }
+
+    /* initial assignment */
+    for (j=0;j<ylen;j++) invmap[j]=-1;
+    if (mm_opt==6) NWDP_TM(score, path, val, xlen, ylen, -0.6, invmap);
+    // (a read-only loop over invmap that had no effect was removed here)
+    soi_egs(score, xlen, ylen, invmap, secx_bond, secy_bond, mm_opt);
+
+    /* clean up */
+    DeleteArray(&xfrag, closeK_opt);
+    DeleteArray(&xtran, closeK_opt);
+    DeleteArray(&yfrag, closeK_opt);
+}
+
+void SOI_assign2super(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, double **xa, double **ya,
+    const int xlen, const int ylen, double t[3], double u[3][3], int invmap[],
+    double local_d0_search, double Lnorm, double d0, double score_d8)
+{
+    /* Gather the pairs assigned in invmap into xtm/ytm, pass them to
+     * TMscore8_search together with t and u, then apply t,u to all of xa
+     * via do_rotation (result in xt). invmap[j]<0 marks an unpaired j. */
+    int i,j,k;
+    double rmsd; // filled through &rmsd below, not used afterwards
+
+    k=0;
+    for (j=0; j<ylen; j++)
+    {
+        i=invmap[j];
+        if (i<0) continue;
+        xtm[k][0]=xa[i][0];
+        xtm[k][1]=xa[i][1];
+        xtm[k][2]=xa[i][2];
+
+        ytm[k][0]=ya[j][0];
+        ytm[k][1]=ya[j][1];
+        ytm[k][2]=ya[j][2];
+        k++;
+    }
+    TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u,
+        40, 8, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+    do_rotation(xa, xt, xlen, t, u);
+}
+
+/* entry function for TM-align with circular permutation
+ * i_opt, a_opt, u_opt, d_opt, TMcut are not implemented yet */
+int SOIalign_main(double **xa, double **ya,
+    double **xk, double **yk, const int closeK_opt,
+    const char *seqx, const char *seqy, const char *secx, const char *secy,
+    double t0[3], double u0[3][3],
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA, int *invmap,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    const vector<string> sequence, const double Lnorm_ass,
+    const double d0_scale, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const bool fast_opt,
+    const int mol_type, double *dist_list, 
+    int **secx_bond, int **secy_bond, const int mm_opt)
+{
+    double D0_MIN;        //for d0
+    double Lnorm;         //normalization length
+    double score_d8,d0,d0_search,dcu0;//for TMscore search
+    double t[3], u[3][3]; //Kabsch translation vector and rotation matrix
+    double **score;       // Input score table for enhanced greedy search
+    double **scoret;      // Transposed score table for enhanced greedy search
+    bool   **path;        // for dynamic programming  
+    double **val;         // for dynamic programming  
+    double **xtm, **ytm;  // for TMscore search engine
+    double **xt;          //for saving the superposed version of r_1 or xtm
+    double **yt;          //for saving the superposed version of r_2 or ytm
+    double **r1, **r2;    // for Kabsch rotation
+
+    /***********************/
+    /* allocate memory     */
+    /***********************/
+    int minlen = min(xlen, ylen);
+    int maxlen = (xlen>ylen)?xlen:ylen;
+    NewArray(&score,  xlen+1, ylen+1);
+    NewArray(&scoret, ylen+1, xlen+1);
+    NewArray(&path, maxlen+1, maxlen+1);
+    NewArray(&val,  maxlen+1, maxlen+1);
+    NewArray(&xtm, minlen, 3);
+    NewArray(&ytm, minlen, 3);
+    NewArray(&xt, xlen, 3);
+    NewArray(&yt, ylen, 3);
+    NewArray(&r1, minlen, 3);
+    NewArray(&r2, minlen, 3);
+
+    /***********************/
+    /*    parameter set    */
+    /***********************/
+    parameter_set4search(xlen, ylen, D0_MIN, Lnorm, 
+        score_d8, d0, d0_search, dcu0);
+    int simplify_step    = 40; //for simplified search engine
+    int score_sum_method = 8;  //for scoring method, whether only sum over pairs with dis<score_d8
+
+    int i,j;
+    int *fwdmap0         = new int[xlen+1];
+    int *invmap0         = new int[ylen+1];
+    
+    double TMmax=-1, TM=-1;
+    for(i=0; i<xlen; i++) fwdmap0[i]=-1;
+    for(j=0; j<ylen; j++) invmap0[j]=-1;
+    double local_d0_search = d0_search;
+    int iteration_max=(fast_opt)?2:30;
+    //if (mm_opt==6) iteration_max=1;
+
+    /*************************************************************/
+    /* initial alignment with sequence order dependent alignment */
+    /*************************************************************/
+    CPalign_main(
+        xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        i_opt, a_opt, u_opt, d_opt, fast_opt,
+        mol_type,-1);
+    if (mm_opt==6)
+    {
+        i=0;
+        j=0;
+        for (int r=0;r<seqxA.size();r++)
+        {
+            if (seqxA[r]=='*') // circular permutation point
+            {
+                for (int jj=0;jj<j;jj++) if (invmap0[jj]>=0)
+                    invmap0[jj]+=xlen - i;
+                i=0;
+                continue;
+            }
+            if (seqyA[r]!='-')
+            {
+                if (seqxA[r]!='-') invmap0[j]=i;
+                j++;
+            }
+            if (seqxA[r]!='-') i++;
+        }
+        for (j=0;j<ylen;j++)
+        {
+            i=invmap0[j];
+            if (i>=0) fwdmap0[i]=j;
+        }
+    }
+    do_rotation(xa, xt, xlen, t0, u0);
+    SOI_super2score(xt, ya, xlen, ylen, score, d0, score_d8);
+    for (i=0;i<xlen;i++) for (j=0;j<ylen;j++) scoret[j+1][i+1]=score[i+1][j+1];
+    TMmax=SOI_iter(r1, r2, xtm, ytm, xt, score, path, val, xa, ya,
+        xlen, ylen, t0, u0, invmap0, iteration_max,
+        local_d0_search, Lnorm, d0, score_d8, secx_bond, secy_bond, mm_opt, true);
+    TM   =SOI_iter(r2, r1, ytm, xtm, yt,scoret, path, val, ya, xa,
+        ylen, xlen, t0, u0, fwdmap0, iteration_max,
+        local_d0_search, Lnorm, d0, score_d8, secy_bond, secx_bond, mm_opt, true);
+    //cout<<"TM2="<<TM2<<"\tTM1="<<TM1<<"\tTMmax="<<TMmax<<"\tTM="<<TM<<endl;
+    if (TM>TMmax)
+    {
+        TMmax = TM;
+        for (j=0; j<ylen; j++) invmap0[j]=-1;
+        for (i=0; i<xlen; i++) 
+        {
+            j=fwdmap0[i];
+            if (j>=0) invmap0[j]=i;
+        }
+    }
+    
+    /***************************************************************/
+    /* initial alignment with sequence order independent alignment */
+    /***************************************************************/
+    if (closeK_opt>=3)
+    {
+        get_SOI_initial_assign(xk, yk, closeK_opt, score, path, val,
+            xlen, ylen, t, u, invmap, local_d0_search, d0, score_d8,
+            secx_bond, secy_bond, mm_opt);
+        for (i=0;i<xlen;i++) for (j=0;j<ylen;j++) scoret[j+1][i+1]=score[i+1][j+1];
+
+        SOI_assign2super(r1, r2, xtm, ytm, xt, xa, ya,
+            xlen, ylen, t, u, invmap, local_d0_search, Lnorm, d0, score_d8);
+        TM=SOI_iter(r1, r2, xtm, ytm, xt, score, path, val, xa, ya,
+            xlen, ylen, t, u, invmap, iteration_max,
+            local_d0_search, Lnorm, d0, score_d8, secx_bond, secy_bond, mm_opt);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (j = 0; j<ylen; j++) invmap0[j] = invmap[j];
+        }
+
+        for (i=0;i<xlen;i++) fwdmap0[i]=-1;
+        if (mm_opt==6) NWDP_TM(scoret, path, val, ylen, xlen, -0.6, fwdmap0);
+        soi_egs(scoret, ylen, xlen, fwdmap0, secy_bond, secx_bond, mm_opt);
+        SOI_assign2super(r2, r1, ytm, xtm, yt, ya, xa,
+            ylen, xlen, t, u, fwdmap0, local_d0_search, Lnorm, d0, score_d8);
+        TM=SOI_iter(r2, r1, ytm, xtm, yt, scoret, path, val, ya, xa, ylen, xlen, t, u,
+            fwdmap0, iteration_max, local_d0_search, Lnorm, d0, score_d8,secy_bond, secx_bond, mm_opt);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (j=0; j<ylen; j++) invmap0[j]=-1;
+            for (i=0; i<xlen; i++) 
+            {
+                j=fwdmap0[i];
+                if (j>=0) invmap0[j]=i;
+            }
+        }
+    }
+
+    //*******************************************************************//
+    //    The alignment will not be changed any more in the following    //
+    //*******************************************************************//
+    //check if the initial alignment is generated appropriately
+    bool flag=false;
+    for (i=0; i<xlen; i++) fwdmap0[i]=-1;
+    for (j=0; j<ylen; j++)
+    {
+        i=invmap0[j];
+        invmap[j]=i;
+        if (i>=0)
+        {
+            fwdmap0[i]=j;
+            flag=true;
+        }
+    }
+    if(!flag)
+    {
+        cout << "There is no alignment between the two structures! "
+             << "Program stop with no result!" << endl;
+        TM1=TM2=TM3=TM4=TM5=0;
+        return 1;
+    }
+
+
+    //********************************************************************//
+    //    Detailed TMscore search engine --> prepare for final TMscore    //
+    //********************************************************************//
+    //run detailed TMscore search engine for the best alignment, and
+    //extract the best rotation matrix (t, u) for the best alignment
+    simplify_step=1;
+    if (fast_opt) simplify_step=40;
+    score_sum_method=8;
+    TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+        invmap0, t, u, simplify_step, score_sum_method, local_d0_search,
+        false, Lnorm, score_d8, d0);
+    
+    double rmsd;
+    simplify_step=1;
+    score_sum_method=0;
+    double Lnorm_0=ylen;
+
+    //select pairs with dis<d8 for final TMscore computation and output alignment
+    int k=0;
+    int *m1, *m2;
+    double d;
+    m1=new int[xlen]; //aligned index in x
+    m2=new int[ylen]; //aligned index in y
+    copy_t_u(t, u, t0, u0);
+    
+    //****************************************//
+    //              Final TMscore 1           //
+    //****************************************//
+
+    do_rotation(xa, xt, xlen, t, u);
+    k=0;
+    n_ali=0;
+    for (i=0; i<xlen; i++)
+    {
+        j=fwdmap0[i];
+        if(j>=0)//aligned
+        {
+            n_ali++;
+            d=sqrt(dist(&xt[i][0], &ya[j][0]));
+            if (d <= score_d8)
+            {
+                m1[k]=i;
+                m2[k]=j;
+
+                xtm[k][0]=xa[i][0];
+                xtm[k][1]=xa[i][1];
+                xtm[k][2]=xa[i][2];
+
+                ytm[k][0]=ya[j][0];
+                ytm[k][1]=ya[j][1];
+                ytm[k][2]=ya[j][2];
+
+                r1[k][0] = xt[i][0];
+                r1[k][1] = xt[i][1];
+                r1[k][2] = xt[i][2];
+                r2[k][0] = ya[j][0];
+                r2[k][1] = ya[j][1];
+                r2[k][2] = ya[j][2];
+
+                k++;
+            }
+            else fwdmap0[i]=-1;
+        }
+    }
+    n_ali8=k;
+
+    Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u
+    rmsd0 = sqrt(rmsd0 / n_ali8);
+    
+    //normalized by length of structure A
+    parameter_set4final(xlen+0.0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    d0B=d0;
+    local_d0_search = d0_search;
+    TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+
+    //****************************************//
+    //              Final TMscore 2           //
+    //****************************************//
+    
+    do_rotation(xa, xt, xlen, t0, u0);
+    k=0;
+    for (j=0; j<ylen; j++)
+    {
+        i=invmap0[j];
+        if(i>=0)//aligned
+        {
+            d=sqrt(dist(&xt[i][0], &ya[j][0]));
+            if (d <= score_d8)
+            {
+                m1[k]=i;
+                m2[k]=j;
+
+                xtm[k][0]=xa[i][0];
+                xtm[k][1]=xa[i][1];
+                xtm[k][2]=xa[i][2];
+
+                ytm[k][0]=ya[j][0];
+                ytm[k][1]=ya[j][1];
+                ytm[k][2]=ya[j][2];
+
+                r1[k][0] = xt[i][0];
+                r1[k][1] = xt[i][1];
+                r1[k][2] = xt[i][2];
+                r2[k][0] = ya[j][0];
+                r2[k][1] = ya[j][1];
+                r2[k][2] = ya[j][2];
+
+                k++;
+            }
+            else invmap[j]=invmap0[j]=-1;
+        }
+    }
+
+    //normalized by length of structure B
+    parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    d0A=d0;
+    d0_0=d0A;
+    local_d0_search = d0_search;
+    TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+    TM_0 = TM1;
+
+    if (a_opt>0)
+    {
+        //normalized by average length of structures A, B
+        Lnorm_0=(xlen+ylen)*0.5;
+        parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+        d0a=d0;
+        d0_0=d0a;
+        local_d0_search = d0_search;
+
+        TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM3;
+    }
+    if (u_opt)
+    {
+        //normalized by user assigned length
+        parameter_set4final(Lnorm_ass, D0_MIN, Lnorm,
+            d0, d0_search, mol_type);
+        d0u=d0;
+        d0_0=d0u;
+        Lnorm_0=Lnorm_ass;
+        local_d0_search = d0_search;
+        TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM4;
+    }
+    if (d_opt)
+    {
+        //scaled by user assigned d0
+        parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search);
+        d0_out=d0_scale;
+        d0_0=d0_scale;
+        //Lnorm_0=ylen;
+        local_d0_search = d0_search;
+        TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM5;
+    }
+
+    /* derive alignment from superposition */
+    int ali_len=xlen+ylen;
+    for (j=0;j<ylen;j++) ali_len-=(invmap0[j]>=0);
+    seqxA.assign(ali_len,'-');
+    seqM.assign( ali_len,' ');
+    seqyA.assign(ali_len,'-');
+    
+    //do_rotation(xa, xt, xlen, t, u);
+    do_rotation(xa, xt, xlen, t0, u0);
+
+    Liden=0;
+    //double SO=0;
+    for (j=0;j<ylen;j++)
+    {
+        seqyA[j]=seqy[j];
+        i=invmap0[j];
+        dist_list[j]=-1;
+        if (i<0) continue;
+        d=sqrt(dist(xt[i], ya[j]));
+        if (d<d0_out) seqM[j]=':';
+        else seqM[j]='.';
+        dist_list[j]=d;
+        //SO+=(d<3.5);
+        seqxA[j]=seqx[i];
+        Liden+=(seqx[i]==seqy[j]);
+    }
+    //SO/=getmin(xlen,ylen);
+    k=0;
+    for (i=0;i<xlen;i++)
+    {
+        j=fwdmap0[i];
+        if (j>=0) continue;
+        seqxA[ylen+k]=seqx[i];
+        k++;
+    }
+    //cout<<n_ali8<<'\t'
+        //<<rmsd0<<'\t'
+        //<<100.*SO<<endl;
+
+
+    /* clean up */
+    DeleteArray(&score, xlen+1);
+    DeleteArray(&scoret,ylen+1);
+    DeleteArray(&path,maxlen+1);
+    DeleteArray(&val, maxlen+1);
+    DeleteArray(&xtm, minlen);
+    DeleteArray(&ytm, minlen);
+    DeleteArray(&xt,xlen);
+    DeleteArray(&yt,ylen);
+    DeleteArray(&r1, minlen);
+    DeleteArray(&r2, minlen);
+    delete[]invmap0;
+    delete[]fwdmap0;
+    delete[]m1;
+    delete[]m2;
+    return 0;
+}
+#endif
diff --git a/modules/bindings/src/tmalign/TMalign.cpp b/modules/bindings/src/USalign/TMalign.cpp
similarity index 93%
rename from modules/bindings/src/tmalign/TMalign.cpp
rename to modules/bindings/src/USalign/TMalign.cpp
index 7ea33e1a7..c822d4c30 100644
--- a/modules/bindings/src/tmalign/TMalign.cpp
+++ b/modules/bindings/src/USalign/TMalign.cpp
@@ -9,7 +9,7 @@ void print_version()
     cout << 
 "\n"
 " **********************************************************************\n"
-" * TM-align (Version 20210520): protein and RNA structure alignment   *\n"
+" * TM-align (Version 20220623): protein and RNA structure alignment   *\n"
 " * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005)  *\n"
 " *             S Gong, C Zhang, Y Zhang. Bioinformatics, bz282 (2019) *\n"
 " * Please email comments and suggestions to yangzhanglab@umich.edu    *\n"
@@ -67,7 +67,7 @@ void print_extra_help()
 "            -1: full output, but without version or citation information\n"
 "\n"
 "    -byresi  Whether to assume residue index correspondence between the\n" 
-"             two structures.\n"
+"             two structures. The same as -TMscore.\n"
 "             0: (default) sequence independent alignment\n"
 "             1: (same as TMscore program) sequence-dependent superposition,\n"
 "                i.e. align by residue index\n"
@@ -75,6 +75,11 @@ void print_extra_help()
 "                align by residue index and chain ID\n"
 "             3: (similar to TMscore -c, should be used with -ter <=1)\n"
 "                align by residue index and order of chain\n"
+//"             4: sequence dependent alignment: perform Needleman-Wunsch\n"
+//"                global sequence alignment, followed by TM-score superposition\n"
+"             5: sequence dependent alignment: perform glocal sequence\n"
+"                alignment followed by TM-score superposition.\n"
+"                -byresi 5 is thee same as -seq\n"
 "\n"
 "    -TMcut   -1: (default) do not consider TMcut\n"
 "             Values in [0.5,1): Do not proceed with TM-align for this\n"
@@ -308,10 +313,15 @@ int main(int argc, char *argv[])
         {
             TMcut=atof(argv[i + 1]); i++;
         }
-        else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) )
+        else if ((!strcmp(argv[i],"-byresi") || !strcmp(argv[i],"-tmscore") ||
+                  !strcmp(argv[i],"-TMscore")) && i < (argc-1) )
         {
             byresi_opt=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-seq") )
+        {
+            byresi_opt=5;
+        }
         else if ( !strcmp(argv[i],"-cp") )
         {
             cp_opt=1;
@@ -374,10 +384,10 @@ int main(int argc, char *argv[])
     {
         if (i_opt)
             PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I");
-        if (byresi_opt<0 || byresi_opt>3)
-            PrintErrorAndQuit("-byresi can only be 0, 1, 2 or 3");
-        if (byresi_opt>=2 && ter_opt>=2)
-            PrintErrorAndQuit("-byresi >=2 should be used with -ter <=1");
+        if (byresi_opt<0 || byresi_opt>5)
+            PrintErrorAndQuit("-byresi can only be 0, 1, 2, 3, 4, or 5");
+        if (byresi_opt>=2 && byresi_opt<=3 && ter_opt>=2)
+            PrintErrorAndQuit("-byresi 2 and -byresi 3 should be used with -ter <=1");
     }
     if (split_opt==1 && ter_opt!=0)
         PrintErrorAndQuit("-split 1 should be used with -ter 0");
@@ -566,9 +576,9 @@ int main(int argc, char *argv[])
                         n_ali8, L_ali, TM_ali, rmsd_ali,
                         TM_0, d0_0, d0A, d0B,
                         Lnorm_ass, d0_scale, d0a, d0u, 
-                        (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(),
+                        (m_opt?fname_matrix:"").c_str(),
                         outfmt_opt, ter_opt, 0, split_opt, o_opt,
-                        (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(),
+                        (o_opt?fname_super:"").c_str(),
                         i_opt, a_opt, u_opt, d_opt,mirror_opt,
                         resi_vec1, resi_vec2 );
 
@@ -618,6 +628,6 @@ int main(int argc, char *argv[])
 
     t2 = clock();
     float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
-    printf("Total CPU time is %5.2f seconds\n", diff);
+    printf("#Total CPU time is %5.2f seconds\n", diff);
     return 0;
 }
diff --git a/modules/bindings/src/tmalign/TMalign.h b/modules/bindings/src/USalign/TMalign.h
similarity index 88%
rename from modules/bindings/src/tmalign/TMalign.h
rename to modules/bindings/src/USalign/TMalign.h
index 9187ad3cb..81196a807 100644
--- a/modules/bindings/src/tmalign/TMalign.h
+++ b/modules/bindings/src/USalign/TMalign.h
@@ -1,9 +1,12 @@
 /* Functions for the core TMalign algorithm, including the entry function
  * TMalign_main */
+#ifndef TMalign_h
+#define TMalign_h 1
 
 #include "param_set.h"
 #include "NW.h"
 #include "Kabsch.h"
+#include "NWalign.h"
 
 //     1, collect those residues with dis<d;
 //     2, calculate TMscore
@@ -540,6 +543,10 @@ double get_score_fast( double **r1, double **r2, double **xtm, double **ytm,
    
    //second iteration 
     double d002t=d002;
+    vector<double> dis_vec(dis, dis+n_ali);
+    sort(dis_vec.begin(), dis_vec.end());
+    if (d002t<dis_vec[2]) d002t=dis_vec[2];
+    dis_vec.clear();
     while(1)
     {
         j=0;
@@ -577,7 +584,10 @@ double get_score_fast( double **r1, double **r2, double **xtm, double **ytm,
         
         //third iteration
         d002t=d002+1;
-       
+        vector<double> dis_vec(dis, dis+n_ali);
+        sort(dis_vec.begin(), dis_vec.end());
+        if (d002t<dis_vec[2]) d002t=dis_vec[2];
+        dis_vec.clear();
         while(1)
         {
             j=0;
@@ -852,6 +862,11 @@ void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt)
             if (i>0 && j+1<len && bp[i-1][j+1]) continue;
             if (!bp[i+1][j-1]) continue;
             sec_str(len,seq, bp, i,j,ii,jj);
+            if (jj<i || j<ii)
+            {
+                ii=i;
+                jj=j;
+            }
             A0.push_back(i);
             B0.push_back(j);
             C0.push_back(ii);
@@ -1467,11 +1482,14 @@ void output_pymol(const string xname, const string yname,
 {
     int compress_type=0; // uncompressed file
     ifstream fin;
+#ifndef REDI_PSTREAM_H_SEEN
+    ifstream fin_gz;
+#else
     redi::ipstream fin_gz; // if file is compressed
     if (xname.size()>=3 && 
         xname.substr(xname.size()-3,3)==".gz")
     {
-        fin_gz.open("zcat "+xname);
+        fin_gz.open("gunzip -c "+xname);
         compress_type=1;
     }
     else if (xname.size()>=4 && 
@@ -1480,7 +1498,9 @@ void output_pymol(const string xname, const string yname,
         fin_gz.open("bzcat "+xname);
         compress_type=2;
     }
-    else fin.open(xname.c_str());
+    else
+#endif
+        fin.open(xname.c_str());
 
     stringstream buf;
     stringstream buf_pymol;
@@ -1534,7 +1554,7 @@ void output_pymol(const string xname, const string yname,
             if (line.compare(0,11,"_atom_site.")) continue;
             _atom_site.clear();
             atom_site_pos=0;
-            _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+            _atom_site[Trim(line.substr(11))]=atom_site_pos;
             while(1)
             {
                 while(1)
@@ -1552,7 +1572,7 @@ void output_pymol(const string xname, const string yname,
                     if (line.size()) break;
                 }
                 if (line.compare(0,11,"_atom_site.")) break;
-                _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+                _atom_site[Trim(line.substr(11))]=++atom_site_pos;
                 buf<<line<<'\n';
             }
 
@@ -2438,30 +2458,37 @@ void output_rasmol(const string xname, const string yname,
 void output_rotation_matrix(const char* fname_matrix,
     const double t[3], const double u[3][3])
 {
-    fstream fout;
-    fout.open(fname_matrix, ios::out | ios::trunc);
-    if (fout)// succeed
-    {
-        fout << "------ The rotation matrix to rotate Structure_1 to Structure_2 ------\n";
-        char dest[1000];
-        sprintf(dest, "m %18s %14s %14s %14s\n", "t[m]", "u[m][0]", "u[m][1]", "u[m][2]");
-        fout << string(dest);
-        for (int k = 0; k < 3; k++)
-        {
-            sprintf(dest, "%d %18.10f %14.10f %14.10f %14.10f\n", k, t[k], u[k][0], u[k][1], u[k][2]);
-            fout << string(dest);
-        }
-        fout << "\nCode for rotating Structure 1 from (x,y,z) to (X,Y,Z):\n"
-                "for(i=0; i<L; i++)\n"
-                "{\n"
-                "   X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
-                "   Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
-                "   Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
-                "}\n";
-        fout.close();
-    }
+    stringstream ss; // purpose: format t and u as a text report; it is built in memory first so the same text can go to stdout or to a file
+    ss << "------ The rotation matrix to rotate Structure_1 to Structure_2 ------\n";
+    char dest[1000]; // scratch buffer reused by both sprintf calls below
+    sprintf(dest, "m %18s %14s %14s %14s\n", "t[m]", "u[m][0]", "u[m][1]", "u[m][2]"); // column header row
+    ss << string(dest);
+    for (int k = 0; k < 3; k++) // one row per k: the index, t[k], then u[k][0..2]
+    {
+        sprintf(dest, "%d %18.10f %14.10f %14.10f %14.10f\n", k, t[k], u[k][0], u[k][1], u[k][2]);
+        ss << string(dest);
+    }
+    ss << "\nCode for rotating Structure 1 from (x,y,z) to (X,Y,Z):\n"
+            "for(i=0; i<L; i++)\n"
+            "{\n"
+            "   X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
+            "   Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
+            "   Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
+            "}\n";
+    if (strcmp(fname_matrix,(char *)("-"))==0) // the file name "-" selects stdout instead of a file
+       cout<<ss.str();
     else
-        cout << "Open file to output rotation matrix fail.\n";
+    {
+        fstream fout;
+        fout.open(fname_matrix, ios::out | ios::trunc); // ios::trunc: an existing file is overwritten
+        if (fout)
+        {
+            fout<<ss.str();
+            fout.close();
+        }
+        else cout << "Open file to output rotation matrix fail.\n"; // the failure is only reported on stdout; the function returns void
+    }
+    ss.str(string()); // replaces the stream's contents with an empty string before it goes out of scope
 }
 
 //output the final results
@@ -2560,6 +2587,82 @@ void output_results(const string xname, const string yname,
             xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden);
 }
 
+void output_mTMalign_results(const string xname, const string yname,
+    const string chainID1, const string chainID2,
+    const int xlen, const int ylen, double t[3], double u[3][3],
+    const double TM1, const double TM2,
+    const double TM3, const double TM4, const double TM5,
+    const double rmsd, const double d0_out, const char *seqM,
+    const char *seqxA, const char *seqyA, const double Liden,
+    const int n_ali8, const int L_ali, const double TM_ali,
+    const double rmsd_ali, const double TM_0, const double d0_0,
+    const double d0A, const double d0B, const double Lnorm_ass,
+    const double d0_scale, const double d0a, const double d0u,
+    const char* fname_matrix, const int outfmt_opt, const int ter_opt,
+    const int mm_opt, const int split_opt, const int o_opt,
+    const string fname_super, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const int mirror_opt,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
+{ // Prints a score summary in the layout selected by outfmt_opt, then writes the optional rotation matrix and superposition output. d0_out, TM_0 and d0_0 are accepted but not read here.
+    if (outfmt_opt<=0) // full report: aligned length/RMSD/seqID, the TM-score lines, then seqM
+    {
+        printf("Average aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
+        printf("Average TM-score= %6.5f (normalized by length of shorter structure: L=%d, d0=%.2f)\n", TM2, xlen, d0B);
+        printf("Average TM-score= %6.5f (normalized by length of longer structure: L=%d, d0=%.2f)\n", TM1, ylen, d0A);
+        // NOTE(review): the "shorter"/"longer" labels pair TM2 with xlen and TM1 with ylen; nothing in this function checks xlen<=ylen -- confirm at the call site.
+        if (a_opt==1) // this branch tests a_opt==1, whereas the outfmt_opt==1 branch below tests a_opt!=0
+            printf("Average TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
+        if (u_opt)
+            printf("Average TM-score= %6.5f (normalized by average L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u);
+        if (d_opt)
+            printf("Average TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen);
+    
+        //output alignment (only seqM is printed; seqxA/seqyA are just forwarded to output_pymol/output_rasmol below)
+        printf("In the following, seqID=n_identical/L.\n\n%s\n", seqM);
+    }
+    else if (outfmt_opt==1) // compact record: seqM, '#'-prefixed score lines, then a "$$$$" terminator line
+    {
+        printf("%s\n", seqM);
+
+        printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n",
+            n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
+
+        if (i_opt) // the user-specified-alignment statistics are reported only in this output format
+            printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali);
+
+        if(a_opt)
+            printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
+
+        if(u_opt)
+            printf("# TM-score=%.5f (normalized by average L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u);
+
+        if(d_opt)
+            printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen);
+
+        printf("$$$$\n");
+    }
+    else if (outfmt_opt==2) // one tab-separated line; its newline comes from the cout<<endl below
+    {
+        printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d",
+            xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(),
+            TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0,
+            xlen, ylen, n_ali8);
+    }
+    cout << endl; // runs for every outfmt_opt, including values >2 for which nothing was printed above
+
+    if (strlen(fname_matrix)) output_rotation_matrix(fname_matrix, t, u); // skipped when fname_matrix is the empty string
+    // o_opt selects the superposition writer: 1 -> output_pymol, 2 -> output_rasmol, any other value -> none
+    if (o_opt==1)
+        output_pymol(xname, yname, fname_super, t, u, ter_opt,
+            mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA,
+            resi_vec1, resi_vec2, chainID1, chainID2);
+    else if (o_opt==2)
+        output_rasmol(xname, yname, fname_super, t, u, ter_opt,
+            mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA,
+            resi_vec1, resi_vec2, chainID1, chainID2,
+            xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden);
+}
+
 double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm,
     double **xt, double **x, double **y, int xlen, int ylen, int invmap[],
     int& L_ali, double& RMSD, double D0_MIN, double Lnorm, double d0,
@@ -2757,7 +2860,6 @@ int TMalign_main(double **xa, double **ya,
     //    get initial alignment from user's input:    //
     //    Stick to the initial alignment              //
     //************************************************//
-    bool bAlignStick = false;
     if (i_opt==3)// if input has set parameter for "-I"
     {
         // In the original code, this loop starts from 1, which is
@@ -2798,13 +2900,12 @@ int TMalign_main(double **xa, double **ya,
             TMmax = TM;
             for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
         }
-        bAlignStick = true;
     }
 
     /******************************************************/
     /*    get initial alignment with gapless threading    */
     /******************************************************/
-    if (!bAlignStick)
+    if (i_opt<=1)
     {
         get_initial(r1, r2, xtm, ytm, xa, ya, xlen, ylen, invmap0, d0,
             d0_search, fast_opt, t, u);
@@ -3007,60 +3108,60 @@ int TMalign_main(double **xa, double **ya,
                 return 6;
             }
         }
+    }
 
-        //************************************************//
-        //    get initial alignment from user's input:    //
-        //************************************************//
-        if (i_opt==1)// if input has set parameter for "-i"
-        {
-            for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
-                invmap[j] = -1;
+    //************************************************//
+    //    get initial alignment from user's input:    //
+    //************************************************//
+    if (i_opt>=1 && i_opt<=2)// if input has set parameter for "-i"
+    {
+        for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
+            invmap[j] = -1;
 
-            int i1 = -1;// in C version, index starts from zero, not from one
-            int i2 = -1;
-            int L1 = sequence[0].size();
-            int L2 = sequence[1].size();
-            int L = min(L1, L2);// Get positions for aligned residues
-            for (int kk1 = 0; kk1 < L; kk1++)
+        int i1 = -1;// in C version, index starts from zero, not from one
+        int i2 = -1;
+        int L1 = sequence[0].size();
+        int L2 = sequence[1].size();
+        int L = min(L1, L2);// Get positions for aligned residues
+        for (int kk1 = 0; kk1 < L; kk1++)
+        {
+            if (sequence[0][kk1] != '-')
+                i1++;
+            if (sequence[1][kk1] != '-')
             {
-                if (sequence[0][kk1] != '-')
-                    i1++;
-                if (sequence[1][kk1] != '-')
-                {
-                    i2++;
-                    if (i2 >= ylen || i1 >= xlen) kk1 = L;
-                    else if (sequence[0][kk1] != '-') invmap[i2] = i1;
-                }
+                i2++;
+                if (i2 >= ylen || i1 >= xlen) kk1 = L;
+                else if (sequence[0][kk1] != '-') invmap[i2] = i1;
             }
+        }
 
-            //--------------- 2. Align proteins from original alignment
-            double prevD0_MIN = D0_MIN;// stored for later use
-            int prevLnorm = Lnorm;
-            double prevd0 = d0;
-            TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya,
-                xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0,
-                d0_search, score_d8, t, u, mol_type);
-            D0_MIN = prevD0_MIN;
-            Lnorm = prevLnorm;
-            d0 = prevd0;
+        //--------------- 2. Align proteins from original alignment
+        double prevD0_MIN = D0_MIN;// stored for later use
+        int prevLnorm = Lnorm;
+        double prevd0 = d0;
+        TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya,
+            xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0,
+            d0_search, score_d8, t, u, mol_type);
+        D0_MIN = prevD0_MIN;
+        Lnorm = prevLnorm;
+        d0 = prevd0;
 
-            TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya,
-                xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm,
-                score_d8, d0);
-            if (TM > TMmax)
-            {
-                TMmax = TM;
-                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
-            }
-            // Different from get_initial, get_initial_ss and get_initial_ssplus
-            TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
-                xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
-                local_d0_search, D0_MIN, Lnorm, d0, score_d8);
-            if (TM>TMmax)
-            {
-                TMmax = TM;
-                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
-            }
+        TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya,
+            xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm,
+            score_d8, d0);
+        if (TM > TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+        }
+        // Different from get_initial, get_initial_ss and get_initial_ssplus
+        TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+            xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
+            local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
         }
     }
 
@@ -3081,7 +3182,7 @@ int TMalign_main(double **xa, double **ya,
     }
     if(!flag)
     {
-        cout << "There is no alignment between the two proteins! "
+        cout << "There is no alignment between the two structures! "
              << "Program stop with no result!" << endl;
         TM1=TM2=TM3=TM4=TM5=0;
         return 1;
@@ -3240,6 +3341,8 @@ int TMalign_main(double **xa, double **ya,
 
     int kk=0, i_old=0, j_old=0;
     d=0;
+    Liden=0;
+    //double SO=0;
     for(int k=0; k<n_ali8; k++)
     {
         for(int i=i_old; i<m1[k]; i++)
@@ -3266,10 +3369,16 @@ int TMalign_main(double **xa, double **ya,
         d=sqrt(dist(&xt[m1[k]][0], &ya[m2[k]][0]));
         if(d<d0_out) seqM[kk]=':';
         else         seqM[kk]='.';
+        //SO+=(d<3.5);
         kk++;  
         i_old=m1[k]+1;
         j_old=m2[k]+1;
     }
+    //SO/=getmin(xlen,ylen);
+    //cout<<n_ali8<<'\t'
+        //<<rmsd0<<'\t'
+        //<<100.*SO<<endl;
+
 
     //tail
     for(int i=i_old; i<xlen; i++)
@@ -3342,13 +3451,14 @@ int CPalign_main(double **xa, double **ya,
     secx_cp[2*xlen]=0;
     
     /* fTM-align alignment */
-    double TM1_cp,TM2_cp;
+    double TM1_cp,TM2_cp,TM4_cp;
+    const double Lnorm_tmp=getmin(xlen,ylen);
     TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
-        t0, u0, TM1_cp, TM2_cp, TM3, TM4, TM5,
+        t0, u0, TM1_cp, TM2_cp, TM3, TM4_cp, TM5,
         d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
         rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
-        xlen*2, ylen, sequence, Lnorm_ass, d0_scale,
-        0, false, false, false, true, mol_type, -1);
+        xlen*2, ylen, sequence, Lnorm_tmp, d0_scale,
+        0, false, true, false, true, mol_type, -1);
 
     /* delete gap in seqxA_cp */
     r=0;
@@ -3392,12 +3502,14 @@ int CPalign_main(double **xa, double **ya,
         t0, u0, TM1, TM2, TM3, TM4, TM5,
         d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
         rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
-        xlen, ylen, sequence, Lnorm_ass, d0_scale,
-        0, false, false, false, true, mol_type, -1);
+        xlen, ylen, sequence, Lnorm_tmp, d0_scale,
+        0, false, true, false, true, mol_type, -1);
 
-    /* do not use cricular permutation of number of aligned residues is not
+    /* do not use circular permutation if the number of aligned residues is not
      * larger than sequence-order dependent alignment */
-    if (n_ali8>cp_aln_best) cp_point=0;
+    //cout<<"cp: aln="<<cp_aln_best<<"\tTM="<<TM4_cp<<endl;
+    //cout<<"TM: aln="<<n_ali8<<"\tTM="<<TM4<<endl;
+    if (n_ali8>=cp_aln_best || TM4>=TM4_cp) cp_point=0;
 
     /* prepare structure for final alignment */
     seqM.clear();
@@ -3418,6 +3530,31 @@ int CPalign_main(double **xa, double **ya,
     seqx_cp[xlen]=0;
     secx_cp[xlen]=0;
 
+    /* test another round of alignment as concatenated alignment can
+     * inflate the number of aligned residues and TM-score. e.g. 1yadA 2duaA */
+    if (cp_point!=0)
+    {
+        TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
+            t0, u0, TM1_cp, TM2_cp, TM3, TM4_cp, TM5,
+            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, cp_aln_best,
+            xlen, ylen, sequence, Lnorm_tmp, d0_scale,
+            0, false, true, false, true, mol_type, -1);
+        //cout<<"cp: aln="<<cp_aln_best<<"\tTM="<<TM4_cp<<endl;
+        if (n_ali8>=cp_aln_best || TM4>=TM4_cp)
+        {
+            cp_point=0;
+            for (r=0;r<xlen;r++)
+            {
+                xa_cp[r][0]=xa[r][0];
+                xa_cp[r][1]=xa[r][1];
+                xa_cp[r][2]=xa[r][2];
+                seqx_cp[r]=seqx[r];
+                secx_cp[r]=secx[r];
+            }
+        }
+    }
+
     /* full TM-align */
     TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
         t0, u0, TM1, TM2, TM3, TM4, TM5,
@@ -3459,3 +3596,46 @@ int CPalign_main(double **xa, double **ya,
     seqyA_cp.clear();
     return cp_point;
 }
+
+/* Locate the circular permutation (CP) marker '*' in seqxA, add the number of
+ * alignment columns and non-gap residues before/after it to the four counters
+ * (which are not reset here), print the result in the format selected by
+ * outfmt_opt and return whether a marker was found. seqyA is not inspected. */
+bool output_cp(const string&xname, const string&yname,
+    const string &seqxA, const string &seqyA, const int outfmt_opt,
+    int &left_num, int &right_num, int &left_aln_num, int &right_aln_num)
+{
+    /* scan the alignment row of structure_1 column by column */
+    bool after_cp=false;
+    for (int col=0;col<seqxA.size();col++)
+    {
+        const char aa=seqxA[col];
+        if (aa=='*') { after_cp=true; continue; }
+        /* pick the counters of the side this column belongs to */
+        int &aln_num=after_cp?right_aln_num:left_aln_num;
+        int &res_num=after_cp?right_num:left_num;
+        aln_num++;
+        if (aa!='-') res_num++;
+    }
+    if (after_cp)
+    {
+        if (outfmt_opt<=0)
+            cout<<"CP point in structure_1 alignment: "<<left_aln_num<<'/'
+                <<right_aln_num<<'\n'<<"CP point in structure_1: "
+                <<left_num<<'/'<<right_num<<endl;
+        else if (outfmt_opt==1)
+            cout<<"#CP_in_aln="<<left_aln_num<<'/'<<right_aln_num
+                <<"\tCP_in_seq="<<left_num<<'/'<<right_num<<endl;
+        else if (outfmt_opt==2)
+            cout<<"@"<<xname<<'\t'<<yname<<'\t'<<left_aln_num<<'/'
+                <<right_aln_num<<'\t'<<left_num<<'/'<<right_num<<endl;
+        return true;
+    }
+    /* no '*' marker: report the absence of a circular permutation */
+    if (outfmt_opt<=0) cout<<"No CP"<<endl;
+    else if (outfmt_opt==1) cout<<"#No CP"<<endl;
+    else if (outfmt_opt==2)
+        cout<<"@"<<xname<<'\t'<<yname<<'\t'<<"No CP"<<endl;
+    return false;
+}
+#endif
diff --git a/modules/bindings/src/tmalign/TMscore.cpp b/modules/bindings/src/USalign/TMscore.cpp
similarity index 91%
rename from modules/bindings/src/tmalign/TMscore.cpp
rename to modules/bindings/src/USalign/TMscore.cpp
index c2ca9958a..c84d742c1 100644
--- a/modules/bindings/src/tmalign/TMscore.cpp
+++ b/modules/bindings/src/USalign/TMscore.cpp
@@ -34,15 +34,15 @@ void print_extra_help()
 "    -dir     Perform all-against-all alignment among the list of PDB\n"
 "             chains listed by 'chain_list' under 'chain_folder'. Note\n"
 "             that the slash is necessary.\n"
-"             $ TMalign -dir chain_folder/ chain_list\n"
+"             $ TMscore -dir chain_folder/ chain_list\n"
 "\n"
 "    -dir1    Use chain2 to search a list of PDB chains listed by 'chain1_list'\n"
 "             under 'chain1_folder'. Note that the slash is necessary.\n"
-"             $ TMalign -dir1 chain1_folder/ chain1_list chain2\n"
+"             $ TMscore -dir1 chain1_folder/ chain1_list chain2\n"
 "\n"
 "    -dir2    Use chain1 to search a list of PDB chains listed by 'chain2_list'\n"
 "             under 'chain2_folder'\n"
-"             $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n"
+"             $ TMscore chain1 -dir2 chain2_folder/ chain2_list\n"
 "\n"
 "    -suffix  (Only when -dir1 and/or -dir2 are set, default is empty)\n"
 "             add file name suffix to files listed by chain1_list or chain2_list\n"
@@ -106,13 +106,21 @@ void print_help(bool h_opt=false)
 " 2. TM-score normalized with an assigned scale d0 e.g. 5 A:\n"
 "     $ TMscore model.pdb native.pdb -d 5\n"
 "\n"
-" 3. TM-score normalized by a specific length, e.g. 120 AA:\n"
-"     $ TMscore model.pdb native.pdv -l 120\n"
+" 3. TM-score normalized by a specific length, e.g. 120 residues:\n"
+"     $ TMscore model.pdb native.pdb -l 120\n"
 "\n"
 " 4. TM-score with superposition output, e.g. 'TM_sup.pdb':\n"
 "     $ TMscore model.pdb native.pdb -o TM_sup.pdb\n"
 "    To view superimposed atomic model by PyMOL:\n"
 "     $ pymol TM_sup.pdb native.pdb\n"
+"\n"
+" 5. By default, this program assumes that residue pair with the same\n"
+"    residue index accross the two structure files are equivalent. This\n"
+"    often requires that the residue index in the input structures are\n"
+"    renumbered beforehand. Alternatively, residue equivalence can be\n"
+"    established by sequence alignment:\n"
+"     $ TMscore model.pdb native.pdb -seq\n"
+"\n"
     <<endl;
 
     if (h_opt) print_extra_help();
@@ -253,6 +261,10 @@ int main(int argc, char *argv[])
         {
             byresi_opt=2;
         }
+        else if ( !strcmp(argv[i],"-seq") )
+        {
+            byresi_opt=5;
+        }
         else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) )
         {
             mirror_opt=atoi(argv[i + 1]); i++;
@@ -307,8 +319,8 @@ int main(int argc, char *argv[])
         PrintErrorAndQuit("Wrong value for option -d!  It should be >0");
     if (outfmt_opt>=2 && (a_opt || u_opt || d_opt))
         PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d");
-    if (byresi_opt>=2 && ter_opt>=2)
-        PrintErrorAndQuit("-byresi >=2 should be used with -ter <=1");
+    if (byresi_opt>=2 && byresi_opt<=3 && ter_opt>=2)
+        PrintErrorAndQuit("-c should be used with -ter <=1");
     if (split_opt==1 && ter_opt!=0)
         PrintErrorAndQuit("-split 1 should be used with -ter 0");
     else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
@@ -329,6 +341,11 @@ int main(int argc, char *argv[])
     else if (dir2_opt.size()==0) chain2_list.push_back(yname);
     else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt);
 
+    if (byresi_opt>=4)
+        cerr<<"WARNING! The residue correspondence between the two structures"
+            <<" are automatically established by sequence alignment. Results"
+            <<" may be unreliable."<<endl;
+
     if (outfmt_opt==2)
         cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t"
             <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl;
@@ -447,6 +464,7 @@ int main(int argc, char *argv[])
                     int L_lt_d=0;
                     double GDT_list[5]={0,0,0,0,0}; // 0.5, 1, 2, 4, 8
                     double maxsub=0;
+                    TM1=TM2=TM3=TM4=TM5=0;
 
                     /* entry function for structure alignment */
                     TMscore_main(
@@ -473,9 +491,9 @@ int main(int argc, char *argv[])
                         n_ali8, L_ali, TM_ali, rmsd_ali,
                         TM_0, d0_0, d0A, d0B,
                         Lnorm_ass, d0_scale, d0a, d0u, 
-                        (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(),
+                        (m_opt?fname_matrix:"").c_str(),
                         outfmt_opt, ter_opt, 
-                        (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(),
+                        (o_opt?fname_super:"").c_str(),
                         a_opt, u_opt, d_opt, mirror_opt,
                         L_lt_d, rmsd_d0_out, GDT_list, maxsub,
                         split_opt, resi_vec1, resi_vec2);
diff --git a/modules/bindings/src/tmalign/TMscore.h b/modules/bindings/src/USalign/TMscore.h
similarity index 95%
rename from modules/bindings/src/tmalign/TMscore.h
rename to modules/bindings/src/USalign/TMscore.h
index 445335c79..90ded3c01 100644
--- a/modules/bindings/src/tmalign/TMscore.h
+++ b/modules/bindings/src/USalign/TMscore.h
@@ -58,7 +58,7 @@ int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
                 }
             }
         }
-        //there are not enough feasible pairs, reliefe the threshold         
+        //there are not enough feasible pairs, relieve the threshold         
         if(n_cut<3 && n_ali>3)
         {
             inc++;
@@ -130,7 +130,7 @@ int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
                 }
             }
         }
-        //there are not enough feasible pairs, reliefe the threshold         
+        //there are not enough feasible pairs, relieve the threshold         
         if (n_cut<3 && n_ali>3)
         {
             inc++;
@@ -309,6 +309,7 @@ double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm,
     return score_max;
 }
 
+
 double TMscore8_search_standard( double **r1, double **r2,
     double **xtm, double **ytm, double **xt, int Lali,
     double t0[3], double u0[3][3], int simplify_step, int score_sum_method,
@@ -353,7 +354,7 @@ double TMscore8_search_standard( double **r1, double **r2,
     //find the maximum score starting from local structures superposition
     int i_ali[kmax], n_cut;
     int L_frag; //fragment length
-    int iL_max; //maximum starting postion for the fragment
+    int iL_max; //maximum starting position for the fragment
 
     for (i_init = 0; i_init<n_init; i_init++)
     {
@@ -560,7 +561,7 @@ int TMscore_main(double **xa, double **ya,
     /***********************/
     parameter_set4search(xlen, ylen, D0_MIN, Lnorm, 
         score_d8, d0, d0_search, dcu0);
-    int simplify_step    = 40; //for similified search engine
+    int simplify_step    = 40; //for simplified search engine
     int score_sum_method = 8;  //for scoring method, whether only sum over pairs with dis<score_d8
 
     int i;
@@ -616,7 +617,7 @@ int TMscore_main(double **xa, double **ya,
     //*******************************************************************//
     //    The alignment will not be changed any more in the following    //
     //*******************************************************************//
-    //check if the initial alignment is generated approriately
+    //check if the initial alignment is generated appropriately
     bool flag=false;
     for(i=0; i<ylen; i++)
     {
@@ -628,8 +629,8 @@ int TMscore_main(double **xa, double **ya,
     }
     if(!flag)
     {
-        cout << "There is no alignment between the two proteins!" << endl;
-        cout << "Program stop with no result!" << endl;
+        cout << "There is no alignment between the two structures! "
+             << "Program stop with no result!" << endl;
         return 1;
     }
 
@@ -652,7 +653,7 @@ int TMscore_main(double **xa, double **ya,
     //    Detailed TMscore search engine --> prepare for final TMscore    //
     //********************************************************************//
     //run detailed TMscore search engine for the best alignment, and
-    //extract the best rotation matrix (t, u) for the best alginment
+    //extract the best rotation matrix (t, u) for the best alignment
     simplify_step=1;
     if (fast_opt) simplify_step=40;
     score_sum_method=8;
diff --git a/modules/bindings/src/USalign/USalign.cpp b/modules/bindings/src/USalign/USalign.cpp
new file mode 100644
index 000000000..fdd1d8b95
--- /dev/null
+++ b/modules/bindings/src/USalign/USalign.cpp
@@ -0,0 +1,3137 @@
+/* command line argument parsing and document of US-align main program */
+
+#include "MMalign.h"
+#include "SOIalign.h"
+#include "flexalign.h"
+
+using namespace std;
+
+void print_version() // write the US-align version/citation banner to stdout
+{
+    const char *const banner=
+"\n"
+" ********************************************************************\n"
+" * US-align (Version 20220924)                                      *\n"
+" * Universal Structure Alignment of Proteins and Nucleic Acids      *\n"
+" * Reference: C Zhang, M Shine, AM Pyle, Y Zhang. (2022) Nat Methods*\n"
+" * Please email comments and suggestions to zhang@zhanggroup.org    *\n"
+" ********************************************************************";
+    cout << banner << endl;
+}
+
+void print_extra_help() // write the "Additional options" section of the help text to stdout
+{
+    cout <<
+"Additional options:\n"
+"      -v  Print the version of US-align\n"
+"\n"
+"      -a  TM-score normalized by the average length of two structures\n"
+"          T or F, (default F). -a does not change the final alignment.\n"
+"\n"
+"   -fast  Fast but slightly inaccurate alignment\n"
+"\n"
+"    -dir  Perform all-against-all alignment among the list of PDB\n"
+"          chains listed by 'chain_list' under 'chain_folder'. Note\n"
+"          that the slash is necessary.\n"
+"          $ USalign -dir chain_folder/ chain_list\n"
+"\n"
+"   -dir1  Use chain2 to search a list of PDB chains listed by 'chain1_list'\n"
+"          under 'chain1_folder'. Note that the slash is necessary.\n"
+"          $ USalign -dir1 chain1_folder/ chain1_list chain2\n"
+"\n"
+"   -dir2  Use chain1 to search a list of PDB chains listed by 'chain2_list'\n"
+"          under 'chain2_folder'\n"
+"          $ USalign chain1 -dir2 chain2_folder/ chain2_list\n"
+"\n"
+" -suffix  (Only when -dir1 and/or -dir2 are set, default is empty)\n"
+"          add file name suffix to files listed by chain1_list or chain2_list\n"
+"\n"
+"   -atom  4-character atom name used to represent a residue.\n"
+"          Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n"
+"          (note the spaces before and after CA).\n"
+"\n"
+"  -split  Whether to split PDB file into multiple chains\n"
+"           0: treat the whole structure as one single chain\n"
+"           1: treat each MODEL as a separate chain\n"
+"           2: (default) treat each chain as a separate chain\n"
+"\n"
+" -outfmt  Output format\n"
+"           0: (default) full output\n"
+"           1: fasta format compact output\n"
+"           2: tabular format very compact output\n"
+"          -1: full output, but without version or citation information\n"
+"\n"
+"  -TMcut  -1: (default) do not consider TMcut\n"
+"          Values in [0.5,1): Do not proceed with TM-align for this\n"
+"          structure pair if TM-score is unlikely to reach TMcut.\n"
+"          TMcut is normalized as set by -a option:\n"
+"          -2: normalized by longer structure length\n"
+"          -1: normalized by shorter structure length\n"
+"           0: (default, same as F) normalized by second structure\n"
+"           1: same as T, normalized by average structure length\n"
+"\n"
+" -mirror  Whether to align the mirror image of input structure\n"
+"           0: (default) do not align mirrored structure\n"
+"           1: align mirror of Structure_1 to origin Structure_2,\n"
+"              which usually requires the '-het 1' option:\n"
+"              $ USalign 4glu.pdb 3p9w.pdb -mirror 1 -het 1\n"
+"\n"
+"    -het  Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"           0: (default) only align 'ATOM  ' residues\n"
+"           1: align both 'ATOM  ' and 'HETATM' residues\n"
+"           2: align both 'ATOM  ' and MSE residues\n"
+"\n"
+"   -full  Whether to show full pairwise alignment of individual chains for\n"
+"          -mm 2 or 4. T or F, (default F)\n"
+//"\n"
+//" -closeK  Number of closest atoms used for sequence order independent\n"
+//"          initial alignment. default: 5\n"
+//"\n"
+//" -hinge   Maximum number of hinge allowed in flexible alignment. default: 9\n"
+"\n"
+"   -se    Do not perform superposition. Useful for extracting alignment from\n"
+"          superposed structure pairs\n"
+"\n"
+" -infmt1  Input format for structure_1\n"
+" -infmt2  Input format for structure_2\n"
+"          -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
+"           0: PDB format\n"
+"           1: SPICKER format\n"
+//"           2: xyz format\n"
+"           3: PDBx/mmCIF format\n"
+"\n"
+"Advanced usage 1 (generate an image for a pair of superposed structures):\n"
+"    USalign 1cpc.pdb 1mba.pdb -o sup\n"
+"    pymol -c -d @sup_all_atm.pml -g sup_all_atm.png\n"
+"\n"
+"Advanced usage 2 (a quick search of query.pdb against I-TASSER PDB library):\n"
+"    wget https://zhanggroup.org/library/PDB.tar.bz2\n"
+"    tar -xjvf PDB.tar.bz2\n"
+"    USalign query.pdb -dir2 PDB/ PDB/list -suffix .pdb -outfmt 2 -fast\n"
+    <<endl;
+}
+
+void print_help(bool h_opt=false) // print banner, usage and all options, then exit(EXIT_SUCCESS); h_opt is currently ignored
+{
+    print_version();
+    cout <<
+"\n"
+"Usage: USalign PDB1.pdb PDB2.pdb [Options]\n"
+"\n"
+"Options:\n"
+"    -mol  Type of molecule(s) to align.\n"
+"          auto: (default) align both protein and nucleic acids.\n"
+"          prot: only align proteins in a structure.\n"
+"          RNA : only align RNA and DNA in a structure.\n"
+"\n"
+"     -mm  Multimeric alignment option:\n"
+"          0: (default) alignment of two monomeric structures\n"
+"          1: alignment of two multi-chain oligomeric structures\n"
+"          2: alignment of individual chains to an oligomeric structure\n"
+"             $ USalign -dir1 monomers/ list oligomer.pdb -ter 0 -mm 2\n"
+"          3: alignment of circularly permuted structure\n"
+"          4: alignment of multiple monomeric chains into a consensus alignment\n"
+"             $ USalign -dir chains/ list -suffix .pdb -mm 4\n"
+"          5: fully non-sequential (fNS) alignment\n"
+"          6: semi-non-sequential (sNS) alignment\n"
+"          To use -mm 1 or -mm 2, '-ter' option must be 0 or 1.\n"
+"\n"
+"    -ter  Number of chains to align.\n"
+"          3: only align the first chain, or the first segment of the\n"
+"             first chain as marked by the 'TER' string in PDB file\n"
+"          2: (default) only align the first chain\n"
+"          1: align all chains of the first model (recommended for aligning\n"
+"             asymmetric units)\n"
+"          0: align all chains from all models (recommended for aligning\n"
+"             biological assemblies, i.e. biounits)\n"
+"\n"
+" -TMscore Whether to perform TM-score superposition without structure-based\n"
+"          alignment. The same as -byresi.\n"
+"          0: (default) sequence independent structure alignment\n"
+"          1: superpose two structures by assuming that a pair of residues\n"
+"             with the same residue index are equivalent between the two\n"
+"             structures\n"
+"          2: superpose two complex structures, assuming that a pair of\n"
+"             residues with the same residue index and the same chain ID\n"
+"             are equivalent between the two structures\n"
+//"          3: (similar to TMscore '-c' option; used with -ter 0 or 1)\n"
+//"             align by residue index and order of chain\n"
+//"          4: sequence dependent alignment: perform Needleman-Wunsch\n"
+//"             global sequence alignment, followed by TM-score superposition\n"
+"          5: sequence dependent alignment: perform glocal sequence\n"
+"             alignment followed by TM-score superposition.\n"
+"             -byresi 5 is the same as -seq\n"
+"          6: superpose two complex structures by first deriving optimal\n"
+"             chain mapping, followed by TM-score superposition for residues\n"
+"             with the same residue ID\n"
+"\n"
+"      -I  Use the final alignment specified by FASTA file 'align.txt'\n"
+"\n"
+"      -i  Use alignment specified by 'align.txt' as an initial alignment\n"
+"\n"
+"      -m  Output rotation matrix for superposition\n"
+"\n"
+"      -d  TM-score scaled by an assigned d0, e.g., '-d 3.5' reports MaxSub\n"
+"          score, where d0 is 3.5 Angstrom. -d does not change final alignment.\n"
+"\n"
+"      -u  TM-score normalized by an assigned length. It should be >= length\n"
+"          of protein to avoid TM-score >1. -u does not change final alignment.\n"
+"\n"
+"      -o  Output superposed structure1 to sup.* for PyMOL viewing.\n"
+"          $ USalign structure1.pdb structure2.pdb -o sup\n"
+"          $ pymol -d @sup.pml                # C-alpha trace aligned region\n"
+"          $ pymol -d @sup_all.pml            # C-alpha trace whole chain\n"
+"          $ pymol -d @sup_atm.pml            # full-atom aligned region\n"
+"          $ pymol -d @sup_all_atm.pml        # full-atom whole chain\n"
+"          $ pymol -d @sup_all_atm_lig.pml    # full-atom with all molecules\n"
+"\n"
+" -rasmol  Output superposed structure1 to sup.* for RasMol viewing.\n"
+"          $ USalign structure1.pdb structure2.pdb -rasmol sup\n"
+"          $ rasmol -script sup               # C-alpha trace aligned region\n"
+"          $ rasmol -script sup_all           # C-alpha trace whole chain\n"
+"          $ rasmol -script sup_atm           # full-atom aligned region\n"
+"          $ rasmol -script sup_all_atm       # full-atom whole chain\n"
+"          $ rasmol -script sup_all_atm_lig   # full-atom with all molecules\n"
+"\n"
+//"      -h  Print the full help message, including additional options\n"
+//"\n"
+"Example usages ('gunzip' program is needed to read .gz compressed files):\n"
+"    USalign 101m.cif.gz 1mba.pdb             # pairwise monomeric protein alignment\n"
+"    USalign 1qf6.cif 5yyn.pdb.gz -mol RNA    # pairwise monomeric RNA alignment\n"
+"    USalign model.pdb native.pdb -TMscore 1  # calculate TM-score between two conformations of a monomer\n"
+"    USalign 4v4a.cif 4v49.cif -mm 1 -ter 1   # oligomeric alignment for asymmetric units\n"
+"    USalign 3ksc.pdb1 4lej.pdb1 -mm 1 -ter 0 # oligomeric alignment for biological units\n"
+"    USalign 1ajk.pdb.gz 2ayh.pdb.gz -mm 3    # circular permutation alignment\n"
+    <<endl;
+
+    //if (h_opt) -- guard disabled: the additional options are always printed
+        print_extra_help();
+
+    exit(EXIT_SUCCESS);
+}
+
+/* TMalign, RNAalign, CPalign, TMscore */
+int TMalign(string &xname, string &yname, const string &fname_super,
+    const string &fname_lign, const string &fname_matrix,
+    vector<string> &sequence, const double Lnorm_ass, const double d0_scale,
+    const bool m_opt, const int  i_opt, const int o_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const double TMcut,
+    const int infmt1_opt, const int infmt2_opt, const int ter_opt,
+    const int split_opt, const int outfmt_opt, const bool fast_opt,
+    const int cp_opt, const int mirror_opt, const int het_opt,
+    const string &atom_opt, const string &mol_opt, const string &dir_opt,
+    const string &dir1_opt, const string &dir2_opt, const int byresi_opt,
+    const vector<string> &chain1_list, const vector<string> &chain2_list,
+    const bool se_opt)
+{
+    /* declare previously global variables */
+    vector<vector<string> >PDB_lines1; // text of chain1
+    vector<vector<string> >PDB_lines2; // text of chain2
+    vector<int> mol_vec1;              // molecule type of chain1, RNA if >0
+    vector<int> mol_vec2;              // molecule type of chain2, RNA if >0
+    vector<string> chainID_list1;      // list of chainID1
+    vector<string> chainID_list2;      // list of chainID2
+    int    i,j;                // file index
+    int    chain_i,chain_j;    // chain index
+    int    r;                  // residue index
+    int    xlen, ylen;         // chain length
+    int    xchainnum,ychainnum;// number of chains in a PDB file
+    char   *seqx, *seqy;       // for the protein sequence 
+    char   *secx, *secy;       // for the secondary structure 
+    double **xa, **ya;         // for input vectors xa[0...xlen-1][0..2] and
+                               // ya[0...ylen-1][0..2], in general,
+                               // ya is regarded as native structure 
+                               // --> superpose xa onto ya
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+    int read_resi=byresi_opt;  // whether to read residue index
+    if (byresi_opt==0 && o_opt) read_resi=2;
+
+    /* loop over file names */
+    for (i=0;i<chain1_list.size();i++)
+    {
+        /* parse chain 1 */
+        xname=chain1_list[i];
+        xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
+        if (!xchainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<xname
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        for (chain_i=0;chain_i<xchainnum;chain_i++)
+        {
+            xlen=PDB_lines1[chain_i].size();
+            if (mol_opt=="RNA") mol_vec1[chain_i]=1;
+            else if (mol_opt=="protein") mol_vec1[chain_i]=-1;
+            if (!xlen)
+            {
+                cerr<<"Warning! Cannot parse file: "<<xname
+                    <<". Chain length 0."<<endl;
+                continue;
+            }
+            else if (xlen<3)
+            {
+                cerr<<"Sequence is too short <3!: "<<xname<<endl;
+                continue;
+            }
+            NewArray(&xa, xlen, 3);
+            seqx = new char[xlen + 1];
+            secx = new char[xlen + 1];
+            xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, 
+                resi_vec1, read_resi);
+            if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2];
+            if (mol_vec1[chain_i]>0) make_sec(seqx,xa, xlen, secx,atom_opt);
+            else make_sec(xa, xlen, secx); // secondary structure assignment
+
+            for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++)
+            {
+                /* parse chain 2 */
+                if (PDB_lines2.size()==0)
+                {
+                    yname=chain2_list[j];
+                    ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
+                    if (!ychainnum)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain number 0."<<endl;
+                        continue;
+                    }
+                }
+                for (chain_j=0;chain_j<ychainnum;chain_j++)
+                {
+                    ylen=PDB_lines2[chain_j].size();
+                    if (mol_opt=="RNA") mol_vec2[chain_j]=1;
+                    else if (mol_opt=="protein") mol_vec2[chain_j]=-1;
+                    if (!ylen)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain length 0."<<endl;
+                        continue;
+                    }
+                    else if (ylen<3)
+                    {
+                        cerr<<"Sequence is too short <3!: "<<yname<<endl;
+                        continue;
+                    }
+                    NewArray(&ya, ylen, 3);
+                    seqy = new char[ylen + 1];
+                    secy = new char[ylen + 1];
+                    ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
+                        resi_vec2, read_resi);
+                    if (mol_vec2[chain_j]>0)
+                         make_sec(seqy, ya, ylen, secy, atom_opt);
+                    else make_sec(ya, ylen, secy);
+
+                    if (byresi_opt) extract_aln_from_resi(sequence,
+                        seqx,seqy,resi_vec1,resi_vec2,byresi_opt);
+
+                    /* declare variable specific to this pair of TMalign */
+                    double t0[3], u0[3][3];
+                    double TM1, TM2;
+                    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+                    double d0_0, TM_0;
+                    double d0A, d0B, d0u, d0a;
+                    double d0_out=5.0;
+                    string seqM, seqxA, seqyA;// for output alignment
+                    double rmsd0 = 0.0;
+                    int L_ali;                // Aligned length in standard_TMscore
+                    double Liden=0;
+                    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+                    int n_ali=0;
+                    int n_ali8=0;
+                    bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt;
+
+                    /* entry function for structure alignment */
+                    if (cp_opt) CPalign_main(
+                        xa, ya, seqx, seqy, secx, secy,
+                        t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        i_opt, a_opt, u_opt, d_opt, force_fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
+                    else if (se_opt)
+                    {
+                        int *invmap = new int[ylen+1];
+                        u0[0][0]=u0[1][1]=u0[2][2]=1;
+                        u0[0][1]=         u0[0][2]=
+                        u0[1][0]=         u0[1][2]=
+                        u0[2][0]=         u0[2][1]=
+                        t0[0]   =t0[1]   =t0[2]   =0;
+                        se_main(
+                            xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                            seqM, seqxA, seqyA,
+                            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                            xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                            i_opt, a_opt, u_opt, d_opt,
+                            mol_vec1[chain_i]+mol_vec2[chain_j], 
+                            outfmt_opt, invmap);
+                        if (outfmt_opt>=2) 
+                        {
+                            Liden=L_ali=0;
+                            int r1,r2;
+                            for (r2=0;r2<ylen;r2++)
+                            {
+                                r1=invmap[r2];
+                                if (r1<0) continue;
+                                L_ali+=1;
+                                Liden+=(seqx[r1]==seqy[r2]);
+                            }
+                        }
+                        delete [] invmap;
+                    }
+                    else TMalign_main(
+                        xa, ya, seqx, seqy, secx, secy,
+                        t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        i_opt, a_opt, u_opt, d_opt, force_fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
+
+                    /* print result */
+                    if (outfmt_opt==0) print_version();
+                    int left_num=0;
+                    int right_num=0;
+                    int left_aln_num=0;
+                    int right_aln_num=0;
+                    bool after_cp=false;
+                    if (cp_opt) after_cp=output_cp(
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
+                        seqxA,seqyA,outfmt_opt,left_num,right_num,
+                        left_aln_num,right_aln_num);
+                    output_results(
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
+                        chainID_list1[chain_i], chainID_list2[chain_j],
+                        xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        rmsd0, d0_out, seqM.c_str(),
+                        seqxA.c_str(), seqyA.c_str(), Liden,
+                        n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0,
+                        d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, 
+                        (m_opt?fname_matrix:"").c_str(),
+                        outfmt_opt, ter_opt, false, split_opt, o_opt,
+                        fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt,
+                        resi_vec1, resi_vec2);
+                    if (cp_opt && outfmt_opt<=0)
+                    {
+                        cout<<"###############\t###############\n"
+                            <<"#Aligned atom 1\tAligned atom 2#\n";
+                        size_t r1=right_num;
+                        size_t r2=0;
+                        size_t r;
+                        for (r=0;r<seqxA.size();r++)
+                        {
+                            r1+=seqxA[r]!='-';
+                            r2+=seqyA[r]!='-';
+                            if (seqxA[r]=='*')
+                            {
+                                cout<<"###### Circular\tPermutation ###\n";
+                                r1=0;
+                            }
+                            else if (seqxA[r]!='-' && seqyA[r]!='-')
+                            {
+                                cout<<PDB_lines1[chain_i][r1-1].substr(12,15)<<'\t'
+                                    <<PDB_lines2[chain_j][r2-1].substr(12,15)<<'\n';
+                            }
+                        }
+                        cout<<"###############\t###############"<<endl;
+                    }
+
+                    /* Done! Free memory */
+                    seqM.clear();
+                    seqxA.clear();
+                    seqyA.clear();
+                    DeleteArray(&ya, ylen);
+                    delete [] seqy;
+                    delete [] secy;
+                    resi_vec2.clear();
+                } // chain_j
+                if (chain2_list.size()>1)
+                {
+                    yname.clear();
+                    for (chain_j=0;chain_j<ychainnum;chain_j++)
+                        PDB_lines2[chain_j].clear();
+                    PDB_lines2.clear();
+                    chainID_list2.clear();
+                    mol_vec2.clear();
+                }
+            } // j
+            PDB_lines1[chain_i].clear();
+            DeleteArray(&xa, xlen);
+            delete [] seqx;
+            delete [] secx;
+            resi_vec1.clear();
+        } // chain_i
+        xname.clear();
+        PDB_lines1.clear();
+        chainID_list1.clear();
+        mol_vec1.clear();
+    } // i
+    if (chain2_list.size()==1)
+    {
+        yname.clear();
+        for (chain_j=0;chain_j<ychainnum;chain_j++)
+            PDB_lines2[chain_j].clear();
+        PDB_lines2.clear();
+        resi_vec2.clear();
+        chainID_list2.clear();
+        mol_vec2.clear();
+    }
+    return 0;
+}
+
+/* Align two complexes: plain TMalign when each side parses to exactly one chain (returns 0), otherwise the multi-chain MMalign pipeline (returns 1). */
+int MMalign(const string &xname, const string &yname, // names used in the report; the first dir1_opt.size()/dir2_opt.size() characters are cut off
+    const string &fname_super, const string &fname_lign, // forwarded to the output routines (fname_lign only to MMalign_final)
+    const string &fname_matrix, vector<string> &sequence, // sequence is non-const: it is handed to the alignment helpers and overwritten at several points
+    const double d0_scale, const bool m_opt, const int o_opt, // m_opt gates fname_matrix in the monomer path (empty string is passed otherwise)
+    const int a_opt, const bool d_opt, const bool full_opt, // a_opt!=0 switches the normalisation lengths to the average of both complexes
+    const double TMcut, const int infmt1_opt, const int infmt2_opt,
+    const int ter_opt, const int split_opt, const int outfmt_opt, // outfmt_opt==0 prints the version banner before the complex report
+    bool fast_opt, const int mirror_opt, const int het_opt, // fast_opt is by value and forced on locally for large inputs; mirror_opt is only used when parsing complex 1
+    const string &atom_opt, const string &mol_opt,
+    const string &dir1_opt, const string &dir2_opt, // only their sizes are used here, as prefix lengths removed from xname/yname
+    const vector<string> &chain1_list, const vector<string> &chain2_list, // inputs handed to parse_chain_list for complex 1 / complex 2
+    const int byresi_opt) // nonzero: i_opt becomes 3 and the iterative and cross-chain refinement steps are skipped
+{
+    /* declare previously global variables */
+    vector<vector<vector<double> > > xa_vec; // structure of complex1
+    vector<vector<vector<double> > > ya_vec; // structure of complex2
+    vector<vector<char> >seqx_vec; // sequence of complex1
+    vector<vector<char> >seqy_vec; // sequence of complex2
+    vector<vector<char> >secx_vec; // secondary structure of complex1
+    vector<vector<char> >secy_vec; // secondary structure of complex2
+    vector<int> mol_vec1;          // molecule type of complex1, RNA if >0
+    vector<int> mol_vec2;          // molecule type of complex2, RNA if >0
+    vector<string> chainID_list1;  // list of chainID1
+    vector<string> chainID_list2;  // list of chainID2
+    vector<int> xlen_vec;          // length of complex1
+    vector<int> ylen_vec;          // length of complex2
+    int    i,j;                    // chain index
+    int    xlen, ylen;             // chain length
+    double **xa, **ya;             // structure of single chain
+    char   *seqx, *seqy;           // for the protein sequence
+    char   *secx, *secy;           // for the secondary structure
+    int    xlen_aa,ylen_aa;        // total length of protein
+    int    xlen_na,ylen_na;        // total length of RNA/DNA
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+
+    /* parse both complexes; mirror_opt is passed for complex 1 only (complex 2 gets 0) */
+    parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1,
+        xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt,
+        atom_opt, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, resi_vec1);
+    if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 1");
+    parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2,
+        ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt,
+        atom_opt, 0, het_opt, ylen_aa, ylen_na, o_opt, resi_vec2);
+    if (ya_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 2");
+    int len_aa=getmin(xlen_aa,ylen_aa); // default normalisation: the smaller protein total
+    int len_na=getmin(xlen_na,ylen_na); // default normalisation: the smaller RNA/DNA total
+    if (a_opt) // normalise by the (integer) average of both complexes instead
+    {
+        len_aa=(xlen_aa+ylen_aa)/2;
+        len_na=(xlen_na+ylen_na)/2;
+    }
+    int i_opt=0;
+    if (byresi_opt) i_opt=3; // this value is what TMalign_main receives as i_opt below
+
+    /* perform monomer alignment if each complex has exactly one chain */
+    if (xa_vec.size()==1 && ya_vec.size()==1)
+    {
+        xlen = xlen_vec[0];
+        ylen = ylen_vec[0];
+        seqx = new char[xlen+1];
+        seqy = new char[ylen+1];
+        secx = new char[xlen+1];
+        secy = new char[ylen+1];
+        NewArray(&xa, xlen, 3);
+        NewArray(&ya, ylen, 3);
+        copy_chain_data(xa_vec[0],seqx_vec[0],secx_vec[0], xlen,xa,seqx,secx);
+        copy_chain_data(ya_vec[0],seqy_vec[0],secy_vec[0], ylen,ya,seqy,secy);
+        
+        /* declare variable specific to this pair of TMalign */
+        double t0[3], u0[3][3];
+        double TM1, TM2;
+        double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+        double d0_0, TM_0;
+        double d0A, d0B, d0u, d0a;
+        double d0_out=5.0;
+        string seqM, seqxA, seqyA;// for output alignment
+        double rmsd0 = 0.0;
+        int L_ali;                // Aligned length in standard_TMscore
+        double Liden=0;
+        double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+        int n_ali=0;
+        int n_ali8=0;
+        
+        if (byresi_opt) extract_aln_from_resi(sequence, // presumably fills sequence with the residue-index based alignment - TODO confirm for this overload
+            seqx,seqy,resi_vec1,resi_vec2,byresi_opt);
+
+        /* entry function for structure alignment; the slot between sequence and d0_scale is a literal 0 and the one after a_opt is false */
+        TMalign_main(xa, ya, seqx, seqy, secx, secy,
+            t0, u0, TM1, TM2, TM3, TM4, TM5,
+            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+            seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, 0, d0_scale,
+            i_opt, a_opt, false, d_opt, fast_opt,
+            mol_vec1[0]+mol_vec2[0],TMcut);
+
+        /* print result. NOTE(review): the argument after fname_super is a literal 0; other output_results call sites in this file pass i_opt there, and i_opt may be 3 here - confirm this is intended */
+        output_results(
+            xname.substr(dir1_opt.size()),
+            yname.substr(dir2_opt.size()),
+            chainID_list1[0], chainID_list2[0],
+            xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+            seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
+            n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B,
+            0, d0_scale, d0a, d0u, (m_opt?fname_matrix:"").c_str(),
+            outfmt_opt, ter_opt, true, split_opt, o_opt, fname_super,
+            0, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2);
+
+        /* clean up */
+        seqM.clear();
+        seqxA.clear();
+        seqyA.clear();
+        delete[]seqx;
+        delete[]seqy;
+        delete[]secx;
+        delete[]secy;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&ya,ylen);
+
+        vector<vector<vector<double> > >().swap(xa_vec); // structure of complex1
+        vector<vector<vector<double> > >().swap(ya_vec); // structure of complex2
+        vector<vector<char> >().swap(seqx_vec); // sequence of complex1
+        vector<vector<char> >().swap(seqy_vec); // sequence of complex2
+        vector<vector<char> >().swap(secx_vec); // secondary structure of complex1
+        vector<vector<char> >().swap(secy_vec); // secondary structure of complex2
+        mol_vec1.clear();       // molecule type of complex1, RNA if >0
+        mol_vec2.clear();       // molecule type of complex2, RNA if >0
+        chainID_list1.clear();  // list of chainID1
+        chainID_list2.clear();  // list of chainID2
+        xlen_vec.clear();       // length of complex1
+        ylen_vec.clear();       // length of complex2
+        return 0; // monomer path returns 0; the multi-chain path at the end returns 1
+    }
+
+    /* declare TM-score tables */
+    int chain1_num=xa_vec.size();
+    int chain2_num=ya_vec.size();
+    vector<string> tmp_str_vec(chain2_num,""); // row template: chain2_num empty strings
+    double **TMave_mat; // chain1_num x chain2_num pair scores; -1 is written for pairs that are not aligned
+    double **ut_mat; // per-pair transform, row i*chain2_num+j: entries 0..8 hold u0 row by row, 9..11 hold t0
+    int ui,uj,ut_idx;
+    NewArray(&TMave_mat,chain1_num,chain2_num);
+    NewArray(&ut_mat,chain1_num*chain2_num,4*3);
+    vector<vector<string> >seqxA_mat(chain1_num,tmp_str_vec);
+    vector<vector<string> > seqM_mat(chain1_num,tmp_str_vec);
+    vector<vector<string> >seqyA_mat(chain1_num,tmp_str_vec);
+
+    double maxTMmono=-1; // best single chain-pair score found in the all-against-all pass
+    int maxTMmono_i,maxTMmono_j; // indices of that pair; no initial value, assigned only when a pair beats maxTMmono
+
+    /* get all-against-all alignment */
+    if (len_aa+len_na>500) fast_opt=true; // large input: fast mode is forced for every later call
+    for (i=0;i<chain1_num;i++)
+    {
+        xlen=xlen_vec[i];
+        if (xlen<3) // chain too short: mark the whole row with -1 and move on
+        {
+            for (j=0;j<chain2_num;j++) TMave_mat[i][j]=-1;
+            continue;
+        }
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+
+        for (j=0;j<chain2_num;j++)
+        {
+            ut_idx=i*chain2_num+j;
+            for (ui=0;ui<4;ui++) // default transform: zero everything, then set the diagonal of the 3x3 part to 1
+                for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=0;
+            ut_mat[ut_idx][0]=1;
+            ut_mat[ut_idx][4]=1;
+            ut_mat[ut_idx][8]=1;
+
+            if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+
+            ylen=ylen_vec[j];
+            if (ylen<3) // chain too short: nothing was allocated for it yet, so just skip
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+                ylen,ya,seqy,secy);
+
+            /* declare variable specific to this pair of TMalign */
+            double t0[3], u0[3][3];
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+
+            int Lnorm_tmp=len_aa; // protein normalisation length by default
+            if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na; // positive sum: use the RNA/DNA length instead
+            
+            if (byresi_opt)
+            {
+                int total_aln=extract_aln_from_resi(sequence,
+                    seqx,seqy,resi_vec1,resi_vec2,xlen_vec,ylen_vec, i, j);
+                seqxA_mat[i][j]=sequence[0];
+                seqyA_mat[i][j]=sequence[1];
+                if (total_aln>xlen+ylen-3) // presumably fewer than 3 aligned residue pairs - TODO confirm; pair keeps the identity transform and score 0
+                {
+                    for (ui=0;ui<3;ui++) for (uj=0;uj<3;uj++) 
+                        ut_mat[ut_idx][ui*3+uj]=(ui==uj)?1:0;
+                    for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=0;
+                    TMave_mat[i][j]=0;
+                    seqM.clear();
+                    seqxA.clear();
+                    seqyA.clear();
+
+                    delete[]seqy;
+                    delete[]secy;
+                    DeleteArray(&ya,ylen);
+                    continue;
+                }
+            }
+
+            /* entry function for structure alignment; Lnorm_tmp is the normalisation length and the three flags after i_opt are fixed to false, true, false */
+            TMalign_main(xa, ya, seqx, seqy, secx, secy,
+                t0, u0, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_tmp, d0_scale,
+                i_opt, false, true, false, fast_opt,
+                mol_vec1[i]+mol_vec2[j],TMcut);
+
+            /* store result: transform into ut_mat, aligned strings and score into the pair tables */
+            for (ui=0;ui<3;ui++)
+                for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=u0[ui][uj];
+            for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=t0[uj];
+            seqxA_mat[i][j]=seqxA;
+            seqyA_mat[i][j]=seqyA;
+            TMave_mat[i][j]=TM4*Lnorm_tmp; // pair score is TM4 scaled by the normalisation length
+            if (TMave_mat[i][j]>maxTMmono) // remember the best monomer pair for the fallback further down
+            {
+                maxTMmono=TMave_mat[i][j];
+                maxTMmono_i=i;
+                maxTMmono_j=j;
+            }
+
+            /* clean up */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+        }
+
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+    }
+
+    /* calculate initial chain-chain assignment */
+    int *assign1_list; // value is index of assigned chain2
+    int *assign2_list; // value is index of assigned chain1
+    assign1_list=new int[chain1_num];
+    assign2_list=new int[chain2_num];
+    double total_score=enhanced_greedy_search(TMave_mat, assign1_list,
+        assign2_list, chain1_num, chain2_num);
+    if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain");
+
+    /* refine alignment for large oligomers */
+    int aln_chain_num=count_assign_pair(assign1_list,chain1_num);
+    bool is_oligomer=(aln_chain_num>=3);
+    if (aln_chain_num==2) // dimer alignment
+    {
+        int na_chain_num1,na_chain_num2,aa_chain_num1,aa_chain_num2;
+        count_na_aa_chain_num(na_chain_num1,aa_chain_num1,mol_vec1);
+        count_na_aa_chain_num(na_chain_num2,aa_chain_num2,mol_vec2);
+
+        /* align protein-RNA hybrid dimer to another hybrid dimer */
+        if (na_chain_num1==1 && na_chain_num2==1 && 
+            aa_chain_num1==1 && aa_chain_num2==1) is_oligomer=false;
+        /* align pure protein dimer or pure RNA dimer */
+        else if ((getmin(na_chain_num1,na_chain_num2)==0 && 
+                    aa_chain_num1==2 && aa_chain_num2==2) ||
+                 (getmin(aa_chain_num1,aa_chain_num2)==0 && 
+                    na_chain_num1==2 && na_chain_num2==2))
+        {
+            adjust_dimer_assignment(xa_vec,ya_vec,xlen_vec,ylen_vec,mol_vec1,
+                mol_vec2,assign1_list,assign2_list,seqxA_mat,seqyA_mat);
+            is_oligomer=false; // cannot refine further
+        }
+        else is_oligomer=true; /* align oligomers to dimer */
+    }
+
+    if (aln_chain_num>=3 || is_oligomer) // oligomer alignment
+    {
+        /* extract centroid coordinates */
+        double **xcentroids;
+        double **ycentroids;
+        NewArray(&xcentroids, chain1_num, 3);
+        NewArray(&ycentroids, chain2_num, 3);
+        double d0MM=getmin( // smaller of the two values returned by calculate_centroids
+            calculate_centroids(xa_vec, chain1_num, xcentroids),
+            calculate_centroids(ya_vec, chain2_num, ycentroids));
+
+        /* refine enhanced greedy search with centroid superposition */
+        //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num);
+        homo_refined_greedy_search(TMave_mat, assign1_list,
+            assign2_list, chain1_num, chain2_num, xcentroids,
+            ycentroids, d0MM, len_aa+len_na, ut_mat);
+        hetero_refined_greedy_search(TMave_mat, assign1_list,
+            assign2_list, chain1_num, chain2_num, xcentroids,
+            ycentroids, d0MM, len_aa+len_na);
+        
+        /* clean up */
+        DeleteArray(&xcentroids, chain1_num);
+        DeleteArray(&ycentroids, chain2_num);
+    }
+
+    /* store initial assignment (presumably copy_chain_assign_data copies its first argument group into the second - TODO confirm) */
+    int init_pair_num=count_assign_pair(assign1_list,chain1_num);
+    int *assign1_init, *assign2_init;
+    assign1_init=new int[chain1_num];
+    assign2_init=new int[chain2_num];
+    double **TMave_init;
+    NewArray(&TMave_init,chain1_num,chain2_num);
+    vector<vector<string> >seqxA_init(chain1_num,tmp_str_vec);
+    vector<vector<string> >seqyA_init(chain1_num,tmp_str_vec);
+    vector<string> sequence_init;
+    copy_chain_assign_data(chain1_num, chain2_num, sequence_init,
+        seqxA_mat,  seqyA_mat,  assign1_list, assign2_list, TMave_mat,
+        seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init);
+
+    /* perform iterative alignment */
+    double max_total_score=0; // ignore old total_score because previous
+                              // score was from monomeric chain superpositions
+    int max_iter=5-(int)((len_aa+len_na)/200); // one iteration less per 200 residues of normalisation length
+    if (max_iter<2) max_iter=2; // but never fewer than 2
+    if (byresi_opt==0) MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, // NOTE(review): xa/ya/seqx/seqy/secx/secy were released in the loop above; presumably the callee only uses them as scratch handles - TODO confirm
+        seqx_vec, seqy_vec, secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec,
+        ylen_vec, xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num,
+        chain2_num, TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list,
+        sequence, d0_scale, fast_opt);
+
+    /* sometimes MMalign_iter is even worse than monomer alignment: restart from the single best chain pair */
+    if (byresi_opt==0 && max_total_score<maxTMmono)
+    {
+        copy_chain_assign_data(chain1_num, chain2_num, sequence,
+            seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init,
+            seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat);
+        for (i=0;i<chain1_num;i++) // unassign every chain of complex 1 except the best monomer pair
+        {
+            if (i!=maxTMmono_i) assign1_list[i]=-1;
+            else assign1_list[i]=maxTMmono_j;
+        }
+        for (j=0;j<chain2_num;j++) // same for complex 2
+        {
+            if (j!=maxTMmono_j) assign2_list[j]=-1;
+            else assign2_list[j]=maxTMmono_i;
+        }
+        sequence[0]=seqxA_mat[maxTMmono_i][maxTMmono_j];
+        sequence[1]=seqyA_mat[maxTMmono_i][maxTMmono_j];
+        max_total_score=maxTMmono;
+        MMalign_iter(max_total_score, max_iter, xa_vec, ya_vec, seqx_vec, seqy_vec,
+            secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+            xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num,
+            TMave_mat, seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence,
+            d0_scale, fast_opt);
+    }
+
+    /* perform cross chain alignment
+     * in some cases, this leads to dramatic improvement, esp for homodimer */
+    int iter_pair_num=count_assign_pair(assign1_list,chain1_num);
+    if (iter_pair_num>=init_pair_num) copy_chain_assign_data( // refresh the *_init copy unless the iteration lost assigned pairs
+        chain1_num, chain2_num, sequence_init,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, TMave_mat,
+        seqxA_init, seqyA_init, assign1_init,  assign2_init,  TMave_init);
+    double max_total_score_cross=max_total_score;
+    if (byresi_opt==0 && len_aa+len_na<10000) // cross-chain pass is skipped at 10000 residues and above
+    {
+        MMalign_dimer(max_total_score_cross, xa_vec, ya_vec, seqx_vec, seqy_vec,
+            secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+            xa, ya, seqx, seqy, secx, secy, len_aa, len_na, chain1_num, chain2_num,
+            TMave_init, seqxA_init, seqyA_init, assign1_init, assign2_init,
+            sequence_init, d0_scale, fast_opt);
+        if (max_total_score_cross>max_total_score) // adopt the cross-chain result only when it scores higher
+        {
+            max_total_score=max_total_score_cross;
+            copy_chain_assign_data(chain1_num, chain2_num, sequence,
+                seqxA_init, seqyA_init, assign1_init, assign2_init, TMave_init,
+                seqxA_mat,  seqyA_mat,  assign1_list, assign2_list, TMave_mat);
+        }
+    } 
+
+    /* final alignment and report, based on the current *_mat / assign*_list state */
+    if (outfmt_opt==0) print_version();
+    MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()),
+        chainID_list1, chainID_list2,
+        fname_super, fname_lign, fname_matrix,
+        xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, len_aa, len_na,
+        chain1_num, chain2_num, TMave_mat,
+        seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence,
+        d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt,
+        a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2);
+
+    /* clean up everything */
+    delete [] assign1_list;
+    delete [] assign2_list;
+    DeleteArray(&TMave_mat,chain1_num);
+    DeleteArray(&ut_mat,   chain1_num*chain2_num);
+    vector<vector<string> >().swap(seqxA_mat);
+    vector<vector<string> >().swap(seqM_mat);
+    vector<vector<string> >().swap(seqyA_mat);
+    vector<string>().swap(tmp_str_vec);
+
+    delete [] assign1_init;
+    delete [] assign2_init;
+    DeleteArray(&TMave_init,chain1_num);
+    vector<vector<string> >().swap(seqxA_init);
+    vector<vector<string> >().swap(seqyA_init);
+
+    vector<vector<vector<double> > >().swap(xa_vec); // structure of complex1
+    vector<vector<vector<double> > >().swap(ya_vec); // structure of complex2
+    vector<vector<char> >().swap(seqx_vec); // sequence of complex1
+    vector<vector<char> >().swap(seqy_vec); // sequence of complex2
+    vector<vector<char> >().swap(secx_vec); // secondary structure of complex1
+    vector<vector<char> >().swap(secy_vec); // secondary structure of complex2
+    mol_vec1.clear();       // molecule type of complex1, RNA if >0
+    mol_vec2.clear();       // molecule type of complex2, RNA if >0
+    vector<string>().swap(chainID_list1);  // list of chainID1
+    vector<string>().swap(chainID_list2);  // list of chainID2
+    xlen_vec.clear();       // length of complex1
+    ylen_vec.clear();       // length of complex2
+    vector<string> ().swap(resi_vec1);  // residue index for chain1
+    vector<string> ().swap(resi_vec2);  // residue index for chain2
+    return 1; // multi-chain path; the monomer shortcut above returns 0
+}
+
+
+/* align individual chains to a complex. */
+int MMdock(const string &xname, const string &yname, const string &fname_super, 
+    const string &fname_matrix, vector<string> &sequence, const double Lnorm_ass,
+    const double d0_scale, const bool m_opt, const int o_opt,
+    const int a_opt, const bool u_opt, const bool d_opt,
+    const double TMcut, const int infmt1_opt, const int infmt2_opt,
+    const int ter_opt, const int split_opt, const int outfmt_opt,
+    bool fast_opt, const int mirror_opt, const int het_opt,
+    const string &atom_opt, const string &mol_opt,
+    const string &dir1_opt, const string &dir2_opt,
+    const vector<string> &chain1_list, const vector<string> &chain2_list)
+{
+    /* declare previously global variables */
+    vector<vector<vector<double> > > xa_vec; // structure of complex1
+    vector<vector<vector<double> > > ya_vec; // structure of complex2
+    vector<vector<char> >seqx_vec; // sequence of complex1
+    vector<vector<char> >seqy_vec; // sequence of complex2
+    vector<vector<char> >secx_vec; // secondary structure of complex1
+    vector<vector<char> >secy_vec; // secondary structure of complex2
+    vector<int> mol_vec1;          // molecule type of complex1, RNA if >0
+    vector<int> mol_vec2;          // molecule type of complex2, RNA if >0
+    vector<string> chainID_list1;  // list of chainID1
+    vector<string> chainID_list2;  // list of chainID2
+    vector<int> xlen_vec;          // length of complex1
+    vector<int> ylen_vec;          // length of complex2
+    int    i,j;                    // chain index
+    int    xlen, ylen;             // chain length
+    double **xa, **ya;             // structure of single chain
+    char   *seqx, *seqy;           // for the protein sequence 
+    char   *secx, *secy;           // for the secondary structure 
+    int    xlen_aa,ylen_aa;        // total length of protein
+    int    xlen_na,ylen_na;        // total length of RNA/DNA
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+
+    /* parse complex */
+    parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1,
+        xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt,
+        atom_opt, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, resi_vec1);
+    if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 individual chain");
+    parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2,
+        ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt,
+        atom_opt, 0, het_opt, ylen_aa, ylen_na, o_opt, resi_vec2);
+    if (xa_vec.size()>ya_vec.size()) PrintErrorAndQuit(
+        "ERROR! more individual chains to align than number of chains in complex template");
+    int len_aa=getmin(xlen_aa,ylen_aa);
+    int len_na=getmin(xlen_na,ylen_na);
+    if (a_opt)
+    {
+        len_aa=(xlen_aa+ylen_aa)/2;
+        len_na=(xlen_na+ylen_na)/2;
+    }
+
+    /* perform monomer alignment if there is only one chain */
+    if (xa_vec.size()==1 && ya_vec.size()==1)
+    {
+        xlen = xlen_vec[0];
+        ylen = ylen_vec[0];
+        seqx = new char[xlen+1];
+        seqy = new char[ylen+1];
+        secx = new char[xlen+1];
+        secy = new char[ylen+1];
+        NewArray(&xa, xlen, 3);
+        NewArray(&ya, ylen, 3);
+        copy_chain_data(xa_vec[0],seqx_vec[0],secx_vec[0], xlen,xa,seqx,secx);
+        copy_chain_data(ya_vec[0],seqy_vec[0],secy_vec[0], ylen,ya,seqy,secy);
+        
+        /* declare variable specific to this pair of TMalign */
+        double t0[3], u0[3][3];
+        double TM1, TM2;
+        double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+        double d0_0, TM_0;
+        double d0A, d0B, d0u, d0a;
+        double d0_out=5.0;
+        string seqM, seqxA, seqyA;// for output alignment
+        double rmsd0 = 0.0;
+        int L_ali;                // Aligned length in standard_TMscore
+        double Liden=0;
+        double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+        int n_ali=0;
+        int n_ali8=0;
+
+        /* entry function for structure alignment */
+        TMalign_main(xa, ya, seqx, seqy, secx, secy,
+            t0, u0, TM1, TM2, TM3, TM4, TM5,
+            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+            seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, Lnorm_ass, d0_scale,
+            0, a_opt, u_opt, d_opt, fast_opt,
+            mol_vec1[0]+mol_vec2[0],TMcut);
+
+        /* print result */
+        output_results(
+            xname.substr(dir1_opt.size()),
+            yname.substr(dir2_opt.size()),
+            chainID_list1[0], chainID_list2[0],
+            xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+            seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
+            n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B,
+            Lnorm_ass, d0_scale, d0a, d0u, (m_opt?fname_matrix:"").c_str(),
+            (outfmt_opt==2?outfmt_opt:3), ter_opt, true, split_opt, o_opt, fname_super,
+            0, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2);
+        if (outfmt_opt==2) printf("%s%s\t%s%s\t%.4f\n",
+            xname.substr(dir1_opt.size()).c_str(), chainID_list1[0].c_str(), 
+            yname.substr(dir2_opt.size()).c_str(), chainID_list2[0].c_str(),
+            sqrt((TM1*TM1+TM2*TM2)/2));
+
+        /* clean up */
+        seqM.clear();
+        seqxA.clear();
+        seqyA.clear();
+        delete[]seqx;
+        delete[]seqy;
+        delete[]secx;
+        delete[]secy;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&ya,ylen);
+
+        vector<vector<vector<double> > >().swap(xa_vec); // structure of complex1
+        vector<vector<vector<double> > >().swap(ya_vec); // structure of complex2
+        vector<vector<char> >().swap(seqx_vec); // sequence of complex1
+        vector<vector<char> >().swap(seqy_vec); // sequence of complex2
+        vector<vector<char> >().swap(secx_vec); // secondary structure of complex1
+        vector<vector<char> >().swap(secy_vec); // secondary structure of complex2
+        mol_vec1.clear();       // molecule type of complex1, RNA if >0
+        mol_vec2.clear();       // molecule type of complex2, RNA if >0
+        chainID_list1.clear();  // list of chainID1
+        chainID_list2.clear();  // list of chainID2
+        xlen_vec.clear();       // length of complex1
+        ylen_vec.clear();       // length of complex2
+        return 0;
+    }
+
+    /* declare TM-score tables */
+    int chain1_num=xa_vec.size();
+    int chain2_num=ya_vec.size();
+    vector<string> tmp_str_vec(chain2_num,"");
+    double **TMave_mat;
+    NewArray(&TMave_mat,chain1_num,chain2_num);
+    vector<vector<string> >seqxA_mat(chain1_num,tmp_str_vec);
+    vector<vector<string> > seqM_mat(chain1_num,tmp_str_vec);
+    vector<vector<string> >seqyA_mat(chain1_num,tmp_str_vec);
+
+    /* trimComplex */
+    vector<vector<vector<double> > > ya_trim_vec; // structure of complex2
+    vector<vector<char> >seqy_trim_vec; // sequence of complex2
+    vector<vector<char> >secy_trim_vec; // secondary structure of complex2
+    vector<int> ylen_trim_vec;          // length of complex2
+    int Lchain_aa_max1=0;
+    int Lchain_na_max1=0;
+    for (i=0;i<chain1_num;i++)
+    {
+        xlen=xlen_vec[i];
+        if      (mol_vec1[i]>0  && xlen>Lchain_na_max1) Lchain_na_max1=xlen;
+        else if (mol_vec1[i]<=0 && xlen>Lchain_aa_max1) Lchain_aa_max1=xlen;
+    }
+    int trim_chain_count=trimComplex(ya_trim_vec,seqy_trim_vec,
+        secy_trim_vec,ylen_trim_vec,ya_vec,seqy_vec,secy_vec,ylen_vec,
+        mol_vec2,Lchain_aa_max1,Lchain_na_max1);
+    int    ylen_trim;             // chain length
+    double **ya_trim;             // structure of single chain
+    char   *seqy_trim;           // for the protein sequence
+    char   *secy_trim;           // for the secondary structure
+    double **xt;
+
+    /* get all-against-all alignment */
+    if (len_aa+len_na>500) fast_opt=true;
+    for (i=0;i<chain1_num;i++)
+    {
+        xlen=xlen_vec[i];
+        if (xlen<3)
+        {
+            for (j=0;j<chain2_num;j++) TMave_mat[i][j]=-1;
+            continue;
+        }
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+
+        for (j=0;j<chain2_num;j++)
+        {
+            if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+
+            ylen=ylen_vec[j];
+            if (ylen<3)
+            {
+                TMave_mat[i][j]=-1;
+                continue;
+            }
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+                ylen,ya,seqy,secy);
+
+            /* declare variable specific to this pair of TMalign */
+            double t0[3], u0[3][3];
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+
+            int Lnorm_tmp=len_aa;
+            if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na;
+
+            /* entry function for structure alignment */
+            if (trim_chain_count && ylen_trim_vec[j]<ylen)
+            {
+                ylen_trim = ylen_trim_vec[j];
+                seqy_trim = new char[ylen_trim+1];
+                secy_trim = new char[ylen_trim+1];
+                NewArray(&ya_trim, ylen_trim, 3);
+                copy_chain_data(ya_trim_vec[j],seqy_trim_vec[j],secy_trim_vec[j],
+                    ylen_trim,ya_trim,seqy_trim,secy_trim);
+                TMalign_main(xa, ya_trim, seqx, seqy_trim, secx, secy_trim,
+                    t0, u0, TM1, TM2, TM3, TM4, TM5,
+                    d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                    seqM, seqxA, seqyA,
+                    rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                    xlen, ylen_trim, sequence, Lnorm_tmp, d0_scale,
+                    0, false, true, false, fast_opt,
+                    mol_vec1[i]+mol_vec2[j],TMcut);
+                seqxA.clear();
+                seqyA.clear();
+                delete[]seqy_trim;
+                delete[]secy_trim;
+                DeleteArray(&ya_trim,ylen_trim);
+
+                NewArray(&xt,xlen,3);
+                do_rotation(xa, xt, xlen, t0, u0);
+                int *invmap = new int[ylen+1];
+                se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                    d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+                    rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                    xlen, ylen, sequence, Lnorm_tmp, d0_scale,
+                    0, false, 2, false, mol_vec1[i]+mol_vec2[j], 1, invmap);
+                delete[]invmap;
+                
+                if (sequence.size()<2) sequence.push_back("");
+                if (sequence.size()<2) sequence.push_back("");
+                sequence[0]=seqxA;
+                sequence[1]=seqyA;
+                TMalign_main(xt, ya, seqx, seqy, secx, secy,
+                    t0, u0, TM1, TM2, TM3, TM4, TM5,
+                    d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                    seqM, seqxA, seqyA,
+                    rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                    xlen, ylen, sequence, Lnorm_tmp, d0_scale,
+                    2, false, true, false, fast_opt,
+                    mol_vec1[i]+mol_vec2[j],TMcut);
+                DeleteArray(&xt, xlen);
+            }
+            else
+            {
+                TMalign_main(xa, ya, seqx, seqy, secx, secy,
+                    t0, u0, TM1, TM2, TM3, TM4, TM5,
+                    d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                    seqM, seqxA, seqyA,
+                    rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                    xlen, ylen, sequence, Lnorm_tmp, d0_scale,
+                    0, false, true, false, fast_opt,
+                    mol_vec1[i]+mol_vec2[j],TMcut);
+            }
+            
+            /* store result */
+            seqxA_mat[i][j]=seqxA;
+            seqyA_mat[i][j]=seqyA;
+            TMave_mat[i][j]=TM4*Lnorm_tmp;
+
+            /* clean up */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+        }
+
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+    }
+    vector<vector<vector<double> > >().swap(ya_trim_vec);
+    vector<vector<char> >().swap(seqy_trim_vec);
+    vector<vector<char> >().swap(secy_trim_vec);
+    vector<int> ().swap(ylen_trim_vec);
+
+    /* calculate initial chain-chain assignment */
+    int *assign1_list; // value is index of assigned chain2
+    int *assign2_list; // value is index of assigned chain1
+    assign1_list=new int[chain1_num];
+    assign2_list=new int[chain2_num];
+    enhanced_greedy_search(TMave_mat, assign1_list,
+        assign2_list, chain1_num, chain2_num);
+
+    /* final alignment */
+    if (outfmt_opt==0) print_version();
+    double **ut_mat; // rotation matrices for all-against-all alignment
+    NewArray(&ut_mat,chain1_num,4*3);
+    int ui,uj;
+    vector<string>xname_vec;
+    vector<string>yname_vec;
+    vector<double>TM_vec;
+    for (i=0;i<chain1_num;i++)
+    {
+        j=assign1_list[i];
+        xname_vec.push_back(xname+chainID_list1[i]);
+        if (j<0)
+        {
+            cerr<<"Warning! "<<chainID_list1[i]<<" cannot be alighed"<<endl;
+            for (ui=0;ui<3;ui++)
+            {
+                for (uj=0;uj<4;uj++) ut_mat[i][ui*3+uj]=0;
+                ut_mat[i][ui*3+ui]=1;
+            }
+            yname_vec.push_back(yname);
+            continue;
+        }
+        yname_vec.push_back(yname+chainID_list2[j]);
+
+        xlen =xlen_vec[i];
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i], xlen,xa,seqx,secx);
+
+        ylen =ylen_vec[j];
+        seqy = new char[ylen+1];
+        secy = new char[ylen+1];
+        NewArray(&ya, ylen, 3);
+        copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j], ylen,ya,seqy,secy);
+
+        /* declare variable specific to this pair of TMalign */
+        double t0[3], u0[3][3];
+        double TM1, TM2;
+        double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+        double d0_0, TM_0;
+        double d0A, d0B, d0u, d0a;
+        double d0_out=5.0;
+        string seqM, seqxA, seqyA;// for output alignment
+        double rmsd0 = 0.0;
+        int L_ali;                // Aligned length in standard_TMscore
+        double Liden=0;
+        double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+        int n_ali=0;
+        int n_ali8=0;
+
+        int c;
+        for (c=0; c<sequence.size(); c++) sequence[c].clear();
+        sequence.clear();
+        sequence.push_back(seqxA_mat[i][j]);
+        sequence.push_back(seqyA_mat[i][j]);
+            
+        /* entry function for structure alignment */
+        TMalign_main(xa, ya, seqx, seqy, secx, secy,
+            t0, u0, TM1, TM2, TM3, TM4, TM5,
+            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+            seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, Lnorm_ass, d0_scale,
+            3, a_opt, u_opt, d_opt, fast_opt,
+            mol_vec1[i]+mol_vec2[j]);
+        
+        for (ui=0;ui<3;ui++) for (uj=0;uj<3;uj++) ut_mat[i][ui*3+uj]=u0[ui][uj];
+        for (uj=0;uj<3;uj++) ut_mat[i][9+uj]=t0[uj];
+
+        TM_vec.push_back(TM1);
+        TM_vec.push_back(TM2);
+
+        if (outfmt_opt<2) output_results(
+            xname.c_str(), yname.c_str(),
+            chainID_list1[i], chainID_list2[j],
+            xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5,
+            rmsd0, d0_out, seqM.c_str(),
+            seqxA.c_str(), seqyA.c_str(), Liden,
+            n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0,
+            d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, 
+            "", outfmt_opt, ter_opt, false, split_opt, 
+            false, "",//o_opt, fname_super+chainID_list1[i], 
+            false, a_opt, u_opt, d_opt, mirror_opt,
+            resi_vec1, resi_vec2);
+        
+        /* clean up */
+        seqM.clear();
+        seqxA.clear();
+        seqyA.clear();
+
+        delete[]seqy;
+        delete[]secy;
+        DeleteArray(&ya,ylen);
+
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+    }
+    if (outfmt_opt==2)
+    {
+        double TM=0;
+        for (i=0;i<TM_vec.size();i++) TM+=TM_vec[i]*TM_vec[i];
+        TM=sqrt(TM/TM_vec.size());
+        string query_name=xname;
+        string template_name=yname;
+        for (i=0;i<chain1_num;i++)
+        {
+            j=assign1_list[i];
+            if (j<0) continue;
+            query_name   +=chainID_list1[i];
+            template_name+=chainID_list2[j];
+        }
+        printf("%s\t%s\t%.4f\n",query_name.c_str(),template_name.c_str(),TM);
+        query_name.clear();
+        template_name.clear();
+    }
+
+    if (m_opt) output_dock_rotation_matrix(fname_matrix.c_str(),
+        xname_vec,yname_vec, ut_mat, assign1_list);
+
+    if (o_opt) output_dock(chain1_list, ter_opt, split_opt, infmt1_opt,
+        atom_opt, mirror_opt, ut_mat, fname_super);
+
+
+    /* clean up everything */
+    vector<double>().swap(TM_vec);
+    vector<string>().swap(xname_vec);
+    vector<string>().swap(yname_vec);
+    delete [] assign1_list;
+    delete [] assign2_list;
+    DeleteArray(&TMave_mat,chain1_num);
+    DeleteArray(&ut_mat,   chain1_num);
+    vector<vector<string> >().swap(seqxA_mat);
+    vector<vector<string> >().swap(seqM_mat);
+    vector<vector<string> >().swap(seqyA_mat);
+    vector<string>().swap(tmp_str_vec);
+
+    vector<vector<vector<double> > >().swap(xa_vec); // structure of complex1
+    vector<vector<vector<double> > >().swap(ya_vec); // structure of complex2
+    vector<vector<char> >().swap(seqx_vec); // sequence of complex1
+    vector<vector<char> >().swap(seqy_vec); // sequence of complex2
+    vector<vector<char> >().swap(secx_vec); // secondary structure of complex1
+    vector<vector<char> >().swap(secy_vec); // secondary structure of complex2
+    mol_vec1.clear();       // molecule type of complex1, RNA if >0
+    mol_vec2.clear();       // molecule type of complex2, RNA if >0
+    vector<string>().swap(chainID_list1);  // list of chainID1
+    vector<string>().swap(chainID_list2);  // list of chainID2
+    xlen_vec.clear();       // length of complex1
+    ylen_vec.clear();       // length of complex2
+    return 1;
+}
+
+int mTMalign(string &xname, string &yname, const string &fname_super,
+    const string &fname_matrix,
+    vector<string> &sequence, double Lnorm_ass, const double d0_scale,
+    const bool m_opt, const int  i_opt, const int o_opt, const int a_opt,
+    bool u_opt, const bool d_opt, const bool full_opt, const double TMcut,
+    const int infmt_opt, const int ter_opt,
+    const int split_opt, const int outfmt_opt, bool fast_opt,
+    const int het_opt,
+    const string &atom_opt, const string &mol_opt, const string &dir_opt,
+    const int byresi_opt,
+    const vector<string> &chain_list)
+{
+    /* declare previously global variables */
+    vector<vector<vector<double> > >a_vec;  // atomic structure
+    vector<vector<vector<double> > >ua_vec; // unchanged atomic structure 
+    vector<vector<char> >seq_vec;  // sequence of complex
+    vector<vector<char> >sec_vec;  // secondary structure of complex
+    vector<int> mol_vec;           // molecule type of complex1, RNA if >0
+    vector<string> chainID_list;   // list of chainID
+    vector<int> len_vec;           // length of complex
+    int    i,j;                    // chain index
+    int    xlen, ylen;             // chain length
+    double **xa, **ya;             // structure of single chain
+    char   *seqx, *seqy;           // for the protein sequence 
+    char   *secx, *secy;           // for the secondary structure 
+    int    len_aa,len_na;          // total length of protein and RNA/DNA
+    vector<string> resi_vec;       // residue index for chain
+
+    /* parse chain list */
+    parse_chain_list(chain_list, a_vec, seq_vec, sec_vec, mol_vec,
+        len_vec, chainID_list, ter_opt, split_opt, mol_opt, infmt_opt,
+        atom_opt, false, het_opt, len_aa, len_na, o_opt, resi_vec);
+    int chain_num=a_vec.size();
+    if (chain_num<=1) PrintErrorAndQuit("ERROR! <2 chains for multiple alignment");
+    if (m_opt||o_opt) for (i=0;i<chain_num;i++) ua_vec.push_back(a_vec[i]);
+    int mol_type=0;
+    int total_len=0;
+    xlen=0;
+    for (i=0; i<chain_num; i++)
+    {
+        if (len_vec[i]>xlen) xlen=len_vec[i];
+        total_len+=len_vec[i];
+        mol_type+=mol_vec[i];
+    }
+    if (!u_opt) Lnorm_ass=total_len/chain_num;
+    u_opt=true;
+    total_len-=xlen;
+    if (total_len>750) fast_opt=true;
+
+    /* get all-against-all alignment */
+    double **TMave_mat;
+    NewArray(&TMave_mat,chain_num,chain_num);
+    vector<string> tmp_str_vec(chain_num,"");
+    vector<vector<string> >seqxA_mat(chain_num,tmp_str_vec);
+    vector<vector<string> >seqyA_mat(chain_num,tmp_str_vec);
+    for (i=0;i<chain_num;i++) for (j=0;j<chain_num;j++) TMave_mat[i][j]=0;
+    for (i=0;i<chain_num;i++)
+    {
+        xlen=len_vec[i];
+        if (xlen<3) continue;
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(a_vec[i],seq_vec[i],sec_vec[i],xlen,xa,seqx,secx);
+        seqxA_mat[i][i]=seqyA_mat[i][i]=(string)(seqx);
+        for (j=i+1;j<chain_num;j++)
+        {
+            ylen=len_vec[j];
+            if (ylen<3) continue;
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(a_vec[j],seq_vec[j],sec_vec[j],ylen,ya,seqy,secy);
+            
+            /* declare variable specific to this pair of TMalign */
+            double t0[3], u0[3][3];
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+
+            /* entry function for structure alignment */
+            TMalign_main(xa, ya, seqx, seqy, secx, secy,
+                t0, u0, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                0, false, u_opt, false, fast_opt,
+                mol_type,TMcut);
+
+            /* store result */
+            TMave_mat[i][j]=TMave_mat[j][i]=TM4;
+            seqxA_mat[i][j]=seqyA_mat[j][i]=seqxA;
+            seqyA_mat[i][j]=seqxA_mat[j][i]=seqyA;
+            //cout<<chain_list[i]<<':'<<chainID_list[i]
+                //<<chain_list[j]<<':'<<chainID_list[j]<<"\tTM4="<<TM4<<endl;
+            if (full_opt) output_results(
+                chain_list[i],chain_list[j], chainID_list[i], chainID_list[j],
+                xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+                seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
+                n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B,
+                Lnorm_ass, d0_scale, d0a, d0u, "",
+                outfmt_opt, ter_opt, true, split_opt, o_opt, "",
+                0, a_opt, false, d_opt, false, resi_vec, resi_vec);
+
+            /* clean up */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+        }
+
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+    }
+
+    /* representative related variables */   
+    int r;
+    int repr_idx=0;
+    vector<string>xname_vec;
+    for (i=0;i<chain_num;i++) xname_vec.push_back(
+        chain_list[i].substr(dir_opt.size())+chainID_list[i]);
+    vector<string>yname_vec;
+    double *TMave_list;
+    TMave_list = new double[chain_num];
+    int *assign_list;
+    assign_list=new int[chain_num];
+    vector<string> msa(ylen,""); // row is position along msa; column is sequence
+
+    int compare_num;
+    double TM1_total, TM2_total;
+    double TM3_total, TM4_total, TM5_total;     // for a_opt, u_opt, d_opt
+    double d0_0_total, TM_0_total;
+    double d0A_total, d0B_total, d0u_total, d0a_total;
+    double d0_out_total;
+    double rmsd0_total;
+    int L_ali_total;                // Aligned length in standard_TMscore
+    double Liden_total;
+    double TM_ali_total, rmsd_ali_total;  // TMscore and rmsd in standard_TMscore
+    int n_ali_total;
+    int n_ali8_total;
+    int xlen_total, ylen_total;
+    double TM4_total_max=0;
+
+    int max_iter=5-(int)(total_len/200);
+    if (max_iter<2) max_iter=2;
+    int iter=0;
+    vector<double> TM_vec(chain_num,0);
+    vector<double> d0_vec(chain_num,0);
+    vector<double> seqID_vec(chain_num,0);
+    vector<vector<double> > TM_mat(chain_num,TM_vec);
+    vector<vector<double> > d0_mat(chain_num,d0_vec);
+    vector<vector<double> > seqID_mat(chain_num,seqID_vec);
+    for (iter=0; iter<max_iter; iter++)
+    {
+        /* select representative */   
+        for (j=0; j<chain_num; j++) TMave_list[j]=0;
+        for (i=0; i<chain_num; i++ )
+        {
+            for (j=0; j<chain_num; j++)
+            {
+                //cout<<'\t'<<setprecision(4)<<TMave_mat[i][j];
+                TMave_list[j]+=TMave_mat[i][j];
+            }
+            //cout<<'\t'<<chain_list[i]<<':'<<chainID_list[i]<<endl;
+        }
+        repr_idx=0;
+        double repr_TM=0;
+        for (j=0; j<chain_num; j++)
+        {
+            //cout<<chain_list[j]<<'\t'<<len_vec[j]<<'\t'<<TMave_list[j]<<endl;
+            if (TMave_list[j]<repr_TM) continue;
+            repr_TM=TMave_list[j];
+            repr_idx=j;
+        }
+        //cout<<"repr="<<repr_idx<<"; "<<chain_list[repr_idx]<<"; TM="<<repr_TM<<endl;
+
+        /* superpose every other chain onto the representative or onto an already-superposed chain */
+        yname=chain_list[repr_idx].substr(dir_opt.size())+chainID_list[repr_idx];
+        double **xt;
+        vector<pair<double,int> >TM_pair_vec; // TM vs chain
+
+        for (i=0; i<chain_num; i++) assign_list[i]=-1;
+        assign_list[repr_idx]=repr_idx;
+        //ylen = len_vec[repr_idx];
+        //seqy = new char[ylen+1];
+        //secy = new char[ylen+1];
+        //NewArray(&ya, ylen, 3);
+        //copy_chain_data(a_vec[repr_idx],seq_vec[repr_idx],sec_vec[repr_idx], ylen,ya,seqy,secy);
+        for (r=0;r<sequence.size();r++) sequence[r].clear(); sequence.clear();
+        sequence.push_back("");
+        sequence.push_back("");
+        for (i=0;i<chain_num;i++)
+        {
+            yname_vec.push_back(yname);
+            xlen = len_vec[i];
+            if (i==repr_idx || xlen<3) continue;
+            TM_pair_vec.push_back(make_pair(-TMave_mat[i][repr_idx],i));
+        }
+        sort(TM_pair_vec.begin(),TM_pair_vec.end());
+    
+        int tm_idx;
+        if (outfmt_opt<0) cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t"
+                               <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl;
+        for (tm_idx=0; tm_idx<TM_pair_vec.size(); tm_idx++)
+        {
+            i=TM_pair_vec[tm_idx].second;
+            xlen = len_vec[i];
+            seqx = new char[xlen+1];
+            secx = new char[xlen+1];
+            NewArray(&xa, xlen, 3);
+            copy_chain_data(a_vec[i],seq_vec[i],sec_vec[i], xlen,xa,seqx,secx);
+
+            double maxTM=TMave_mat[i][repr_idx];
+            int maxj=repr_idx;
+            for (j=0;j<chain_num;j++)
+            {
+                if (i==j || assign_list[j]<0 || TMave_mat[i][j]<=maxTM) continue;
+                maxj=j;
+                maxTM=TMave_mat[i][j];
+            }
+            j=maxj;
+            assign_list[i]=j;
+            ylen = len_vec[j];
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(a_vec[j],seq_vec[j],sec_vec[j], ylen,ya,seqy,secy);
+
+            sequence[0]=seqxA_mat[i][j];
+            sequence[1]=seqyA_mat[i][j];
+            //cout<<"tm_idx="<<tm_idx<<"\ti="<<i<<"\tj="<<j<<endl;
+            //cout<<"superpose "<<xname_vec[i]<<" to "<<xname_vec[j]<<endl;
+
+            /* declare variable specific to this pair of TMalign */
+            double t0[3], u0[3][3];
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+
+            /* entry function for structure alignment */
+            TMalign_main(xa, ya, seqx, seqy, secx, secy,
+                t0, u0, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                2,  a_opt, u_opt, d_opt, fast_opt, mol_type);
+        
+            if (outfmt_opt<0) output_results(
+                xname_vec[i].c_str(), xname_vec[j].c_str(), "", "",
+                xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5,
+                rmsd0, d0_out, seqM.c_str(),
+                seqxA.c_str(), seqyA.c_str(), Liden,
+                n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0,
+                d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, 
+                "", 2,//outfmt_opt,
+                ter_opt, false, split_opt, 
+                false, "",//o_opt, fname_super+chainID_list1[i], 
+                false, a_opt, u_opt, d_opt, false,
+                resi_vec, resi_vec);
+         
+            NewArray(&xt,xlen,3);
+            do_rotation(xa, xt, xlen, t0, u0);
+            for (r=0;r<xlen;r++)
+            {
+                a_vec[i][r][0]=xt[r][0];
+                a_vec[i][r][1]=xt[r][1];
+                a_vec[i][r][2]=xt[r][2];
+            }
+            DeleteArray(&xt, xlen);
+        
+            /* clean up */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+            sequence[0].clear();
+            sequence[1].clear();
+
+            delete[]seqx;
+            delete[]secx;
+            DeleteArray(&xa,xlen);
+        
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+        }
+        ylen = len_vec[repr_idx];
+        seqy = new char[ylen+1];
+        secy = new char[ylen+1];
+        NewArray(&ya, ylen, 3);
+        copy_chain_data(a_vec[repr_idx],seq_vec[repr_idx],sec_vec[repr_idx], ylen,ya,seqy,secy);
+
+        /* recover alignment */ 
+        int    ylen_ext=ylen;        // chain length
+        double **ya_ext;             // structure of single chain
+        char   *seqy_ext;            // for the protein sequence 
+        char   *secy_ext;            // for the secondary structure 
+        for (r=0;r<msa.size();r++) msa[r].clear(); msa.clear();
+        msa.assign(ylen,""); // row is position along msa; column is sequence
+        vector<string> msa_ext;      // row is position along msa; column is sequence
+        for (r=0;r<ylen;r++) msa[r]=seqy[r];
+        //for (r=0;r<msa.size();r++) cout<<"["<<r<<"]\t"<<msa[r]<<endl;
+        //cout<<"start recover"<<endl;
+        assign_list[repr_idx]=0;
+        for (tm_idx=0; tm_idx<TM_pair_vec.size(); tm_idx++)
+        {
+            i=TM_pair_vec[tm_idx].second;
+            assign_list[i]=tm_idx+1;
+
+            xlen = len_vec[i];
+            seqx = new char[xlen+1];
+            secx = new char[xlen+1];
+            NewArray(&xa, xlen, 3);
+            copy_chain_data(a_vec[i],seq_vec[i],sec_vec[i], xlen,xa,seqx,secx);
+        
+            /* declare variable specific to this pair of TMalign */
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+            int *invmap = new int[ylen+1];
+
+            se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                0, a_opt, u_opt, d_opt, mol_type, 1, invmap);
+
+            int rx=0,ry=0;
+            ylen_ext=seqxA.size();
+            NewArray(&ya_ext, ylen_ext, 3);             // structure of single chain
+            seqy_ext= new char[ylen_ext+1];            // for the protein sequence 
+            secy_ext= new char[ylen_ext+1];            // for the secondary structure 
+            string tmp_gap="";
+            for (r=0;r<msa[0].size();r++) tmp_gap+='-';
+            for (r=msa_ext.size();r<ylen_ext;r++) msa_ext.push_back("");
+            //cout<<"x:"<<xname_vec[i]<<'\n'<<seqxA<<endl;
+            //cout<<"y:"<<xname_vec[repr_idx]<<'\n'<<seqyA<<endl;
+            for (r=0;r<ylen_ext;r++)
+            {
+                if (seqyA[r]=='-')
+                {
+                    msa_ext[r]=tmp_gap+seqxA[r];
+                    ya_ext[r][0]=xa[rx][0];
+                    ya_ext[r][1]=xa[rx][1];
+                    ya_ext[r][2]=xa[rx][2];
+                    seqy_ext[r]=seqx[rx];
+                    secy_ext[r]=secx[rx];
+                }
+                else
+                {
+                    msa_ext[r]=msa[ry]+seqxA[r];
+                    ya_ext[r][0]=ya[ry][0];
+                    ya_ext[r][1]=ya[ry][1];
+                    ya_ext[r][2]=ya[ry][2];
+                    seqy_ext[r]=seqy[ry];
+                    secy_ext[r]=secy[ry];
+                }
+                rx+=(seqxA[r]!='-');
+                ry+=(seqyA[r]!='-');
+            }
+
+            /* copy ya_ext to ya */
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+
+            ylen=ylen_ext;
+            NewArray(&ya,ylen,3);
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            for (r=0;r<ylen;r++)
+            {
+                ya[r][0]=ya_ext[r][0];
+                ya[r][1]=ya_ext[r][1];
+                ya[r][2]=ya_ext[r][2];
+                seqy[r]=seqy_ext[r];
+                secy[r]=secy_ext[r];
+            }
+            for (r=0;r<ylen;r++)
+            {
+                if (r<msa.size()) msa[r]=msa_ext[r];
+                else msa.push_back(msa_ext[r]);
+            }
+            //for (r=0;r<ylen_ext;r++) cout<<"["<<r<<"]\t"<<msa_ext[r]<<'\t'<<seqy[r]<<'\t'
+                    //<<ya[r][0]<<'\t'<<ya[r][1]<<'\t'<<ya[r][2]<<'\t'<<secy[r]<<endl;
+
+            /* clean up */
+            tmp_gap.clear();
+            delete[]invmap;
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            delete[]seqx;
+            delete[]secx;
+            DeleteArray(&xa,xlen);
+
+            delete[]seqy_ext;
+            delete[]secy_ext;
+            DeleteArray(&ya_ext,ylen_ext);
+        }
+        vector<string>().swap(msa_ext);
+        vector<pair<double,int> >().swap(TM_pair_vec);
+        for (i=0; i<chain_num; i++)
+        {
+            tm_idx=assign_list[i];
+            if (tm_idx<0) continue;
+            seqyA_mat[i][i]="";
+            for (r=0 ;r<ylen ; r++) seqyA_mat[i][i]+=msa[r][tm_idx];
+            seqxA_mat[i][i]=seqyA_mat[i][i];
+            //cout<<xname_vec[i]<<'\t'<<seqxA_mat[i][i]<<endl;
+        }
+        for (i=0;i<chain_num; i++)
+        {
+            if (assign_list[i]<0) continue;
+            string seqxA=seqxA_mat[i][i];
+            for (j=0; j<chain_num; j++)
+            {
+                if (i==j || assign_list[j]<0) continue;
+                string seqyA=seqyA_mat[j][j];
+                seqxA_mat[i][j]=seqyA_mat[i][j]="";
+                for (r=0;r<ylen;r++)
+                {
+                    if (seqxA[r]=='-' && seqyA[r]=='-') continue;
+                    seqxA_mat[i][j]+=seqxA[r];
+                    seqyA_mat[i][j]+=seqyA[r];
+                }
+                seqyA.clear();
+            }
+            seqxA.clear();
+        }
+
+        /* recover statistics such as TM-score */ 
+        compare_num=0;
+        TM1_total=0, TM2_total=0;
+        TM3_total=0, TM4_total=0, TM5_total=0;
+        d0_0_total=0, TM_0_total=0;
+        d0A_total=0, d0B_total=0, d0u_total=0, d0a_total=0;
+        d0_out_total=0;
+        rmsd0_total = 0.0;
+        L_ali_total=0;
+        Liden_total=0;
+        TM_ali_total=0, rmsd_ali_total=0;
+        n_ali_total=0;
+        n_ali8_total=0;
+        xlen_total=0, ylen_total=0;
+        for (i=0; i< chain_num; i++)
+        {
+            xlen=len_vec[i];
+            if (xlen<3) continue;
+            seqx = new char[xlen+1];
+            secx = new char[xlen+1];
+            NewArray(&xa, xlen, 3);
+            copy_chain_data(a_vec[i],seq_vec[i],sec_vec[i], xlen,xa,seqx,secx);
+            for (j=i+1;j<chain_num;j++)
+            {
+                ylen=len_vec[j];
+                if (ylen<3) continue;
+                compare_num++;
+                seqy = new char[ylen+1];
+                secy = new char[ylen+1];
+                NewArray(&ya, ylen, 3);
+                copy_chain_data(a_vec[j],seq_vec[j],sec_vec[j],ylen,ya,seqy,secy);
+                sequence[0]=seqxA_mat[i][j];
+                sequence[1]=seqyA_mat[i][j];
+            
+                /* declare variable specific to this pair of TMalign */
+                double TM1, TM2;
+                double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+                double d0_0, TM_0;
+                double d0A, d0B, d0u, d0a;
+                double d0_out=5.0;
+                string seqM, seqxA, seqyA;// for output alignment
+                double rmsd0 = 0.0;
+                int L_ali=0;              // Aligned length in standard_TMscore
+                double Liden=0;
+                double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+                int n_ali=0;
+                int n_ali8=0;
+                int *invmap = new int[ylen+1];
+
+                se_main(xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                    d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+                    rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                    xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                    true, a_opt, u_opt, d_opt, mol_type, 1, invmap);
+
+                if (xlen<=ylen)
+                {
+                    xlen_total+=xlen;
+                    ylen_total+=ylen;
+                    TM1_total+=TM1;
+                    TM2_total+=TM2;
+                    d0A_total+=d0A;
+                    d0B_total+=d0B;
+                }
+                else
+                {
+                    xlen_total+=ylen;
+                    ylen_total+=xlen;
+                    TM1_total+=TM2;
+                    TM2_total+=TM1;
+                    d0A_total+=d0B;
+                    d0B_total+=d0A;
+                }
+                TM_mat[i][j]=TM2;
+                TM_mat[j][i]=TM1;
+                d0_mat[i][j]=d0B;
+                d0_mat[j][i]=d0A;
+                seqID_mat[i][j]=1.*Liden/xlen;
+                seqID_mat[j][i]=1.*Liden/ylen;
+
+                TM3_total+=TM3;
+                TM4_total+=TM4;
+                TM5_total+=TM5;
+                d0_0_total+=d0_0;
+                TM_0_total+=TM_0;
+                d0u_total+=d0u;
+                d0_out_total+=d0_out;
+                rmsd0_total+=rmsd0;
+                L_ali_total+=L_ali;        // Aligned length in standard_TMscore
+                Liden_total+=Liden;
+                TM_ali_total+=TM_ali;
+                rmsd_ali_total+=rmsd_ali;  // TMscore and rmsd in standard_TMscore
+                n_ali_total+=n_ali;
+                n_ali8_total+=n_ali8;
+
+                /* clean up */
+                delete[]invmap;
+                seqM.clear();
+                seqxA.clear();
+                seqyA.clear();
+
+                delete[]seqy;
+                delete[]secy;
+                DeleteArray(&ya,ylen);
+            }
+            delete[]seqx;
+            delete[]secx;
+            DeleteArray(&xa,xlen);
+        }
+        if (TM4_total<=TM4_total_max) break;
+        TM4_total_max=TM4_total;
+    }
+    for (i=0;i<chain_num;i++)
+    {
+        for (j=0;j<chain_num;j++)
+        {
+            if (i==j) continue;
+            TM_vec[i]+=TM_mat[i][j];
+            d0_vec[i]+=d0_mat[i][j];
+            seqID_vec[i]+=seqID_mat[i][j];
+        }
+        TM_vec[i]/=(chain_num-1);
+        d0_vec[i]/=(chain_num-1);
+        seqID_vec[i]/=(chain_num-1);
+    }
+    xlen_total    /=compare_num;
+    ylen_total    /=compare_num;
+    TM1_total     /=compare_num;
+    TM2_total     /=compare_num;
+    d0A_total     /=compare_num;
+    d0B_total     /=compare_num;
+    TM3_total     /=compare_num;
+    TM4_total     /=compare_num;
+    TM5_total     /=compare_num;
+    d0_0_total    /=compare_num;
+    TM_0_total    /=compare_num;
+    d0u_total     /=compare_num;
+    d0_out_total  /=compare_num;
+    rmsd0_total   /=compare_num;
+    L_ali_total   /=compare_num;
+    Liden_total   /=compare_num;
+    TM_ali_total  /=compare_num;
+    rmsd_ali_total/=compare_num;
+    n_ali_total   /=compare_num;
+    n_ali8_total  /=compare_num;
+    xname="shorter";
+    yname="longer";
+    string seqM="";
+    string seqxA="";
+    string seqyA="";
+    double t0[3];
+    double u0[3][3];
+    stringstream buf;
+    for (i=0; i<chain_num; i++)
+    {
+        if (assign_list[i]<0) continue;
+        buf <<">"<<xname_vec[i]<<"\tL="<<len_vec[i]
+            <<"\td0="<<setiosflags(ios::fixed)<<setprecision(2)<<d0_vec[i]
+            <<"\tseqID="<<setiosflags(ios::fixed)<<setprecision(3)<<seqID_vec[i]
+            <<"\tTM-score="<<setiosflags(ios::fixed)<<setprecision(5)<<TM_vec[i];
+        if (i==repr_idx) buf<<"\t*";
+        buf<<'\n'<<seqxA_mat[i][i]<<endl;
+    }
+    seqM=buf.str();
+    seqM=seqM.substr(0,seqM.size()-1);
+    buf.str(string());
+    //MergeAlign(seqxA_mat,seqyA_mat,repr_idx,xname_vec,chain_num,seqM);
+    if (outfmt_opt==0) print_version();
+    output_mTMalign_results( xname,yname, "","",
+        xlen_total, ylen_total, t0, u0, TM1_total, TM2_total, 
+        TM3_total, TM4_total, TM5_total, rmsd0_total, d0_out_total,
+        seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden_total,
+        n_ali8_total, L_ali_total, TM_ali_total, rmsd_ali_total,
+        TM_0_total, d0_0_total, d0A_total, d0B_total,
+        Lnorm_ass, d0_scale, d0a_total, d0u_total, 
+        "", outfmt_opt, ter_opt, 0, split_opt, false,
+        "", false, a_opt, u_opt, d_opt, false,
+        resi_vec, resi_vec );
+
+    if (m_opt || o_opt)
+    {
+        double **ut_mat; // rotation matrices for all-against-all alignment
+        int ui,uj;
+        double t[3], u[3][3];
+        double rmsd;
+        NewArray(&ut_mat,chain_num,4*3);
+        for (i=0;i<chain_num;i++)
+        {
+            xlen=ylen=a_vec[i].size();
+            NewArray(&xa,xlen,3);
+            NewArray(&ya,xlen,3);
+            for (r=0;r<xlen;r++)
+            {
+                xa[r][0]=ua_vec[i][r][0];
+                xa[r][1]=ua_vec[i][r][1];
+                xa[r][2]=ua_vec[i][r][2];
+                ya[r][0]= a_vec[i][r][0];
+                ya[r][1]= a_vec[i][r][1];
+                ya[r][2]= a_vec[i][r][2];
+            }
+            Kabsch(xa,ya,xlen,1,&rmsd,t,u);
+            for (ui=0;ui<3;ui++) for (uj=0;uj<3;uj++) ut_mat[i][ui*3+uj]=u[ui][uj];
+            for (uj=0;uj<3;uj++) ut_mat[i][9+uj]=t[uj];
+            DeleteArray(&xa,xlen);
+            DeleteArray(&ya,xlen);
+        }
+        vector<vector<vector<double> > >().swap(ua_vec);
+
+        if (m_opt)
+        {
+            assign_list[repr_idx]=-1;
+            output_dock_rotation_matrix(fname_matrix.c_str(),
+                xname_vec,yname_vec, ut_mat, assign_list);
+        }
+
+        if (o_opt) output_dock(chain_list, ter_opt, split_opt, 
+                infmt_opt, atom_opt, false, ut_mat, fname_super);
+        
+        DeleteArray(&ut_mat,chain_num);
+    }
+
+    /* clean up */
+    vector<string>().swap(msa);
+    vector<string>().swap(tmp_str_vec);
+    vector<vector<string> >().swap(seqxA_mat);
+    vector<vector<string> >().swap(seqyA_mat);
+    vector<string>().swap(xname_vec);
+    vector<string>().swap(yname_vec);
+    delete[]TMave_list;
+    DeleteArray(&TMave_mat,chain_num);
+    vector<vector<vector<double> > >().swap(a_vec); // structure of complex
+    vector<vector<char> >().swap(seq_vec); // sequence of complex
+    vector<vector<char> >().swap(sec_vec); // secondary structure of complex
+    vector<int>().swap(mol_vec);           // molecule type of complex1, RNA if >0
+    vector<string>().swap(chainID_list);   // list of chainID
+    vector<int>().swap(len_vec);           // length of complex
+    vector<double>().swap(TM_vec);
+    vector<double>().swap(d0_vec);
+    vector<double>().swap(seqID_vec);
+    vector<vector<double> >().swap(TM_mat);
+    vector<vector<double> >().swap(d0_mat);
+    vector<vector<double> >().swap(seqID_mat);
+    return 1;
+}
+
+/* sequence order independent alignment: every chain of chain1_list vs every chain of chain2_list; prints one report per pair, always returns 0 */
+int SOIalign(string &xname, string &yname, const string &fname_super,
+    const string &fname_lign, const string &fname_matrix, // fname_lign is not read here
+    vector<string> &sequence, const double Lnorm_ass, const double d0_scale,
+    const bool m_opt, const int  i_opt, const int o_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const double TMcut, // TMcut is not read here
+    const int infmt1_opt, const int infmt2_opt, const int ter_opt,
+    const int split_opt, const int outfmt_opt, const bool fast_opt,
+    const int cp_opt, const int mirror_opt, const int het_opt, // cp_opt is not read here
+    const string &atom_opt, const string &mol_opt, const string &dir_opt,
+    const string &dir1_opt, const string &dir2_opt,
+    const vector<string> &chain1_list, const vector<string> &chain2_list,
+    const bool se_opt, const int closeK_opt, const int mm_opt)
+{
+    /* declare previously global variables */
+    vector<vector<string> >PDB_lines1; // text of chain1
+    vector<vector<string> >PDB_lines2; // text of chain2
+    vector<int> mol_vec1;              // molecule type of chain1, RNA if >0
+    vector<int> mol_vec2;              // molecule type of chain2, RNA if >0
+    vector<string> chainID_list1;      // list of chainID1
+    vector<string> chainID_list2;      // list of chainID2
+    int    i,j;                // file index
+    int    chain_i,chain_j;    // chain index
+    int    r;                  // residue index
+    int    xlen, ylen;         // chain length
+    int    xchainnum=0,ychainnum=0; // chains per PDB file; 0 until a file is parsed (final cleanup reads ychainnum)
+    char   *seqx, *seqy;       // for the protein sequence
+    char   *secx, *secy;       // for the secondary structure
+    int    **secx_bond=NULL;   // boundary of secondary structure (allocated only if mm_opt==6)
+    int    **secy_bond=NULL;   // boundary of secondary structure (allocated only if mm_opt==6)
+    double **xa, **ya;         // for input vectors xa[0...xlen-1][0..2] and
+                               // ya[0...ylen-1][0..2], in general,
+                               // ya is regarded as native structure
+                               // --> superpose xa onto ya
+    double **xk=NULL, **yk=NULL; // k closest residues (allocated only if closeK_opt>=3)
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+    int read_resi=0;  // whether to read residue index
+    if (o_opt) read_resi=2; // o_opt set: pass mode 2 to read_PDB (mode meaning is defined by read_PDB)
+
+    /* loop over file names */
+    for (i=0;i<chain1_list.size();i++)
+    {
+        /* parse chain 1 */
+        xname=chain1_list[i];
+        xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
+        if (!xchainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<xname
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        for (chain_i=0;chain_i<xchainnum;chain_i++)
+        {
+            xlen=PDB_lines1[chain_i].size();
+            if (mol_opt=="RNA") mol_vec1[chain_i]=1;           // user override of the detected type
+            else if (mol_opt=="protein") mol_vec1[chain_i]=-1;
+            if (!xlen)
+            {
+                cerr<<"Warning! Cannot parse file: "<<xname
+                    <<". Chain length 0."<<endl;
+                continue;
+            }
+            else if (xlen<3)
+            {
+                cerr<<"Sequence is too short <3!: "<<xname<<endl;
+                continue;
+            }
+            NewArray(&xa, xlen, 3);
+            if (closeK_opt>=3) NewArray(&xk, xlen*closeK_opt, 3);
+            seqx = new char[xlen + 1];
+            secx = new char[xlen + 1];
+            xlen = read_PDB(PDB_lines1[chain_i], xa, seqx,
+                resi_vec1, read_resi);
+            if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2]; // mirror chain 1 by negating z
+            if (mol_vec1[chain_i]>0) make_sec(seqx,xa, xlen, secx,atom_opt);
+            else make_sec(xa, xlen, secx); // secondary structure assignment
+            if (closeK_opt>=3) getCloseK(xa, xlen, closeK_opt, xk);
+            if (mm_opt==6)
+            {
+                NewArray(&secx_bond, xlen, 2);
+                assign_sec_bond(secx_bond, secx, xlen);
+            }
+            /* when dir_opt is set, j starts at i+1 (only files after the i-th are compared) */
+            for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++)
+            {
+                /* parse chain 2 (skipped while PDB_lines2 still holds a previous parse) */
+                if (PDB_lines2.size()==0)
+                {
+                    yname=chain2_list[j];
+                    ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
+                    if (!ychainnum)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain number 0."<<endl;
+                        continue;
+                    }
+                }
+                for (chain_j=0;chain_j<ychainnum;chain_j++)
+                {
+                    ylen=PDB_lines2[chain_j].size();
+                    if (mol_opt=="RNA") mol_vec2[chain_j]=1;
+                    else if (mol_opt=="protein") mol_vec2[chain_j]=-1;
+                    if (!ylen)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain length 0."<<endl;
+                        continue;
+                    }
+                    else if (ylen<3)
+                    {
+                        cerr<<"Sequence is too short <3!: "<<yname<<endl;
+                        continue;
+                    }
+                    NewArray(&ya, ylen, 3);
+                    if (closeK_opt>=3) NewArray(&yk, ylen*closeK_opt, 3);
+                    seqy = new char[ylen + 1];
+                    secy = new char[ylen + 1];
+                    ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
+                        resi_vec2, read_resi);
+                    if (mol_vec2[chain_j]>0)
+                         make_sec(seqy, ya, ylen, secy, atom_opt);
+                    else make_sec(ya, ylen, secy);
+                    if (closeK_opt>=3) getCloseK(ya, ylen, closeK_opt, yk);
+                    if (mm_opt==6)
+                    {
+                        NewArray(&secy_bond, ylen, 2);
+                        assign_sec_bond(secy_bond, secy, ylen);
+                    }
+
+                    /* declare variable specific to this pair of TMalign */
+                    double t0[3], u0[3][3];
+                    double TM1, TM2;
+                    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+                    double d0_0, TM_0;
+                    double d0A, d0B, d0u, d0a;
+                    double d0_out=5.0;
+                    string seqM, seqxA, seqyA;// for output alignment
+                    double rmsd0 = 0.0;
+                    int L_ali=0;              // Aligned length in standard_TMscore
+                    double Liden=0;
+                    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+                    int n_ali=0;
+                    int n_ali8=0;
+                    bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; // always fast if both chains >1500
+                    int *invmap = new int[ylen+1];            // invmap[r2]: chain-1 index paired with r2, <0 if none
+                    double *dist_list = new double[ylen+1];   // dist_list[r2]: printed as the pair distance below
+
+                    /* entry function for structure alignment */
+                    if (se_opt)
+                    {
+                        /* identity rotation and zero translation: u0/t0 are not passed to soi_se_main */
+                        u0[0][0]=u0[1][1]=u0[2][2]=1;
+                        u0[0][1]=         u0[0][2]=
+                        u0[1][0]=         u0[1][2]=
+                        u0[2][0]=         u0[2][1]=
+                        t0[0]   =t0[1]   =t0[2]   =0;
+                        soi_se_main(
+                            xa, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                            seqM, seqxA, seqyA,
+                            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                            xlen, ylen, Lnorm_ass, d0_scale,
+                            i_opt, a_opt, u_opt, d_opt,
+                            mol_vec1[chain_i]+mol_vec2[chain_j],
+                            outfmt_opt, invmap, dist_list,
+                            secx_bond, secy_bond, mm_opt);
+                        if (outfmt_opt>=2)
+                        {
+                            Liden=L_ali=0; // recount aligned and identical residues from invmap
+                            int r1,r2;
+                            for (r2=0;r2<ylen;r2++)
+                            {
+                                r1=invmap[r2];
+                                if (r1<0) continue;
+                                L_ali+=1;
+                                Liden+=(seqx[r1]==seqy[r2]);
+                            }
+                        }
+                    }
+                    else SOIalign_main(xa, ya, xk, yk, closeK_opt,
+                        seqx, seqy, secx, secy,
+                        t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA, invmap,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        i_opt, a_opt, u_opt, d_opt, force_fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j], dist_list,
+                        secx_bond, secy_bond, mm_opt);
+
+                    /* print result */
+                    if (outfmt_opt==0) print_version();
+                    output_results(
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
+                        chainID_list1[chain_i], chainID_list2[chain_j],
+                        xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        rmsd0, d0_out, seqM.c_str(),
+                        seqxA.c_str(), seqyA.c_str(), Liden,
+                        n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0,
+                        d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u,
+                        (m_opt?fname_matrix:"").c_str(),
+                        outfmt_opt, ter_opt, false, split_opt, o_opt,
+                        fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt,
+                        resi_vec1, resi_vec2);
+                    if (outfmt_opt<=0) // full output: also list every aligned pair with its distance
+                    {
+                        cout<<"###############\t###############\t#########"<<endl;
+                        cout<<"#Aligned atom 1\tAligned atom 2 \tDistance#"<<endl;
+                        int r1,r2;
+                        for (r2=0;r2<ylen;r2++)
+                        {
+                            r1=invmap[r2];
+                            if (r1<0) continue;
+                            cout<<PDB_lines1[chain_i][r1].substr(12,15)<<'\t'
+                                <<PDB_lines2[chain_j][r2].substr(12,15)<<'\t'
+                                <<setw(9)<<setiosflags(ios::fixed)<<setprecision(3)
+                                <<dist_list[r2]<<'\n';
+                        }
+                        cout<<"###############\t###############\t#########"<<endl;
+                    }
+
+                    /* Done! Free memory */
+                    delete [] invmap;
+                    delete [] dist_list;
+                    seqM.clear();
+                    seqxA.clear();
+                    seqyA.clear();
+                    DeleteArray(&ya, ylen);
+                    if (closeK_opt>=3) DeleteArray(&yk, ylen*closeK_opt);
+                    delete [] seqy;
+                    delete [] secy;
+                    resi_vec2.clear();
+                    if (mm_opt==6) DeleteArray(&secy_bond, ylen);
+                } // chain_j
+                if (chain2_list.size()>1) // several files: drop this parse so the next j reparses
+                {
+                    yname.clear();
+                    for (chain_j=0;chain_j<ychainnum;chain_j++)
+                        PDB_lines2[chain_j].clear();
+                    PDB_lines2.clear();
+                    chainID_list2.clear();
+                    mol_vec2.clear();
+                }
+            } // j
+            PDB_lines1[chain_i].clear();
+            DeleteArray(&xa, xlen);
+            if (closeK_opt>=3) DeleteArray(&xk, xlen*closeK_opt);
+            delete [] seqx;
+            delete [] secx;
+            resi_vec1.clear();
+            if (mm_opt==6) DeleteArray(&secx_bond, xlen);
+        } // chain_i
+        xname.clear();
+        PDB_lines1.clear();
+        chainID_list1.clear();
+        mol_vec1.clear();
+    } // i
+    if (chain2_list.size()==1) // the single chain-2 file was kept for reuse; release it now
+    {
+        yname.clear();
+        for (chain_j=0;chain_j<ychainnum;chain_j++) // ychainnum is 0 if the file was never parsed
+            PDB_lines2[chain_j].clear();
+        PDB_lines2.clear();
+        resi_vec2.clear();
+        chainID_list2.clear();
+        mol_vec2.clear();
+    }
+    return 0;
+}
+/* flexalign: flexible (hinge-based) alignment of every chain of chain1_list vs every chain of chain2_list; prints one report per pair, always returns 0 */
+int flexalign(string &xname, string &yname, const string &fname_super,
+    const string &fname_lign, const string &fname_matrix, // fname_lign is not read here
+    vector<string> &sequence, const double Lnorm_ass, const double d0_scale,
+    const bool m_opt, const int  i_opt, const int o_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const double TMcut, // TMcut is not read here
+    const int infmt1_opt, const int infmt2_opt, const int ter_opt,
+    const int split_opt, const int outfmt_opt, const bool fast_opt,
+    const int mirror_opt, const int het_opt,
+    const string &atom_opt, const string &mol_opt, const string &dir_opt,
+    const string &dir1_opt, const string &dir2_opt, const int byresi_opt,
+    const vector<string> &chain1_list, const vector<string> &chain2_list,
+    const int hinge_opt)
+{
+    /* declare previously global variables */
+    vector<vector<string> >PDB_lines1; // text of chain1
+    vector<vector<string> >PDB_lines2; // text of chain2
+    vector<int> mol_vec1;              // molecule type of chain1, RNA if >0
+    vector<int> mol_vec2;              // molecule type of chain2, RNA if >0
+    vector<string> chainID_list1;      // list of chainID1
+    vector<string> chainID_list2;      // list of chainID2
+    int    i,j;                // file index
+    int    chain_i,chain_j;    // chain index
+    int    r;                  // residue index
+    int    xlen, ylen;         // chain length
+    int    xchainnum=0,ychainnum=0; // chains per PDB file; 0 until a file is parsed (final cleanup reads ychainnum)
+    char   *seqx, *seqy;       // for the protein sequence
+    char   *secx, *secy;       // for the secondary structure
+    double **xa, **ya;         // for input vectors xa[0...xlen-1][0..2] and
+                               // ya[0...ylen-1][0..2], in general,
+                               // ya is regarded as native structure
+                               // --> superpose xa onto ya
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+    int read_resi=byresi_opt;  // whether to read residue index
+    if (byresi_opt==0 && o_opt) read_resi=2; // o_opt set without -byresi: pass mode 2 to read_PDB
+
+    /* loop over file names */
+    for (i=0;i<chain1_list.size();i++)
+    {
+        /* parse chain 1 */
+        xname=chain1_list[i];
+        xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
+        if (!xchainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<xname
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        for (chain_i=0;chain_i<xchainnum;chain_i++)
+        {
+            xlen=PDB_lines1[chain_i].size();
+            if (mol_opt=="RNA") mol_vec1[chain_i]=1;           // user override of the detected type
+            else if (mol_opt=="protein") mol_vec1[chain_i]=-1;
+            if (!xlen)
+            {
+                cerr<<"Warning! Cannot parse file: "<<xname
+                    <<". Chain length 0."<<endl;
+                continue;
+            }
+            else if (xlen<3)
+            {
+                cerr<<"Sequence is too short <3!: "<<xname<<endl;
+                continue;
+            }
+            NewArray(&xa, xlen, 3);
+            seqx = new char[xlen + 1];
+            secx = new char[xlen + 1];
+            xlen = read_PDB(PDB_lines1[chain_i], xa, seqx,
+                resi_vec1, read_resi);
+            if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2]; // mirror chain 1 by negating z
+            if (mol_vec1[chain_i]>0) make_sec(seqx,xa, xlen, secx,atom_opt);
+            else make_sec(xa, xlen, secx); // secondary structure assignment
+            /* when dir_opt is set, j starts at i+1 (only files after the i-th are compared) */
+            for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++)
+            {
+                /* parse chain 2 (skipped while PDB_lines2 still holds a previous parse) */
+                if (PDB_lines2.size()==0)
+                {
+                    yname=chain2_list[j];
+                    ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
+                    if (!ychainnum)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain number 0."<<endl;
+                        continue;
+                    }
+                }
+                for (chain_j=0;chain_j<ychainnum;chain_j++)
+                {
+                    ylen=PDB_lines2[chain_j].size();
+                    if (mol_opt=="RNA") mol_vec2[chain_j]=1;
+                    else if (mol_opt=="protein") mol_vec2[chain_j]=-1;
+                    if (!ylen)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain length 0."<<endl;
+                        continue;
+                    }
+                    else if (ylen<3)
+                    {
+                        cerr<<"Sequence is too short <3!: "<<yname<<endl;
+                        continue;
+                    }
+                    NewArray(&ya, ylen, 3);
+                    seqy = new char[ylen + 1];
+                    secy = new char[ylen + 1];
+                    ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
+                        resi_vec2, read_resi);
+                    if (mol_vec2[chain_j]>0)
+                         make_sec(seqy, ya, ylen, secy, atom_opt);
+                    else make_sec(ya, ylen, secy);
+                    /* byresi_opt set: extract_aln_from_resi receives `sequence` (presumably fills it from residue numbers - confirm) */
+                    if (byresi_opt) extract_aln_from_resi(sequence,
+                        seqx,seqy,resi_vec1,resi_vec2,byresi_opt);
+
+                    /* declare variable specific to this pair of TMalign */
+                    double t0[3], u0[3][3];
+                    double TM1, TM2;
+                    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+                    double d0_0, TM_0;
+                    double d0A, d0B, d0u, d0a;
+                    double d0_out=5.0;
+                    string seqM, seqxA, seqyA;// for output alignment
+                    double rmsd0 = 0.0;
+                    int L_ali=0;              // Aligned length in standard_TMscore
+                    double Liden=0;
+                    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+                    int n_ali=0;
+                    int n_ali8=0;
+                    bool force_fast_opt=(getmin(xlen,ylen)>1500)?true:fast_opt; // always fast if both chains >1500
+                    vector<vector<double> >tu_vec; // one transform per hinge segment; decoded with tu2t_u
+
+                    /* entry function for structure alignment */
+                    int hingeNum=flexalign_main(
+                        xa, ya, seqx, seqy, secx, secy,
+                        t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        i_opt, a_opt, u_opt, d_opt, force_fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],hinge_opt);
+                    /* at most one hinge and <60% of the shorter chain aligned: rerun seeded with the first transform, keep the higher-TM result */
+                    if (hinge_opt && hingeNum<=1 && !tu_vec.empty() && // tu_vec[0] is read below
+                        n_ali8<0.6*getmin(xlen,ylen))
+                    {
+                        double t0_h[3], u0_h[3][3];
+                        double TM1_h, TM2_h;
+                        double TM3_h, TM4_h, TM5_h;
+                        double d0_0_h, TM_0_h;
+                        double d0_out_h=5.0;
+                        string seqM_h, seqxA_h, seqyA_h;
+                        double rmsd0_h = 0.0;
+                        int L_ali_h=0;
+                        double Liden_h=0;
+                        double TM_ali_h, rmsd_ali_h;
+                        int n_ali_h=0;
+                        int n_ali8_h=0;
+                        vector<vector<double> >tu_vec_h(1,tu_vec[0]);
+                        tu2t_u(tu_vec[0],t0_h,u0_h);
+
+                        int hingeNum_h=flexalign_main(
+                            xa, ya, seqx, seqy, secx, secy,
+                            t0_h, u0_h, tu_vec_h,
+                            TM1_h, TM2_h, TM3_h, TM4_h, TM5_h,
+                            d0_0_h, TM_0_h, d0A, d0B, d0u, d0a, d0_out_h,
+                            seqM_h, seqxA_h, seqyA_h, rmsd0_h, L_ali_h,
+                            Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h,
+                            xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt,
+                            a_opt, u_opt, d_opt, force_fast_opt,
+                            mol_vec1[chain_i]+mol_vec2[chain_j],hinge_opt);
+                        /* compare the two runs by the larger of their two TM-scores */
+                        double TM  =(TM1  >TM2  )?TM1  :TM2;
+                        double TM_h=(TM1_h>TM2_h)?TM1_h:TM2_h;
+                        if (TM_h>TM)
+                        {
+                            hingeNum=hingeNum_h;
+                            tu2t_u(tu_vec_h[0],t0,u0);
+                            TM1=TM1_h;
+                            TM2=TM2_h;
+                            TM3=TM3_h;
+                            TM4=TM4_h;
+                            TM5=TM5_h;
+                            d0_0=d0_0_h;
+                            TM_0=TM_0_h;
+                            d0_out=d0_out_h;
+                            seqM=seqM_h;
+                            seqxA=seqxA_h;
+                            seqyA=seqyA_h;
+                            rmsd0=rmsd0_h;
+                            L_ali=L_ali_h;
+                            Liden=Liden_h;
+                            TM_ali=TM_ali_h;
+                            rmsd_ali=rmsd_ali_h;
+                            n_ali=n_ali_h;
+                            n_ali8=n_ali8_h;
+                            tu_vec.clear();
+                            for (int hinge=0;hinge<tu_vec_h.size();hinge++)
+                                tu_vec.push_back(tu_vec_h[hinge]);
+                        }
+                        else tu2t_u(tu_vec[0],t0,u0); // keep the first run; reload t0/u0 from its first transform
+                    }
+
+                    /* print result */
+                    if (outfmt_opt==0) print_version();
+                    output_flexalign_results(
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
+                        chainID_list1[chain_i], chainID_list2[chain_j],
+                        xlen, ylen, t0, u0, tu_vec, TM1, TM2, TM3, TM4, TM5,
+                        rmsd0, d0_out, seqM.c_str(),
+                        seqxA.c_str(), seqyA.c_str(), Liden,
+                        n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0,
+                        d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u,
+                        (m_opt?fname_matrix:"").c_str(),
+                        outfmt_opt, ter_opt, false, split_opt, o_opt,
+                        fname_super, i_opt, a_opt, u_opt, d_opt, mirror_opt,
+                        resi_vec1, resi_vec2);
+
+                    /* Done! Free memory */
+                    tu_vec.clear();
+                    seqM.clear();
+                    seqxA.clear();
+                    seqyA.clear();
+                    DeleteArray(&ya, ylen);
+                    delete [] seqy;
+                    delete [] secy;
+                    resi_vec2.clear();
+                } // chain_j
+                if (chain2_list.size()>1) // several files: drop this parse so the next j reparses
+                {
+                    yname.clear();
+                    for (chain_j=0;chain_j<ychainnum;chain_j++)
+                        PDB_lines2[chain_j].clear();
+                    PDB_lines2.clear();
+                    chainID_list2.clear();
+                    mol_vec2.clear();
+                }
+            } // j
+            PDB_lines1[chain_i].clear();
+            DeleteArray(&xa, xlen);
+            delete [] seqx;
+            delete [] secx;
+            resi_vec1.clear();
+        } // chain_i
+        xname.clear();
+        PDB_lines1.clear();
+        chainID_list1.clear();
+        mol_vec1.clear();
+    } // i
+    if (chain2_list.size()==1) // the single chain-2 file was kept for reuse; release it now
+    {
+        yname.clear();
+        for (chain_j=0;chain_j<ychainnum;chain_j++) // ychainnum is 0 if the file was never parsed
+            PDB_lines2[chain_j].clear();
+        PDB_lines2.clear();
+        resi_vec2.clear();
+        chainID_list2.clear();
+        mol_vec2.clear();
+    }
+    return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+    if (argc < 2) print_help();
+    /* Entry point: parse the command line, validate the option combination,
+     * build the two file lists and dispatch on -mm to the alignment routine. */
+    clock_t t1, t2;
+    t1 = clock();
+
+    /**********************/
+    /*    get argument    */
+    /**********************/
+    string xname       = "";
+    string yname       = "";
+    string fname_super = ""; // file name for superposed structure
+    string fname_lign  = ""; // file name for user alignment
+    string fname_matrix= ""; // file name for output matrix
+    vector<string> sequence; // get value from alignment file
+    double Lnorm_ass = 0, d0_scale = 0; // only set by -u/-L and -d; 0 otherwise
+
+    bool h_opt = false; // print full help message
+    bool v_opt = false; // print version
+    bool m_opt = false; // flag for -m, output rotation matrix
+    int  i_opt = 0;     // 1 for -i, 3 for -I
+    int  o_opt = 0;     // 1 for -o, 2 for -rasmol
+    int  a_opt = 0;     // value of -a (T=1, F=0, or -2, -1, 1)
+    bool u_opt = false; // flag for -u, normalized by user specified length
+    bool d_opt = false; // flag for -d, user specified d0
+
+    bool   full_opt  = false;// do not show chain level alignment
+    double TMcut     =-1;
+    bool   se_opt    =false;
+    int    infmt1_opt=-1;    // PDB or PDBx/mmCIF format for chain_1
+    int    infmt2_opt=-1;    // PDB or PDBx/mmCIF format for chain_2
+    int    ter_opt   =2;     // END, or different chainID
+    int    split_opt =2;     // split each chains
+    int    outfmt_opt=0;     // set -outfmt to full output
+    bool   fast_opt  =false; // flags for -fast, fTM-align algorithm
+    int    cp_opt    =0;     // do not check circular permutation
+    int    closeK_opt=-1;    // number of atoms for SOI initial alignment.
+                             // 5 and 0 for -mm 5 and 6
+    int    hinge_opt =9;     // maximum number of hinges (used by -mm 7)
+    int    mirror_opt=0;     // do not align mirror
+    int    het_opt=0;        // do not read HETATM residues
+    int    mm_opt=0;         // do not perform MM-align
+    string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
+    string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
+    string suffix_opt="";    // set -suffix to empty
+    string dir_opt   ="";    // set -dir to empty
+    string dir1_opt  ="";    // set -dir1 to empty
+    string dir2_opt  ="";    // set -dir2 to empty
+    int    byresi_opt=0;     // set -byresi to 0
+    vector<string> chain1_list; // only when -dir1 is set
+    vector<string> chain2_list; // only when -dir2 is set
+
+    for(int i = 1; i < argc; i++)
+    {
+        if ( !strcmp(argv[i],"-o") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -o");
+            if (o_opt==2)
+                cerr<<"Warning! -rasmol is already set. Ignore -o"<<endl;
+            else
+            {
+                fname_super = argv[i + 1];
+                o_opt = 1;
+            }
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-rasmol") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -rasmol");
+            if (o_opt==1)
+                cerr<<"Warning! -o is already set. Ignore -rasmol"<<endl;
+            else
+            {
+                fname_super = argv[i + 1];
+                o_opt = 2;
+            }
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-u") || !strcmp(argv[i],"-L") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -u or -L");
+            Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++;
+            if (Lnorm_ass<=0) PrintErrorAndQuit(
+                "ERROR! The value for -u or -L should be >0");
+        }
+        else if ( !strcmp(argv[i],"-a") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -a");
+            if (!strcmp(argv[i + 1], "T"))      a_opt=1;
+            else if (!strcmp(argv[i + 1], "F")) a_opt=0;
+            else
+            {
+                a_opt=atoi(argv[i + 1]);
+                if (a_opt!=-2 && a_opt!=-1 && a_opt!=1)
+                    PrintErrorAndQuit("-a must be -2, -1, 1, T or F");
+            }
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-full") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -full");
+            if (!strcmp(argv[i + 1], "T"))      full_opt=true;
+            else if (!strcmp(argv[i + 1], "F")) full_opt=false;
+            else PrintErrorAndQuit("-full must be T or F");
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-d") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -d");
+            d0_scale = atof(argv[i + 1]); d_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-closeK") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -closeK");
+            closeK_opt = atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-hinge") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -hinge");
+            hinge_opt = atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-v") )
+        {
+            v_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-h") )
+        {
+            h_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-i") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -i");
+            if (i_opt==3)
+                PrintErrorAndQuit("ERROR! -i and -I cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 1; i++;
+        }
+        else if (!strcmp(argv[i], "-I") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -I");
+            if (i_opt==1)
+                PrintErrorAndQuit("ERROR! -I and -i cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 3; i++;
+        }
+        else if (!strcmp(argv[i], "-m") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -m");
+            fname_matrix = argv[i + 1];    m_opt = true; i++;
+        }// get filename for rotation matrix
+        else if (!strcmp(argv[i], "-fast"))
+        {
+            fast_opt = true;
+        }
+        else if (!strcmp(argv[i], "-se"))
+        {
+            se_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-infmt1") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -infmt1");
+            infmt1_opt=atoi(argv[i + 1]); i++;
+            if (infmt1_opt<-1 || infmt1_opt>3)
+                PrintErrorAndQuit("ERROR! -infmt1 can only be -1, 0, 1, 2, or 3");
+        }
+        else if ( !strcmp(argv[i],"-infmt2") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -infmt2");
+            infmt2_opt=atoi(argv[i + 1]); i++;
+            if (infmt2_opt<-1 || infmt2_opt>3)
+                PrintErrorAndQuit("ERROR! -infmt2 can only be -1, 0, 1, 2, or 3");
+        }
+        else if ( !strcmp(argv[i],"-ter") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -ter");
+            ter_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-split") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -split");
+            split_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-atom") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -atom");
+            atom_opt=argv[i + 1]; i++;
+            if (atom_opt.size()!=4) PrintErrorAndQuit(
+                "ERROR! Atom name must have 4 characters, including space.\n"
+                "For example, C alpha, C3' and P atoms should be specified by\n"
+                "-atom \" CA \", -atom \" P  \" and -atom \" C3'\", respectively.");
+        }
+        else if ( !strcmp(argv[i],"-mol") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -mol");
+            mol_opt=argv[i + 1]; i++;
+            if (mol_opt=="prot") mol_opt="protein";
+            else if (mol_opt=="DNA") mol_opt="RNA";
+            if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
+                PrintErrorAndQuit("ERROR! Molecule type must be one of the "
+                    "following:\nauto, prot (the same as 'protein'), and "
+                    "RNA (the same as 'DNA').");
+        }
+        else if ( !strcmp(argv[i],"-dir") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -dir");
+            dir_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir1") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -dir1");
+            dir1_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir2") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -dir2");
+            dir2_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-suffix") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -suffix");
+            suffix_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-outfmt") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -outfmt");
+            outfmt_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-TMcut") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -TMcut");
+            TMcut=atof(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-byresi")  ||
+                  !strcmp(argv[i],"-tmscore") ||
+                  !strcmp(argv[i],"-TMscore"))
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -byresi");
+            byresi_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-seq") )
+        {
+            byresi_opt=5;
+        }
+        else if ( !strcmp(argv[i],"-cp") )
+        {
+            mm_opt=3;
+        }
+        else if ( !strcmp(argv[i],"-mirror") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -mirror");
+            mirror_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-het") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -het");
+            het_opt=atoi(argv[i + 1]); i++;
+            if (het_opt!=0 && het_opt!=1 && het_opt!=2)
+                PrintErrorAndQuit("-het must be 0, 1, or 2");
+        }
+        else if ( !strcmp(argv[i],"-mm") )
+        {
+            if (i>=(argc-1))
+                PrintErrorAndQuit("ERROR! Missing value for -mm");
+            mm_opt=atoi(argv[i + 1]); i++;
+        }
+        else if (xname.size() == 0) xname=argv[i];
+        else if (yname.size() == 0) yname=argv[i];
+        else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
+    }
+
+    if(xname.size()==0 || (yname.size()==0 && dir_opt.size()==0) ||
+                          (yname.size()    && dir_opt.size()))
+    {
+        if (h_opt) print_help(h_opt);
+        if (v_opt)
+        {
+            print_version();
+            exit(EXIT_FAILURE);
+        }
+        if (xname.size()==0)
+            PrintErrorAndQuit("Please provide input structures");
+        else if (yname.size()==0 && dir_opt.size()==0 && mm_opt!=4)
+            PrintErrorAndQuit("Please provide structure B");
+        else if (yname.size() && dir_opt.size())
+            PrintErrorAndQuit("Please provide only one file name if -dir is set");
+    }
+
+    if (suffix_opt.size() && dir_opt.size()+dir1_opt.size()+dir2_opt.size()==0)
+        PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set");
+    if ((dir_opt.size() || dir1_opt.size() || dir2_opt.size()))
+    {
+        if (mm_opt!=2 && mm_opt!=4)
+        {
+            if (o_opt)
+                PrintErrorAndQuit("-o cannot be set with -dir, -dir1 or -dir2");
+            if (m_opt && fname_matrix!="-")
+                PrintErrorAndQuit("-m can only be - or unset when using -dir, -dir1 or -dir2");
+        }
+        else if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size()))
+            PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
+    }
+    if (o_opt && (infmt1_opt!=-1 && infmt1_opt!=0 && infmt1_opt!=3))
+        PrintErrorAndQuit("-o can only be used with -infmt1 -1, 0 or 3");
+
+    if (mol_opt=="protein" && atom_opt=="auto")
+        atom_opt=" CA ";
+    else if (mol_opt=="RNA" && atom_opt=="auto")
+        atom_opt=" C3'";
+
+    if (d_opt && d0_scale<=0)
+        PrintErrorAndQuit("Wrong value for option -d! It should be >0");
+    if (outfmt_opt>=2 && (a_opt || u_opt || d_opt))
+        PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d");
+    if (byresi_opt!=0)
+    {
+        if (i_opt)
+            PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I");
+        if (byresi_opt<0 || byresi_opt>6)
+            PrintErrorAndQuit("-byresi can only be 0 to 6");
+        if ((byresi_opt==2 || byresi_opt==3 || byresi_opt==6) && ter_opt>=2)
+            PrintErrorAndQuit("-byresi 2, 3 and 6 must be used with -ter <=1");
+    }
+    //if (split_opt==1 && ter_opt!=0)
+        //PrintErrorAndQuit("-split 1 should be used with -ter 0");
+    //else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
+        //PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1");
+    if (split_opt<0 || split_opt>2)
+        PrintErrorAndQuit("-split can only be 0, 1 or 2");
+
+    if (mm_opt==3)
+    {
+        cp_opt=1;
+        mm_opt=0;
+    }
+    if (cp_opt && i_opt)
+        PrintErrorAndQuit("-mm 3 cannot be used with -i or -I");
+
+    if (mirror_opt && het_opt!=1)
+        cerr<<"WARNING! -mirror was not used with -het 1. "
+            <<"D amino acids may not be correctly aligned."<<endl;
+
+    if (mm_opt)
+    {
+        if (i_opt) PrintErrorAndQuit("-mm cannot be used with -i or -I");
+        if (u_opt) PrintErrorAndQuit("-mm cannot be used with -u or -L");
+        //if (cp_opt) PrintErrorAndQuit("-mm cannot be used with -cp");
+        if (dir_opt.size() && (mm_opt==1||mm_opt==2)) PrintErrorAndQuit("-mm 1 or 2 cannot be used with -dir");
+        if (byresi_opt) PrintErrorAndQuit("-mm cannot be used with -byresi");
+        if (ter_opt>=2 && (mm_opt==1 || mm_opt==2)) PrintErrorAndQuit("-mm 1 or 2 must be used with -ter 0 or -ter 1");
+        if (mm_opt==4 && (yname.size() || dir2_opt.size()))
+            cerr<<"WARNING! structure_2 is ignored for -mm 4"<<endl;
+    }
+    else if (full_opt) PrintErrorAndQuit("-full can only be used with -mm");
+
+    if (o_opt && ter_opt<=1 && split_opt==2)
+    {
+        if (mm_opt && o_opt==2) cerr<<"WARNING! -mm may generate incorrect"
+            <<" RasMol output due to limitations in PDB file format. "
+            <<"When -mm is used, -o is recommended over -rasmol"<<endl;
+        else if (mm_opt==0) cerr<<"WARNING! Only the superposition of the"
+            <<" last aligned chain pair will be generated"<<endl;
+    }
+
+    if (closeK_opt<0)
+    {
+        if (mm_opt==5) closeK_opt=5;
+        else closeK_opt=0;
+    }
+
+    if (mm_opt==7 && hinge_opt>=10)
+        PrintErrorAndQuit("ERROR! -hinge must be <10");
+
+
+    /* read the user alignment given by -i/-I (file name in fname_lign) */
+    if (i_opt) read_user_alignment(sequence, fname_lign, i_opt);
+
+    if (byresi_opt==6) mm_opt=1;
+    else if (byresi_opt) i_opt=3;
+
+    if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt
+        PrintErrorAndQuit("ERROR! Please provide a file name for option -m!");
+
+    /* parse file list */
+    if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname);
+    else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt);
+
+    size_t i;
+    if (dir_opt.size())
+        for (i=0;i<chain1_list.size();i++)
+            chain2_list.push_back(chain1_list[i]);
+    else if (dir2_opt.size()==0) chain2_list.push_back(yname);
+    else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt);
+
+    if (outfmt_opt==2)
+    {
+        if (mm_opt==2) cout<<"#Query\tTemplate\tTM"<<endl;
+        else cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t"
+            <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl;
+    }
+
+    /* real alignment. mm_opt selects the entry function: TMalign (0),
+     * MMalign (1), MMdock (2), mTMalign (4), SOIalign (5, 6), flexalign (7) */
+    if (mm_opt==0) TMalign(xname, yname, fname_super, fname_lign, fname_matrix,
+        sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt,
+        u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt,
+        split_opt, outfmt_opt, fast_opt, cp_opt, mirror_opt, het_opt,
+        atom_opt, mol_opt, dir_opt, dir1_opt, dir2_opt, byresi_opt,
+        chain1_list, chain2_list, se_opt);
+    else if (mm_opt==1) MMalign(xname, yname, fname_super, fname_lign,
+        fname_matrix, sequence, d0_scale, m_opt, o_opt,
+        a_opt, d_opt, full_opt, TMcut, infmt1_opt, infmt2_opt,
+        ter_opt, split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt,
+        atom_opt, mol_opt, dir1_opt, dir2_opt, chain1_list, chain2_list,
+        byresi_opt);
+    else if (mm_opt==2) MMdock(xname, yname, fname_super,
+        fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, o_opt, a_opt,
+        u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt,
+        split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt,
+        atom_opt, mol_opt, dir1_opt, dir2_opt, chain1_list, chain2_list);
+    else if (mm_opt==3) ; // unreachable: 3 was rewritten to mm_opt=0, cp_opt=1
+    else if (mm_opt==4) mTMalign(xname, yname, fname_super, fname_matrix,
+        sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt, a_opt,
+        u_opt, d_opt, full_opt, TMcut, infmt1_opt, ter_opt,
+        split_opt, outfmt_opt, fast_opt, het_opt,
+        atom_opt, mol_opt, dir_opt, byresi_opt, chain1_list);
+    else if (mm_opt==5 || mm_opt==6) SOIalign(xname, yname, fname_super, fname_lign,
+        fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt,
+        a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt,
+        split_opt, outfmt_opt, fast_opt, cp_opt, mirror_opt, het_opt,
+        atom_opt, mol_opt, dir_opt, dir1_opt, dir2_opt,
+        chain1_list, chain2_list, se_opt, closeK_opt, mm_opt);
+    else if (mm_opt==7) flexalign(xname, yname, fname_super, fname_lign,
+        fname_matrix, sequence, Lnorm_ass, d0_scale, m_opt, i_opt, o_opt,
+        a_opt, u_opt, d_opt, TMcut, infmt1_opt, infmt2_opt, ter_opt,
+        split_opt, outfmt_opt, fast_opt, mirror_opt, het_opt,
+        atom_opt, mol_opt, dir_opt, dir1_opt, dir2_opt, byresi_opt,
+        chain1_list, chain2_list, hinge_opt);
+    else cerr<<"WARNING! -mm "<<mm_opt<<" not implemented"<<endl;
+
+    /* clean up */
+    vector<string>().swap(chain1_list);
+    vector<string>().swap(chain2_list);
+    vector<string>().swap(sequence);
+
+    t2 = clock();
+    double diff = (double)(t2 - t1)/CLOCKS_PER_SEC; // subtract before converting
+    if (outfmt_opt<2) printf("#Total CPU time is %5.2f seconds\n", diff);
+    return 0;
+}
diff --git a/modules/bindings/src/tmalign/align.txt b/modules/bindings/src/USalign/align.txt
similarity index 100%
rename from modules/bindings/src/tmalign/align.txt
rename to modules/bindings/src/USalign/align.txt
diff --git a/modules/bindings/src/tmalign/basic_fun.h b/modules/bindings/src/USalign/basic_fun.h
similarity index 78%
rename from modules/bindings/src/tmalign/basic_fun.h
rename to modules/bindings/src/USalign/basic_fun.h
index 0e8ae307d..0fe070119 100644
--- a/modules/bindings/src/tmalign/basic_fun.h
+++ b/modules/bindings/src/USalign/basic_fun.h
@@ -137,6 +137,17 @@ void split(const string &line, vector<string> &line_vec,
     }
 }
 
+/* Strip leading and trailing white space (space, \n, \r, \t) from a string.
+ * A string made only of white space yields an empty string. */
+string Trim(const string &inputString)
+{
+    const char *blanks = " \n\r\t";
+    const size_t idxBegin = inputString.find_first_not_of(blanks);
+    if (idxBegin == string::npos) return ""; // empty or all white space
+    const size_t idxEnd = inputString.find_last_not_of(blanks);
+    return inputString.substr(idxBegin, idxEnd + 1 - idxBegin);
+}
+
 size_t get_PDB_lines(const string filename,
     vector<vector<string> >&PDB_lines, vector<string> &chainID_list,
     vector<int> &mol_vec, const int ter_opt, const int infmt_opt,
@@ -152,11 +163,14 @@ size_t get_PDB_lines(const string filename,
     
     int compress_type=0; // uncompressed file
     ifstream fin;
+#ifndef REDI_PSTREAM_H_SEEN
+    ifstream fin_gz;
+#else
     redi::ipstream fin_gz; // if file is compressed
     if (filename.size()>=3 && 
         filename.substr(filename.size()-3,3)==".gz")
     {
-        fin_gz.open("zcat '"+filename+"'");
+        fin_gz.open("gunzip -c '"+filename+"'");
         compress_type=1;
     }
     else if (filename.size()>=4 && 
@@ -165,14 +179,20 @@ size_t get_PDB_lines(const string filename,
         fin_gz.open("bzcat '"+filename+"'");
         compress_type=2;
     }
-    else fin.open(filename.c_str());
+    else
+#endif
+    {
+        if (filename=="-") compress_type=-1;
+        else fin.open(filename.c_str());
+    }
 
     if (infmt_opt==0||infmt_opt==-1) // PDB format
     {
-        while (compress_type?fin_gz.good():fin.good())
+        while ((compress_type==-1)?cin.good():(compress_type?fin_gz.good():fin.good()))
         {
-            if (compress_type) getline(fin_gz, line);
-            else               getline(fin, line);
+            if  (compress_type==-1) getline(cin, line);
+            else if (compress_type) getline(fin_gz, line);
+            else                    getline(fin, line);
             if (infmt_opt==-1 && line.compare(0,5,"loop_")==0) // PDBx/mmCIF
                 return get_PDB_lines(filename,PDB_lines,chainID_list,
                     mol_vec, ter_opt, 3, atom_opt, split_opt,het_opt);
@@ -194,6 +214,13 @@ size_t get_PDB_lines(const string filename,
                          select_atom=(line.compare(12,4," C3'")==0);
                     else select_atom=(line.compare(12,4," CA ")==0);
                 }
+                else if (atom_opt=="PC4'")
+                {
+                    if (line[17]==' ' && (line[18]=='D'||line[18]==' '))
+                         select_atom=(line.compare(12,4," P  ")==0
+                                  )||(line.compare(12,4," C4'")==0);
+                    else select_atom=(line.compare(12,4," CA ")==0);
+                }
                 else     select_atom=(line.compare(12,4,atom_opt)==0);
                 if (select_atom)
                 {
@@ -246,7 +273,7 @@ size_t get_PDB_lines(const string filename,
                         mol_vec.push_back(0);
                     }
 
-                    if (resi==line.substr(22,5))
+                    if (resi==line.substr(22,5) && atom_opt!="PC4'")
                         cerr<<"Warning! Duplicated residue "<<resi<<endl;
                     resi=line.substr(22,5); // including insertion code
 
@@ -263,13 +290,26 @@ size_t get_PDB_lines(const string filename,
         size_t L=0;
         float x,y,z;
         stringstream i8_stream;
-        while (compress_type?fin_gz.good():fin.good())
+        while ((compress_type==-1)?cin.good():(compress_type?fin_gz.good():fin.good()))
         {
-            if (compress_type) fin_gz>>L>>x>>y>>z;
-            else               fin   >>L>>x>>y>>z;
-            if (compress_type) getline(fin_gz, line);
-            else               getline(fin, line);
-            if (!(compress_type?fin_gz.good():fin.good())) break;
+            if  (compress_type==-1)
+            {
+                cin>>L>>x>>y>>z;
+                getline(cin, line);
+                if (!cin.good()) break;
+            }
+            else if (compress_type)
+            {
+                fin_gz>>L>>x>>y>>z;
+                getline(fin_gz, line);
+                if (!fin_gz.good()) break;
+            }
+            else
+            {
+                fin   >>L>>x>>y>>z;
+                getline(fin, line);
+                if (!fin.good()) break;
+            }
             model_idx++;
             stringstream i8_stream;
             i8_stream << ':' << model_idx;
@@ -278,8 +318,9 @@ size_t get_PDB_lines(const string filename,
             mol_vec.push_back(0);
             for (i=0;i<L;i++)
             {
-                if (compress_type) fin_gz>>x>>y>>z;
-                else               fin   >>x>>y>>z;
+                if  (compress_type==-1) cin>>x>>y>>z;
+                else if (compress_type) fin_gz>>x>>y>>z;
+                else                    fin   >>x>>y>>z;
                 i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  UNK  "<<setw(4)
                     <<i+1<<"    "<<setiosflags(ios::fixed)<<setprecision(3)
                     <<setw(8)<<x<<setw(8)<<y<<setw(8)<<z;
@@ -287,31 +328,35 @@ size_t get_PDB_lines(const string filename,
                 i8_stream.str(string());
                 PDB_lines.back().push_back(line);
             }
-            if (compress_type) getline(fin_gz, line);
-            else               getline(fin, line);
+            if  (compress_type==-1) getline(cin, line);
+            else if (compress_type) getline(fin_gz, line);
+            else                    getline(fin, line);
         }
     }
     else if (infmt_opt==2) // xyz format
     {
         size_t L=0;
         stringstream i8_stream;
-        while (compress_type?fin_gz.good():fin.good())
+        while ((compress_type==-1)?cin.good():(compress_type?fin_gz.good():fin.good()))
         {
-            if (compress_type) getline(fin_gz, line);
-            else               getline(fin, line);
+            if (compress_type==-1)  getline(cin, line);
+            else if (compress_type) getline(fin_gz, line);
+            else                    getline(fin, line);
             L=atoi(line.c_str());
-            if (compress_type) getline(fin_gz, line);
-            else               getline(fin, line);
+            if (compress_type==-1)  getline(cin, line);
+            else if (compress_type) getline(fin_gz, line);
+            else                    getline(fin, line);
             for (i=0;i<line.size();i++)
                 if (line[i]==' '||line[i]=='\t') break;
-            if (!(compress_type?fin_gz.good():fin.good())) break;
+            if (!((compress_type==-1)?cin.good():(compress_type?fin_gz.good():fin.good()))) break;
             chainID_list.push_back(':'+line.substr(0,i));
             PDB_lines.push_back(tmp_str_vec);
             mol_vec.push_back(0);
             for (i=0;i<L;i++)
             {
-                if (compress_type) getline(fin_gz, line);
-                else               getline(fin, line);
+                if (compress_type==-1)  getline(cin, line);
+                else if (compress_type) getline(fin_gz, line);
+                else                    getline(fin, line);
                 i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  "
                     <<AAmap(line[0])<<"  "<<setw(4)<<i+1<<"    "
                     <<line.substr(2,8)<<line.substr(11,8)<<line.substr(20,8);
@@ -339,18 +384,24 @@ size_t get_PDB_lines(const string filename,
         string prev_resi="";
         string model_index=""; // the same as model_idx but type is string
         stringstream i8_stream;
-        while (compress_type?fin_gz.good():fin.good())
+        while ((compress_type==-1)?cin.good():(compress_type?fin_gz.good():fin.good()))
         {
-            if (compress_type) getline(fin_gz, line);
-            else               getline(fin, line);
+            if (compress_type==-1)  getline(cin, line);
+            else if (compress_type) getline(fin_gz, line);
+            else                    getline(fin, line);
             if (line.size()==0) continue;
-            if (loop_) loop_ = line.compare(0,2,"# ");
+            if (loop_) loop_ = (line.size()>=2)?(line.compare(0,2,"# ")):(line.compare(0,1,"#"));
             if (!loop_)
             {
                 if (line.compare(0,5,"loop_")) continue;
                 while(1)
                 {
-                    if (compress_type)
+                    if (compress_type==-1)
+                    {
+                        if (cin.good()) getline(cin, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of -");
+                    }
+                    else if (compress_type)
                     {
                         if (fin_gz.good()) getline(fin_gz, line);
                         else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
@@ -367,15 +418,16 @@ size_t get_PDB_lines(const string filename,
                 loop_=true;
                 _atom_site.clear();
                 atom_site_pos=0;
-                _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+                _atom_site[Trim(line.substr(11))]=atom_site_pos;
 
                 while(1)
                 {
-                    if (compress_type) getline(fin_gz, line);
-                    else               getline(fin, line);
+                    if  (compress_type==-1) getline(cin, line);
+                    else if (compress_type) getline(fin_gz, line);
+                    else                    getline(fin, line);
                     if (line.size()==0) continue;
                     if (line.compare(0,11,"_atom_site.")) break;
-                    _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+                    _atom_site[Trim(line.substr(11))]=++atom_site_pos;
                 }
 
 
@@ -431,6 +483,13 @@ size_t get_PDB_lines(const string filename,
                      select_atom=(atom==" C3'");
                 else select_atom=(atom==" CA ");
             }
+            else if (atom_opt=="PC4'")
+            {
+                if (line[17]==' ' && (line[18]=='D'||line[18]==' '))
+                     select_atom=(line.compare(12,4," P  ")==0
+                              )||(line.compare(12,4," C4'")==0);
+                else select_atom=(line.compare(12,4," CA ")==0);
+            }
             else     select_atom=(atom==atom_opt);
 
             if (!select_atom) continue;
@@ -493,7 +552,7 @@ size_t get_PDB_lines(const string filename,
                 resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
             else resi+=" ";
 
-            if (prev_resi==resi)
+            if (prev_resi==resi && atom_opt!="PC4'")
                 cerr<<"Warning! Duplicated residue "<<resi<<endl;
             prev_resi=resi;
 
@@ -514,8 +573,8 @@ size_t get_PDB_lines(const string filename,
         AA.clear();
     }
 
-    if (compress_type) fin_gz.close();
-    else               fin.close();
+    if      (compress_type>=1) fin_gz.close();
+    else if (compress_type==0) fin.close();
     line.clear();
     if (!split_opt) chainID_list.push_back("");
     return PDB_lines.size();
@@ -537,11 +596,14 @@ size_t get_FASTA_lines(const string filename,
     
     int compress_type=0; // uncompressed file
     ifstream fin;
+#ifndef REDI_PSTREAM_H_SEEN
+    ifstream fin_gz;
+#else
     redi::ipstream fin_gz; // if file is compressed
     if (filename.size()>=3 && 
         filename.substr(filename.size()-3,3)==".gz")
     {
-        fin_gz.open("zcat '"+filename+"'");
+        fin_gz.open("gunzip -c '"+filename+"'");
         compress_type=1;
     }
     else if (filename.size()>=4 && 
@@ -550,12 +612,19 @@ size_t get_FASTA_lines(const string filename,
         fin_gz.open("bzcat '"+filename+"'");
         compress_type=2;
     }
-    else fin.open(filename.c_str());
+    else 
+#endif
+    {
+        if (filename=="-") compress_type=-1;
+        else fin.open(filename.c_str());
+    }
 
-    while (compress_type?fin_gz.good():fin.good())
+    while ((compress_type==-1)?cin.good():(compress_type?fin_gz.good():fin.good()))
     {
-        if (compress_type) getline(fin_gz, line);
-        else               getline(fin, line);
+        if  (compress_type==-1) getline(cin, line);
+        else if (compress_type) getline(fin_gz, line);
+        else                    getline(fin, line);
+
         if (line.size()==0 || line[0]=='#') continue;
 
         if (line[0]=='>')
@@ -584,132 +653,11 @@ size_t get_FASTA_lines(const string filename,
     }
 
     line.clear();
-    if (compress_type) fin_gz.close();
-    else               fin.close();
+    if      (compress_type>=1) fin_gz.close();
+    else if (compress_type==0) fin.close();
     return FASTA_lines.size();
 }
 
-
-/* extract pairwise sequence alignment from residue index vectors,
- * assuming that "sequence" contains two empty strings.
- * return length of alignment, including gap. */
-int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
-    const vector<string> resi_vec1, const vector<string> resi_vec2,
-    const int byresi_opt)
-{
-    sequence.clear();
-    sequence.push_back("");
-    sequence.push_back("");
-
-    int i1=0; // positions in resi_vec1
-    int i2=0; // positions in resi_vec2
-    int xlen=resi_vec1.size();
-    int ylen=resi_vec2.size();
-    map<string,string> chainID_map1;
-    map<string,string> chainID_map2;
-    if (byresi_opt==3)
-    {
-        vector<string> chainID_vec;
-        string chainID;
-        stringstream ss;
-        int i;
-        for (i=0;i<xlen;i++)
-        {
-            chainID=resi_vec1[i].substr(5);
-            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
-            {
-                chainID_vec.push_back(chainID);
-                ss<<chainID_vec.size();
-                chainID_map1[chainID]=ss.str();
-                ss.str("");
-            }
-        }
-        chainID_vec.clear();
-        for (i=0;i<ylen;i++)
-        {
-            chainID=resi_vec2[i].substr(5);
-            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
-            {
-                chainID_vec.push_back(chainID);
-                ss<<chainID_vec.size();
-                chainID_map2[chainID]=ss.str();
-                ss.str("");
-            }
-        }
-        vector<string>().swap(chainID_vec);
-    }
-    string chainID1="";
-    string chainID2="";
-    string chainID1_prev="";
-    string chainID2_prev="";
-    while(i1<xlen && i2<ylen)
-    {
-        if (byresi_opt==2)
-        {
-            chainID1=resi_vec1[i1].substr(5);
-            chainID2=resi_vec2[i2].substr(5);
-        }
-        else if (byresi_opt==3)
-        {
-            chainID1=chainID_map1[resi_vec1[i1].substr(5)];
-            chainID2=chainID_map2[resi_vec2[i2].substr(5)];
-        }
-
-        if (chainID1==chainID2)
-        {
-            if (atoi(resi_vec1[i1].substr(0,4).c_str())<
-                atoi(resi_vec2[i2].substr(0,4).c_str()))
-            {
-                sequence[0]+=seqx[i1++];
-                sequence[1]+='-';
-            }
-            else if (atoi(resi_vec1[i1].substr(0,4).c_str())>
-                     atoi(resi_vec2[i2].substr(0,4).c_str()))
-            {
-                sequence[0]+='-';
-                sequence[1]+=seqy[i2++];
-            }
-            else
-            {
-                sequence[0]+=seqx[i1++];
-                sequence[1]+=seqy[i2++];
-            }
-            chainID1_prev=chainID1;
-            chainID2_prev=chainID2;
-        }
-        else
-        {
-            if (chainID1_prev==chainID1 && chainID2_prev!=chainID2)
-            {
-                sequence[0]+=seqx[i1++];
-                sequence[1]+='-';
-                chainID1_prev=chainID1;
-            }
-            else if (chainID1_prev!=chainID1 && chainID2_prev==chainID2)
-            {
-                sequence[0]+='-';
-                sequence[1]+=seqy[i2++];
-                chainID2_prev=chainID2;
-            }
-            else
-            {
-                sequence[0]+=seqx[i1++];
-                sequence[1]+=seqy[i2++];
-                chainID1_prev=chainID1;
-                chainID2_prev=chainID2;
-            }
-        }
-        
-    }
-    map<string,string>().swap(chainID_map1);
-    map<string,string>().swap(chainID_map2);
-    chainID1.clear();
-    chainID2.clear();
-    chainID1_prev.clear();
-    chainID2_prev.clear();
-    return sequence[0].size();
-}
-
 int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
     vector<string> &resi_vec, const int read_resi)
 {
@@ -758,17 +706,6 @@ void do_rotation(double **x, double **x1, int len, double t[3], double u[3][3])
     }    
 }
 
-/* strip white space at the begining or end of string */
-string Trim(const string &inputString)
-{
-    string result = inputString;
-    int idxBegin = inputString.find_first_not_of(" \n\r\t");
-    int idxEnd = inputString.find_last_not_of(" \n\r\t");
-    if (idxBegin >= 0 && idxEnd >= 0)
-        result = inputString.substr(idxBegin, idxEnd + 1 - idxBegin);
-    return result;
-}
-
 /* read user specified pairwise alignment from 'fname_lign' to 'sequence'.
  * This function should only be called by main function, as it will
  * terminate a program if wrong alignment is given */
diff --git a/modules/bindings/src/USalign/cif2pdb.cpp b/modules/bindings/src/USalign/cif2pdb.cpp
new file mode 100644
index 000000000..cfd06c269
--- /dev/null
+++ b/modules/bindings/src/USalign/cif2pdb.cpp
@@ -0,0 +1,533 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <string.h>
+
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+#include <string>
+#include <iomanip>
+#include <map>
+
+#include "pstream.h" // For reading gzip and bz2 compressed files
+
+using namespace std;
+
+/* Print the command line usage to stdout and exit with EXIT_SUCCESS.
+ * The defaults named here mirror the initial option values in main(). */
+void print_help()
+{
+    cout <<
+"Converting mmCIF file to PDB file(s)\n\n"
+"Usage: cif2pdb input.cif output.pdb\n\n"
+"    -chain   Specify auth chain ID to convert:\n"
+"             $ cif2pdb input.cif output.pdb -chain A\n"
+"\n"
+"    -mol     macromolecule type. default is all macromolecules.\n"
+"             1: protein only\n"
+"             2: RNA only\n"
+"             4: DNA only\n"
+"\n"
+"    -split   Whether to split PDB file into multiple chains\n"
+"             0: (default) do not split; output a single PDB\n"
+"             1: output one PDB file per chain\n"
+"\n"
+"    -het     Whether to read residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only 'ATOM  ' residues\n"
+"             1: 'ATOM  ' and 'HETATM' for MSE\n"
+"             2: 'ATOM  ' and all 'HETATM', excluding HOH\n"
+"             3: 'ATOM  ' and all 'HETATM', including HOH\n"
+"             If -het >=1, MSE will be converted to MET\n"
+    <<endl;
+    exit(EXIT_SUCCESS);
+}
+
+void PrintErrorAndQuit(const string sErrorString)
+{   /* write the message plus a newline to stdout, flush, exit with status 1 */
+    cout << sErrorString << '\n' << flush;
+    exit(1);
+}
+
+/* Strip white space (space, newline, carriage return, tab) from both ends
+ * of inputString. A string with no non-blank character is returned as is. */
+string Trim(const string &inputString)
+{
+    // size_t (not int) keeps the positions exact and the npos test explicit
+    const size_t idxBegin = inputString.find_first_not_of(" \n\r\t");
+    if (idxBegin == string::npos) return inputString;
+    const size_t idxEnd = inputString.find_last_not_of(" \n\r\t");
+    return inputString.substr(idxBegin, idxEnd + 1 - idxBegin);
+}
+
+/* Break a string into tokens at every run of the delimiter character.
+ * line          - text to tokenize
+ * line_vec      - tokens are appended here (existing entries are kept)
+ * delimiter     - separator character; consecutive separators count as
+ *                 one, so empty tokens are never produced */
+void split(const string &line, vector<string> &line_vec,
+    const char delimiter=' ')
+{
+    /* first character of the next token, npos when none is left */
+    size_t token_start = line.find_first_not_of(delimiter);
+    while (token_start != string::npos)
+    {
+        /* the token runs up to the next delimiter or the end of line */
+        size_t token_end = line.find(delimiter, token_start);
+        if (token_end == string::npos) token_end = line.size();
+        line_vec.push_back(
+            line.substr(token_start, token_end - token_start));
+
+        /* skip the delimiter run that follows; searching from
+         * line.size() simply yields npos and ends the loop */
+        token_start = line.find_first_not_of(delimiter, token_end);
+    }
+}
+
+/* Write the parsed chains as PDB text.
+ * filename     - output file, or "-" for stdout. With split_opt it is the
+ *                prefix of the per-chain files <filename><chain ID>.pdb
+ * PDB_lines    - coordinate lines of each chain; empty chains are skipped
+ * chainID_list - original chain ID of each chain, echoed in the REMARK
+ * split_opt    - non-zero: one REMARK/atoms/TER record set per chain, each
+ *                in its own file (all on stdout for "-");
+ *                zero: all REMARKs, then all chains, then END */
+void write_mmcif_to_pdb(const string filename,
+    const vector<vector<string> >&PDB_lines,
+    const vector<string> &chainID_list, const int split_opt)
+{
+    const bool to_stdout=(filename=="-");
+    const size_t chain_num=PDB_lines.size();
+    ofstream fout;
+
+    if (split_opt)
+    {
+        for (size_t c=0;c<chain_num;c++)
+        {
+            const vector<string> &chain=PDB_lines[c];
+            if (chain.empty()) continue;
+            if (!to_stdout)
+            {
+                /* the name of every file created is reported on stdout */
+                const string chain_file=filename+Trim(chainID_list[c])+".pdb";
+                cout<<chain_file<<endl;
+                fout.open(chain_file.c_str());
+            }
+            ostream &out=to_stdout?cout:fout;
+
+            /* column 22 of the first line holds the written chain ID */
+            out<<"REMARK cif2pdb "<<chain[0][21]<<" "<<chainID_list[c]<<endl;
+            for (size_t r=0;r<chain.size();r++) out<<chain[r];
+            out<<"TER"<<endl;
+            if (!to_stdout) fout.close();
+        }
+        return;
+    }
+
+    if (!to_stdout) fout.open(filename.c_str());
+    ostream &out=to_stdout?cout:fout;
+
+    /* header: one REMARK per chain pairing written ID and original ID */
+    for (size_t c=0;c<chain_num;c++)
+    {
+        if (PDB_lines[c].empty()) continue;
+        out<<"REMARK cif2pdb "<<PDB_lines[c][0][21]<<" "<<chainID_list[c]<<endl;
+    }
+
+    /* body: the atoms of each chain, closed by TER */
+    for (size_t c=0;c<chain_num;c++)
+    {
+        const vector<string> &chain=PDB_lines[c];
+        if (chain.empty()) continue;
+        for (size_t r=0;r<chain.size();r++) out<<chain[r];
+        out<<"TER"<<endl;
+    }
+    out<<"END"<<endl;
+    if (!to_stdout) fout.close();
+}
+
+/* Make the one-letter chain IDs stored in column 22 of the PDB lines
+ * unique, so that all chains can be told apart in a single PDB file.
+ * PDB_lines    - coordinate lines per chain; edited in place
+ * chainID_list - original (possibly multi-character) ID of each chain
+ * A chain that cannot get a unique letter is emptied with a warning.
+ * Returns the number of chains whose letter was replaced. */
+size_t resolve_chainID_for_mmcif(vector<vector<string> >&PDB_lines,
+    const vector<string> &chainID_list)
+{
+    const string alphabet=
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    /* letter_used[k] - alphabet[k] already belongs to some chain */
+    vector<bool> letter_used(alphabet.size(),false);
+    /* settled[c]     - chain c keeps the letter it currently carries */
+    vector<bool> settled(chainID_list.size(),false);
+    size_t changed_chains=0;
+
+    /* Two claiming rounds over the letters already written in column 22:
+     *   round 0 - chains whose full original ID is that single letter
+     *   round 1 - every chain that is still unsettled
+     * A chain loses when an earlier chain took its letter. Characters
+     * outside the alphabet are never tracked, hence never conflict. */
+    for (int round=0;round<2;round++)
+    {
+        for (size_t c=0;c<PDB_lines.size();c++)
+        {
+            if (PDB_lines[c].empty()) continue;
+            const char letter=PDB_lines[c][0][21];
+            if (round==0)
+            {
+                if (chainID_list[c]!=string(1,letter)) continue;
+            }
+            else if (settled[c]) continue;
+
+            /* position of the letter in the alphabet, npos for any
+             * other character (for instance a blank chain ID) */
+            const size_t slot=alphabet.find(letter);
+            if (slot==string::npos) settled[c]=true;
+            else
+            {
+                settled[c]=!letter_used[slot];
+                letter_used[slot]=true;
+            }
+        }
+    }
+
+    /* hand the first free letter to each chain that lost its own */
+    for (size_t c=0;c<PDB_lines.size();c++)
+    {
+        vector<string> &chain=PDB_lines[c];
+        if (chain.empty() || settled[c]) continue;
+
+        /* first alphabet position that nobody uses yet */
+        size_t slot=0;
+        while (slot<letter_used.size() && letter_used[slot]) slot++;
+        if (slot==letter_used.size())
+        {
+            /* every letter is taken: the chain has to be dropped */
+            cerr<<"WARNING! Cannot parse "<<chainID_list[c]<<" with "
+                <<chain.size()<<" atoms due to chain ID conflict. "
+                <<"Please consider -split 1"<<endl;
+            vector<string>().swap(chain);
+            continue;
+        }
+
+        /* overwrite column 22 of every line of the chain; the text
+         * before and after that column is carried over unchanged */
+        letter_used[slot]=true;
+        for (size_t r=0;r<chain.size();r++)
+            chain[r]=chain[r].substr(0,21)+alphabet[slot]+chain[r].substr(22);
+        changed_chains++;
+    }
+
+    if (changed_chains)
+        cerr<<"WARNING! Changed "<<changed_chains<<" chain ID(s)"<<endl;
+
+    return changed_chains;
+}
+
+/* Read the _atom_site loop of an mmCIF file and convert it to PDB lines.
+ * filename     - mmCIF file; "-" is stdin, names ending in .gz or .bz2
+ *                are read through "gunzip -c" or "bzcat"
+ * chain_opt    - if not empty, keep only this chain ID ("_" and "." also
+ *                select a blank chain ID)
+ * PDB_lines    - output: PDB-formatted lines, one vector per chain
+ * chainID_list - output: chain ID of every vector in PDB_lines, followed
+ *                by one trailing empty string
+ * dna_opt, rna_opt, protein_opt - keep 'ATOM  ' residues whose name is
+ *                2 letters starting with D, 1 letter, 3 letters
+ * hoh_opt, lig_opt - lig_opt keeps HETATM records, hoh_opt also keeps HOH
+ * mse_opt      - treat MSE as 'ATOM  ' and rename its SE atom to SD
+ * Returns the number of chains stored in PDB_lines. */
+size_t get_all_mmcif_lines(const string filename, const string chain_opt,
+    vector<vector<string> >&PDB_lines, vector<string> &chainID_list,
+    const bool dna_opt, const bool rna_opt, const bool protein_opt,
+    const bool hoh_opt,  const bool lig_opt, const bool mse_opt)
+{
+    size_t a=0; // atom index
+    string line;
+    vector<string> tmp_str_vec;
+
+    int compress_type=0; // uncompressed file
+    ifstream fin;
+    redi::ipstream fin_gz; // if file is compressed
+    if (filename.size()>=3 &&
+        filename.substr(filename.size()-3,3)==".gz")
+    {
+        fin_gz.open("gunzip -c '"+filename+"'");
+        compress_type=1;
+    }
+    else if (filename.size()>=4 &&
+        filename.substr(filename.size()-4,4)==".bz2")
+    {
+        fin_gz.open("bzcat '"+filename+"'");
+        compress_type=2;
+    }
+    else
+    {
+        if (filename=="-") compress_type=-1;
+        else fin.open(filename.c_str());
+    }
+    /* one handle for whichever of the three streams is in use */
+    istream &fp=(compress_type==-1)?cin:(compress_type?
+        static_cast<istream&>(fin_gz):static_cast<istream&>(fin));
+
+    bool loop_ = false; // not reading following content
+    map<string,int> _atom_site; // column name -> position within a row
+    int atom_site_pos=0;        // position of the last column declared
+    vector<string> line_vec;
+    string group_PDB="ATOM  ";
+    string alt_id=".";  // alternative location indicator
+    string asym_id="."; // this is similar to chainID, except that
+                        // chainID is char while asym_id is a string
+                        // with possibly multiple char
+    string prev_asym_id="";
+    string resn="";       // residue name
+    string resi="";
+    string atom="";
+    string model_index=""; // pdbx_PDB_model_num of the model being read
+    stringstream i8_stream;
+    while (fp.good())
+    {
+        getline(fp, line);
+        if (line.size()==0) continue;
+        if (loop_) loop_ = (line.size()>=2)?(line.compare(0,2,"# ")):(line.compare(0,1,"#"));
+        if (!loop_)
+        {
+            if (line.compare(0,5,"loop_")) continue;
+            while(1)
+            {
+                if (!fp.good())
+                    PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                getline(fp, line);
+                if (line.size()) break;
+            }
+            if (line.compare(0,11,"_atom_site.")) continue;
+
+            loop_=true;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[Trim(line.substr(11))]=atom_site_pos;
+
+            while(1)
+            {
+                /* stop at end of input instead of spinning on a failed getline */
+                if (!fp.good())
+                    PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                getline(fp, line);
+                if (line.size()==0) continue;
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[Trim(line.substr(11))]=++atom_site_pos;
+            }
+
+            if (_atom_site.count("group_PDB")*
+                _atom_site.count("label_atom_id")*
+                _atom_site.count("label_comp_id")*
+               (_atom_site.count("auth_asym_id")+
+                _atom_site.count("label_asym_id"))*
+               (_atom_site.count("auth_seq_id")+
+                _atom_site.count("label_seq_id"))*
+                _atom_site.count("Cartn_x")*
+                _atom_site.count("Cartn_y")*
+                _atom_site.count("Cartn_z")==0)
+            {
+                loop_ = false;
+                cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_comp_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl;
+                continue;
+            }
+        }
+
+        line_vec.clear();
+        split(line,line_vec);
+        if (line_vec.size()<=static_cast<size_t>(atom_site_pos))
+        {   // fewer fields than header columns: indexing would overrun
+            cerr<<"WARNING! Cannot parse line due to missing fields\n"<<line<<endl;
+            continue;
+        }
+        atom     =line_vec[_atom_site["label_atom_id"]];
+        resn     =line_vec[_atom_site["label_comp_id"]];
+        group_PDB=line_vec[_atom_site["group_PDB"]];
+        if (group_PDB=="ATOM") group_PDB="ATOM  ";
+        if (mse_opt && resn=="MSE")
+        {
+            group_PDB="ATOM  ";
+            if (atom=="SE") atom="SD";
+        }
+        if (group_PDB=="HETATM")
+        {
+            if (!lig_opt) continue;
+            if (!hoh_opt && resn=="HOH") continue;
+            /* NOTE(review): asym_id is re-read from the row further down, so this has no effect on the output */
+            if (asym_id!=prev_asym_id && prev_asym_id.size())
+                asym_id=prev_asym_id; // no separate chain for ligand
+        }
+        else if (group_PDB!="ATOM  ") continue;
+
+        alt_id=".";
+        if (_atom_site.count("label_alt_id")) // in 39.4 % of entries
+            alt_id=line_vec[_atom_site["label_alt_id"]];
+        if (alt_id!="." && alt_id!="A") continue;
+
+        if (resn.size()==1)
+        {
+            if (!rna_opt && group_PDB=="ATOM  ") continue;
+            resn="  "+resn;
+        }
+        else if (resn.size()==2)
+        {
+            if (!dna_opt && resn[0]=='D' && group_PDB=="ATOM  ") continue;
+            resn=" " +resn;
+        }
+        else if (resn.size()==3 && !protein_opt && group_PDB=="ATOM  ") continue;
+        else if (resn.size()>=4) resn=resn.substr(0,3);
+
+        if (atom[0]=='"') atom=atom.substr(1);
+        if (atom.size() && atom[atom.size()-1]=='"')
+            atom=atom.substr(0,atom.size()-1);
+        if      (atom.size()==0) continue;
+        else if (atom.size()==1) atom=" "+atom+"  ";
+        else if (atom.size()==2) atom=" "+atom+" "; // wrong for sidechain H
+        else if (atom.size()==3) atom=" "+atom;
+        else if (atom.size()>=5) atom=atom.substr(0,4);
+
+        if (_atom_site.count("auth_seq_id"))
+             resi=line_vec[_atom_site["auth_seq_id"]];
+        else resi=line_vec[_atom_site["label_seq_id"]];
+        if (_atom_site.count("pdbx_PDB_ins_code") &&
+            line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+            resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+        else resi+=" ";
+        if (resi.size()>5)
+        {
+            cerr<<"WARNING! Cannot parse line due to long residue index\n"<<line<<endl;
+            continue;
+        }
+
+        if (_atom_site.count("auth_asym_id"))
+             asym_id=line_vec[_atom_site["auth_asym_id"]];
+        else asym_id=line_vec[_atom_site["label_asym_id"]];
+        if (asym_id==".") asym_id=" ";
+        if (chain_opt.size() && asym_id!=chain_opt &&
+            !(asym_id==" " && (chain_opt=="_" || chain_opt=="."))) continue;
+
+        if (_atom_site.count("pdbx_PDB_model_num") &&
+            model_index!=line_vec[_atom_site["pdbx_PDB_model_num"]])
+        {
+            if (PDB_lines.size()) break; // stop at the next model once atoms are stored
+            model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+        }
+
+        if (prev_asym_id!=asym_id)
+        {
+            PDB_lines.push_back(tmp_str_vec);
+            chainID_list.push_back(asym_id);
+            prev_asym_id=asym_id;
+        }
+
+        a++;
+        a%=100000;
+        i8_stream<<group_PDB
+            <<setw(5)<<a<<" "<<atom<<" "<<resn<<" "<<asym_id[asym_id.size()-1]
+            <<setw(5)<<resi<<"   "
+            <<setw(8)<<line_vec[_atom_site["Cartn_x"]].substr(0,8)
+            <<setw(8)<<line_vec[_atom_site["Cartn_y"]].substr(0,8)
+            <<setw(8)<<line_vec[_atom_site["Cartn_z"]].substr(0,8);
+        if (_atom_site.count("B_iso_or_equiv"))
+        {
+            i8_stream<<"  1.00"<<setw(6)<<line_vec[_atom_site["B_iso_or_equiv"]].substr(0,6);
+            if (_atom_site.count("type_symbol"))
+                i8_stream<<setw(12)<<line_vec[_atom_site["type_symbol"]].substr(0,12);
+        }
+        i8_stream<<endl;
+        PDB_lines.back().push_back(i8_stream.str());
+        i8_stream.str(string());
+    }
+
+    if (compress_type>=0)
+    {
+        if (compress_type) fin_gz.close();
+        else               fin.close();
+    }
+    chainID_list.push_back("");
+    return PDB_lines.size();
+}
+
+
+/* cif2pdb entry point: parse the command line, read the mmCIF input and
+ * write it out in PDB format.
+ * Returns 0; every error path leaves through PrintErrorAndQuit. */
+int main(int argc, char *argv[])
+{
+    if (argc < 2) print_help();
+
+    /**********************/
+    /*    get argument    */
+    /**********************/
+    string xname       = "";  // input mmCIF file
+    string yname       = "";  // output PDB file; "-" means stdout
+
+    int    split_opt =0;     // do not split chain
+    int    het_opt   =0;     // do not read HETATM residues
+    int    mol_opt   =7;     // 1 (protein) + 2 (RNA) + 4 (DNA): read all
+    string chain_opt ="";    // read all chains
+
+    /* an option is only recognised when a value follows it; anything
+     * else is taken as the input name, then as the output name */
+    for (int i = 1; i < argc; i++)
+    {
+        const string arg = argv[i];
+        const bool has_value = (i + 1 < argc);
+
+        if      (arg == "-split" && has_value) split_opt = atoi(argv[++i]);
+        else if (arg == "-mol"   && has_value) mol_opt   = atoi(argv[++i]);
+        else if (arg == "-chain" && has_value) chain_opt = argv[++i];
+        else if (arg == "-het"   && has_value) het_opt   = atoi(argv[++i]);
+        else if (xname.empty()) xname = arg;
+        else if (yname.empty()) yname = arg;
+        else PrintErrorAndQuit("ERROR! Undefined option " + arg);
+    }
+
+    if (yname.empty())
+    {
+        if (xname.empty())
+            PrintErrorAndQuit("Please provide input structures");
+        yname = "-"; // no output name given: write to stdout
+    }
+
+    /* decode -mol: 4 and above selects DNA; a remainder modulo 4 of
+     * 2 or 3 selects RNA, an odd positive remainder selects protein */
+    const bool dna_opt     = (mol_opt >= 4);
+    const bool rna_opt     = (mol_opt % 4 >= 2);
+    const bool protein_opt = (mol_opt % 4 % 2 >= 1);
+
+    /* reject option values outside the documented range */
+    if (split_opt != 0 && split_opt != 1)
+        PrintErrorAndQuit("-split can only be 0 or 1");
+    if (het_opt < 0 || het_opt > 3)
+        PrintErrorAndQuit("-het can only be 0, 1, 2, or 3");
+
+    /* decode -het: each level includes the previous one */
+    const bool mse_opt = (het_opt >= 1);
+    const bool lig_opt = (het_opt >= 2);
+    const bool hoh_opt = (het_opt == 3);
+
+    /* parse structure */
+    vector<vector<string> >PDB_lines;
+    vector<string> chainID_list;
+    get_all_mmcif_lines(xname, chain_opt, PDB_lines, chainID_list,
+        dna_opt, rna_opt, protein_opt, hoh_opt, lig_opt, mse_opt);
+
+    /* a single output file needs unique one-letter chain IDs */
+    if (!split_opt) resolve_chainID_for_mmcif(PDB_lines,chainID_list);
+    write_mmcif_to_pdb(yname, PDB_lines, chainID_list, split_opt);
+
+    /* clean up */
+    vector<vector<string> >().swap(PDB_lines);
+    vector<string>().swap(chainID_list);
+    chain_opt.clear();
+    return 0;
+}
diff --git a/modules/bindings/src/USalign/flexalign.h b/modules/bindings/src/USalign/flexalign.h
new file mode 100644
index 000000000..f982fa921
--- /dev/null
+++ b/modules/bindings/src/USalign/flexalign.h
@@ -0,0 +1,1826 @@
+/* Functions for the core TMalign algorithm, including the entry function
+ * flexalign_main */
+#ifndef flexalign_h
+#define flexalign_h 1
+
+#include "TMalign.h"
+
+/* pack translation t0 into tu_tmp[0..2] and rotation u0, row by row,
+ * into tu_tmp[3..11]; tu_tmp must already hold at least 12 elements */
+void t_u2tu(double t0[3],double u0[3][3], vector<double> &tu_tmp)
+{
+    for (int row=0;row<3;row++)
+    {
+        tu_tmp[row]=t0[row];
+        for (int col=0;col<3;col++)
+            tu_tmp[3+3*row+col]=u0[row][col];
+    }
+}
+
+/* unpack tu_tmp[0..2] into translation t0 and tu_tmp[3..11], read row by
+ * row, into rotation u0; tu_tmp must hold at least 12 elements */
+void tu2t_u(vector<double> tu_tmp, double t0[3],double u0[3][3])
+{
+    for (int row=0;row<3;row++)
+    {
+        t0[row]=tu_tmp[row];
+        for (int col=0;col<3;col++)
+            u0[row][col]=tu_tmp[3+3*row+col];
+    }
+}
+
+/* invmap[j] = index of the x residue aligned to y residue j, else -1 */
+void aln2invmap(const string &seqxA, const string &seqyA, int *invmap)
+{
+    int ylen=0; // number of residues (non-gap characters) in seqyA
+    for (size_t k=0;k<seqyA.size();k++) if (seqyA[k]!='-') ylen++;
+    for (int k=0;k<ylen;k++) invmap[k]=-1;
+    int xi=-1, yj=-1; // index of the last residue seen in x and in y
+    for (size_t k=0;k<seqxA.size();k++)
+    {
+        const bool x_res=(seqxA[k]!='-');
+        const bool y_res=(seqyA[k]!='-');
+        xi+=x_res; yj+=y_res;
+        if (x_res && y_res) invmap[yj]=xi;
+    }
+}
+
+int flexalign_main(double **xa, double **ya,
+    const char *seqx, const char *seqy, const char *secx, const char *secy,
+    double t0[3], double u0[3][3], vector<vector<double> >&tu_vec,
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    const vector<string> sequence, const double Lnorm_ass,
+    const double d0_scale, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const bool fast_opt,
+    const int mol_type, const int hinge_opt)
+{
+    vector<double> tu_tmp(12,0);
+    int round2=tu_vec.size();
+    if (round2==0)
+    {
+        TMalign_main(xa, ya, seqx, seqy, secx, secy, t0, u0,
+            TM1, TM2, TM3, TM4, TM5, d0_0, TM_0,
+            d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, Lnorm_ass,
+            d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type);
+    
+        t_u2tu(t0,u0,tu_tmp);
+        tu_vec.push_back(tu_tmp);
+    }
+    
+    int i,j,r;
+    int* invmap=new int[ylen+1];
+    for (j=0;j<ylen+1;j++) invmap[j]=-1;
+    double **xt;
+    NewArray(&xt, xlen, 3);
+    do_rotation(xa, xt, xlen, t0, u0);
+
+    TM1= TM2= TM3= TM4= TM5=rmsd0=0;
+    seqM="";
+    seqxA="";
+    seqyA="";
+    n_ali=n_ali8=0;
+    se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, d0_0, TM_0,
+        d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt,
+        a_opt, u_opt, d_opt, mol_type, 0, invmap, 1);
+    if (round2)
+    {
+        /* aligned structure A vs unaligned structure B */
+        int xlen_h=n_ali8;
+        int ylen_h=ylen - n_ali8;
+        char *seqx_h = new char[xlen + 1];
+        char *seqy_h = new char[ylen + 1];
+        char *secx_h = new char[xlen + 1];
+        char *secy_h = new char[ylen + 1];
+        seqx_h[xlen]=seqy_h[ylen]=0;
+        secx_h[xlen]=secy_h[ylen]=0;
+        double **xa_h, **ya_h;
+        NewArray(&xa_h, xlen, 3);
+        NewArray(&ya_h, ylen, 3);
+
+        int r1,r2;
+        i=j=-1;
+        r1=r2=0;
+        for (r=0;r<seqxA.size();r++)
+        {
+            i+=(seqxA[r]!='-');
+            j+=(seqyA[r]!='-');
+            if (seqxA[r]!='-' && seqyA[r]!='-')
+            {
+                seqx_h[r1]=seqx[i];
+                secx_h[r1]=secx[i];
+                xa_h[r1][0]=xa[i][0];
+                xa_h[r1][1]=xa[i][1];
+                xa_h[r1][2]=xa[i][2];
+                r1++;
+            }
+            if (seqxA[r]=='-')
+            {
+                seqy_h[r2]=seqx[j];
+                secy_h[r2]=secx[j];
+                ya_h[r2][0]=ya[j][0];
+                ya_h[r2][1]=ya[j][1];
+                ya_h[r2][2]=ya[j][2];
+                r2++;
+            }
+        }
+        
+        double TM1_h, TM2_h;
+        double TM3_h, TM4_h, TM5_h;     // for a_opt, u_opt, d_opt
+        double d0_0_h, TM_0_h;
+        double d0A_h, d0B_h, d0u_h, d0a_h;
+        double d0_out_h=5.0;
+        string seqM_h, seqxA_h, seqyA_h;// for output alignment
+        double rmsd0_h = 0.0;
+        int L_ali_h=0;                // Aligned length in standard_TMscore
+        double Liden_h=0;
+        double TM_ali_h, rmsd_ali_h;  // TMscore and rmsd in standard_TMscore
+        int n_ali_h=0;
+        int n_ali8_h=0;
+
+        TMalign_main(xa_h, ya_h, seqx_h, seqy_h, secx_h, secy_h, t0, u0,
+            TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, d0_0_h, TM_0_h,
+            d0A_h, d0B_h, d0u_h, d0a_h, d0_out_h, seqM_h, seqxA_h, seqyA_h,
+            rmsd0_h, L_ali_h, Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h,
+            xlen_h, ylen_h, sequence, Lnorm_ass,
+            d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type);
+        
+        do_rotation(xa, xt, xlen, t0, u0);
+        t_u2tu(t0,u0,tu_vec[0]);
+        
+        int* invmap_h=new int[ylen+1];
+        for (j=0;j<ylen+1;j++) invmap_h[j]=-1;
+        TM1_h= TM2_h= TM3_h= TM4_h= TM5_h=rmsd0_h=0;
+        seqM_h="";
+        seqxA_h="";
+        seqyA_h="";
+        n_ali_h=n_ali8_h=0;
+        se_main(xt, ya, seqx, seqy, TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, d0_0,
+            TM_0, d0A, d0B, d0u, d0a, d0_out, seqM_h, seqxA_h, seqyA_h,
+            rmsd0_h, L_ali, Liden, TM_ali, rmsd_ali, n_ali_h, n_ali8_h,
+            xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt,
+            a_opt, u_opt, d_opt, mol_type, 0, invmap_h, 1);
+        
+        /* unaligned structure A vs aligned structure B */
+        xlen_h=xlen - n_ali8;
+        ylen_h=n_ali8;
+
+        i=j=-1;
+        r1=r2=0;
+        for (r=0;r<seqxA.size();r++)
+        {
+            i+=(seqxA[r]!='-');
+            j+=(seqyA[r]!='-');
+            if (seqyA[r]=='-')
+            {
+                seqx_h[r1]=seqx[i];
+                secx_h[r1]=secx[i];
+                xa_h[r1][0]=xa[i][0];
+                xa_h[r1][1]=xa[i][1];
+                xa_h[r1][2]=xa[i][2];
+                r1++;
+            }
+            if (seqxA[r]!='-' && seqyA[r]!='-')
+            {
+                seqy_h[r2]=seqx[j];
+                secy_h[r2]=secx[j];
+                ya_h[r2][0]=ya[j][0];
+                ya_h[r2][1]=ya[j][1];
+                ya_h[r2][2]=ya[j][2];
+                r2++;
+            }
+        }
+        
+        d0_out_h=5.0;
+        L_ali_h=Liden_h=0;
+        TM1= TM2= TM3= TM4= TM5=rmsd0=0;
+        seqM="";
+        seqxA="";
+        seqyA="";
+        n_ali=n_ali8=0;
+
+        TMalign_main(xa_h, ya_h, seqx_h, seqy_h, secx_h, secy_h, t0, u0,
+            TM1, TM2, TM3, TM4, TM5, d0_0_h, TM_0_h,
+            d0A_h, d0B_h, d0u_h, d0a_h, d0_out_h, seqM, seqxA, seqyA,
+            rmsd0, L_ali_h, Liden_h, TM_ali_h, rmsd_ali_h, n_ali, n_ali8,
+            xlen_h, ylen_h, sequence, Lnorm_ass,
+            d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type);
+        
+        do_rotation(xa, xt, xlen, t0, u0);
+        
+        for (j=0;j<ylen+1;j++) invmap[j]=-1;
+        TM1= TM2= TM3= TM4= TM5=rmsd0=0;
+        seqM="";
+        seqxA="";
+        seqyA="";
+        n_ali=n_ali8=0;
+        se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5, d0_0,
+            TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt,
+            a_opt, u_opt, d_opt, mol_type, 0, invmap, 1);
+
+        double TM_h=(TM1_h>TM2_h)?TM1_h:TM2_h;
+        double TM  =(TM1  >TM2  )?TM1  :TM2  ;
+        if (TM_h>TM)
+        {
+            TM1=TM1_h;
+            TM2=TM2_h;
+            TM3=TM3_h;
+            TM4=TM4_h;
+            TM5=TM5_h;
+            seqM=seqM_h;
+            seqxA=seqxA_h;
+            seqyA=seqyA_h;
+            rmsd0=rmsd0_h;
+            n_ali=n_ali_h;
+            n_ali8=n_ali8_h;
+            for (j=0;j<ylen+1;j++) invmap[j]=invmap_h[j];
+        }
+        else t_u2tu(t0,u0,tu_vec[0]);
+        
+        /* clean up */
+        delete [] invmap_h;
+        DeleteArray(&xa_h, xlen);
+        DeleteArray(&ya_h, ylen);
+        seqM_h.clear();
+        seqxA_h.clear();
+        seqyA_h.clear();
+        delete [] seqx_h;
+        delete [] secx_h;
+        delete [] seqy_h;
+        delete [] secy_h;
+    }
+    for (r=0;r<seqM.size();r++) if (seqM[r]=='1') seqM[r]='0';
+
+    int minlen = min(xlen, ylen);
+    int hinge;
+    for (hinge=0;hinge<hinge_opt;hinge++)
+    {
+        if (minlen-n_ali8<5) break;
+        int xlen_h=xlen - n_ali8;
+        int ylen_h=ylen - n_ali8;
+        char *seqx_h = new char[xlen_h + 1];
+        char *seqy_h = new char[ylen_h + 1];
+        char *secx_h = new char[xlen_h + 1];
+        char *secy_h = new char[ylen_h + 1];
+        seqx_h[xlen_h]=seqy_h[ylen_h]=0;
+        secx_h[xlen_h]=secy_h[ylen_h]=0;
+        double **xa_h, **ya_h;
+        NewArray(&xa_h, xlen_h, 3);
+        NewArray(&ya_h, ylen_h, 3);
+        vector<int> r1toi(xlen_h,0);
+        vector<int> r2toj(ylen_h,0);
+
+        int r1,r2;
+        i=j=-1;
+        r1=r2=0;
+        for (r=0;r<seqxA.size();r++)
+        {
+            i+=(seqxA[r]!='-');
+            j+=(seqyA[r]!='-');
+            if (seqyA[r]=='-')
+            {
+                seqx_h[r1]=seqx[i];
+                secx_h[r1]=secx[i];
+                xa_h[r1][0]=xa[i][0];
+                xa_h[r1][1]=xa[i][1];
+                xa_h[r1][2]=xa[i][2];
+                r1toi[r1]=i;
+                r1++;
+            }
+            if (seqxA[r]=='-')
+            {
+                seqy_h[r2]=seqx[j];
+                secy_h[r2]=secx[j];
+                ya_h[r2][0]=ya[j][0];
+                ya_h[r2][1]=ya[j][1];
+                ya_h[r2][2]=ya[j][2];
+                r2toj[r2]=j;
+                r2++;
+            }
+        }
+        
+        double TM1_h, TM2_h;
+        double TM3_h, TM4_h, TM5_h;     // for a_opt, u_opt, d_opt
+        double d0_0_h, TM_0_h;
+        double d0A_h, d0B_h, d0u_h, d0a_h;
+        double d0_out_h=5.0;
+        string seqM_h, seqxA_h, seqyA_h;// for output alignment
+        double rmsd0_h = 0.0;
+        int L_ali_h=0;                // Aligned length in standard_TMscore
+        double Liden_h=0;
+        double TM_ali_h, rmsd_ali_h;  // TMscore and rmsd in standard_TMscore
+        int n_ali_h=0;
+        int n_ali8_h=0;
+
+        TMalign_main(xa_h, ya_h, seqx_h, seqy_h, secx_h, secy_h, t0, u0,
+            TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, d0_0_h, TM_0_h,
+            d0A_h, d0B_h, d0u_h, d0a_h, d0_out_h, seqM_h, seqxA_h, seqyA_h,
+            rmsd0_h, L_ali_h, Liden_h, TM_ali_h, rmsd_ali_h, n_ali_h, n_ali8_h,
+            xlen_h, ylen_h, sequence, Lnorm_ass,
+            d0_scale, i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type);
+        
+        do_rotation(xa, xt, xlen, t0, u0);
+        
+        TM1_h=TM1;
+        TM2_h=TM2;
+        TM3_h=TM3;
+        TM4_h=TM4;
+        TM5_h=TM5;
+        seqM_h=seqM;
+        seqxA_h=seqxA;
+        seqyA_h=seqyA;
+        rmsd0_h=rmsd0;
+        n_ali_h=n_ali;
+        n_ali8_h=n_ali8;
+        int* invmap_h=new int[ylen+1];
+        for (j=0;j<ylen+1;j++) invmap_h[j]=invmap[j];
+        se_main(xt, ya, seqx, seqy, TM1_h, TM2_h, TM3_h, TM4_h, TM5_h, d0_0, TM_0,
+            d0A, d0B, d0u, d0a, d0_out, seqM_h, seqxA_h, seqyA_h,
+            rmsd0_h, L_ali, Liden, TM_ali, rmsd_ali, n_ali_h, n_ali8_h,
+            xlen, ylen, sequence, Lnorm_ass, d0_scale, i_opt,
+            a_opt, u_opt, d_opt, mol_type, 0, invmap_h, hinge+1);
+        int new_ali=0;
+        for (r=0;r<seqM_h.size();r++) new_ali+=(seqM_h[r]==hinge+'1');
+        if (n_ali8_h - n_ali8<5) new_ali=0;
+        if (new_ali>=5)
+        {
+            TM1=TM1_h;
+            TM2=TM2_h;
+            TM3=TM3_h;
+            TM4=TM4_h;
+            TM5=TM5_h;
+            seqM=seqM_h;
+            seqxA=seqxA_h;
+            seqyA=seqyA_h;
+            rmsd0=rmsd0_h;
+            n_ali=n_ali_h;
+            n_ali8=n_ali8_h;
+            t_u2tu(t0,u0,tu_tmp);
+            tu_vec.push_back(tu_tmp);
+            for (j=0;j<ylen+1;j++) invmap[j]=invmap_h[j];
+            //cout<<">hinge="<<hinge<<'\n'
+                //<<seqxA<<'\n'<<seqM<<'\n'<<seqyA<<endl;
+            //for (j=0;j<ylen;j++) if ((i=invmap[j])>=0) cout<<"("<<i<<","<<j<<")";
+            //cout<<endl;
+        }
+        
+        /* clean up */
+        delete [] invmap_h;
+        DeleteArray(&xa_h, xlen_h);
+        DeleteArray(&ya_h, ylen_h);
+        r1toi.clear();
+        r2toj.clear();
+        seqM_h.clear();
+        seqxA_h.clear();
+        seqyA_h.clear();
+        delete [] seqx_h;
+        delete [] secx_h;
+        delete [] seqy_h;
+        delete [] secy_h;
+        if (new_ali<5) break;
+    }
+
+    if (tu_vec.size()<=1)
+    {
+        DeleteArray(&xt, xlen);
+        delete[] invmap;
+        return tu_vec.size();
+    }
+    
+    /* re-derive alignment based on tu_vec */
+    vector<char> seqM_char(ylen,' ');
+    vector<double> di_vec(ylen,-1);
+    double d;
+    for (hinge=tu_vec.size()-1;hinge>=0;hinge--)
+    {
+        tu2t_u(tu_vec[hinge],t0,u0);
+        do_rotation(xa, xt, xlen, t0, u0);
+        for (j=0;j<ylen;j++)
+        {
+            i=invmap[j];
+            if (i<0) continue;
+            d=sqrt(dist(xt[i], ya[j]));
+            if (di_vec[j]<0 || d<=di_vec[j])
+            {
+                di_vec[j]=d;
+                seqM_char[j]=hinge+'0';
+            }
+        }
+    }
+    j=-1;
+    for (r=0;r<seqM.size();r++)
+    {
+        if (seqyA[r]=='-') continue;
+        j++;
+        seqM[r]=seqM_char[j];
+    }
+
+    /* smooth out AFP assignment: remove singleton insert */
+    for (hinge=tu_vec.size()-1;hinge>=0;hinge--)
+    {
+        j=-1;
+        for (r=0;r<seqM.size();r++)
+        {
+            if (seqyA[r]=='-') continue;
+            j++;
+            if (seqM_char[j]!=hinge+'0') continue;
+            if (r<seqM.size()-1 && (seqM[r+1]==hinge+'0' || seqM[r+1]==' '))
+                continue;
+            if (r>0 && (seqM[r-1]==hinge+'0' || seqM[r-1]==' ')) continue;
+            if (r<seqM.size()-1 && r>0 && seqM[r-1]!=seqM[r+1]) continue;
+            if (r>0) seqM[r]=seqM_char[j]=seqM[r-1];
+            else     seqM[r]=seqM_char[j]=seqM[r+1];
+        }
+    }
+    /* smooth out AFP assignment: remove singleton at the end of fragment */
+    char left_hinge=' ';
+    char right_hinge=' ';
+    for (hinge=tu_vec.size()-1;hinge>=0;hinge--)
+    {
+        j=-1;
+        for (r=0;r<seqM.size();r++)
+        {
+            if (seqyA[r]=='-') continue;
+            j++;
+            if (seqM[r]!=hinge+'0') continue;
+            if (r>0 && seqM[r-1]==' ' && r<seqM.size()-1 && seqM[r+1]==' ')
+                continue;
+            
+            left_hinge=' ';
+            for (i=r-1;i>=0;i--)
+            {
+                if (seqM[i]==' ') continue;
+                left_hinge=seqM[i];
+                break;
+            }
+            if (left_hinge==hinge+'0') continue;
+            
+            right_hinge=' ';
+            for (i=r+1;i<seqM.size();i++)
+            {
+                if (seqM[i]==' ') continue;
+                right_hinge=seqM[i];
+                break;
+            }
+            if (right_hinge==hinge+'0') continue;
+            if (left_hinge!=right_hinge && left_hinge!=' ' && right_hinge!=' ')
+                continue;
+            
+            if     (right_hinge!=' ') seqM[r]=seqM_char[j]=right_hinge;
+            else if (left_hinge!=' ') seqM[r]=seqM_char[j]=left_hinge;
+        }
+    }
+    /* smooth out AFP assignment: remove dimer insert */
+    for (hinge=tu_vec.size()-1;hinge>=0;hinge--)
+    {
+        j=-1;
+        for (r=0;r<seqM.size()-1;r++)
+        {
+            if (seqyA[r]=='-') continue;
+            j++;
+            if (seqM[r]  !=hinge+'0'|| seqM[r+1]!=hinge+'0') continue;
+            
+            if (r<seqM.size()-2 && (seqM[r+2]==' ' || seqM[r+2]==hinge+'0'))
+                continue;
+            if (r>0 && (seqM[r-1]==' ' || seqM[r-1]==hinge+'0')) continue;
+            if (r<seqM.size()-2 && r>0 && seqM[r-1]!=seqM[r+2]) continue;
+
+            if (r>0) seqM[r]=seqM_char[j]=seqM[r+1]=seqM_char[j+1]=seqM[r-1];
+            else     seqM[r]=seqM_char[j]=seqM[r+1]=seqM_char[j+1]=seqM[r+2];
+        }
+    }
+    /* smooth out AFP assignment: remove disconnected singleton */
+    int i1,i2;
+    for (hinge=tu_vec.size()-1;hinge>=0;hinge--)
+    {
+        j=-1;
+        for (r=0;r<seqM.size();r++)
+        {
+            if (seqyA[r]=='-') continue;
+            j++;
+            if (seqM[r]!=hinge+'0') continue;
+            
+            left_hinge=' ';
+            for (i=r-1;i>=0;i--)
+            {
+                if (seqM[i]==' ') continue;
+                left_hinge=seqM[i];
+                i1=(r-i);
+                break;
+            }
+            if (left_hinge==hinge+'0') continue;
+            
+            right_hinge=' ';
+            for (i=r+1;i<seqM.size();i++)
+            {
+                if (seqM[i]==' ') continue;
+                right_hinge=seqM[i];
+                i2=(i-r);
+                break;
+            }
+            if (right_hinge==hinge+'0') continue;
+            
+            if (right_hinge==' ') seqM[r]=seqM_char[j]=left_hinge;
+            else if (left_hinge==' ') seqM[r]=seqM_char[j]=right_hinge;
+            else
+            {
+                if (i1<i2) seqM[r]=seqM_char[j]=left_hinge;
+                else       seqM[r]=seqM_char[j]=right_hinge;
+            }
+        }
+    }
+    
+    /* recalculate all scores */
+    for (hinge=tu_vec.size()-1;hinge>=0;hinge--)
+    {
+        tu2t_u(tu_vec[hinge],t0,u0);
+        do_rotation(xa, xt, xlen, t0, u0);
+        for (j=0;j<ylen;j++)
+        {
+            i=invmap[j];
+            if (i<0) continue;
+            if (seqM_char[j]!=hinge+'0') continue;
+            d=sqrt(dist(xt[i], ya[j]));
+            if (di_vec[j]<0 || d<=di_vec[j])
+            {
+                di_vec[j]=d;
+                seqM_char[j]=hinge+'0';
+            }
+        }
+    }
+    rmsd0=TM1=TM2=TM3=TM4=TM5=0;
+    Liden=0;
+    for (r=0;r<seqM.size();r++) if (seqM[r]!=' ') Liden+=seqxA[r]==seqyA[r];
+    for(j=0; j<ylen; j++)
+    {
+        i=invmap[j];
+        if(i<0) continue;
+        {
+            d=di_vec[j];
+            TM2+=1/(1+(d/d0B)*(d/d0B)); // chain_1
+            TM1+=1/(1+(d/d0A)*(d/d0A)); // chain_2
+            if (a_opt) TM3+=1/(1+(d/d0a)*(d/d0a)); // -a
+            if (u_opt) TM4+=1/(1+(d/d0u)*(d/d0u)); // -u
+            if (d_opt) TM5+=1/(1+(d/d0_scale)*(d/d0_scale)); // -d
+            rmsd0+=d*d;
+        }
+    }
+    TM2/=xlen;
+    TM1/=ylen;
+    TM3/=(xlen+ylen)*0.5;
+    TM4/=Lnorm_ass;
+    TM5/=ylen;
+    if (n_ali8) rmsd0=sqrt(rmsd0/n_ali8);
+    for (hinge=tu_vec.size()-1;hinge>0;hinge--)
+    {
+        int afp_len=0;
+        for (r=0;r<seqM.size();r++) afp_len+=seqM[r]==hinge+'0';
+        if (afp_len) break;
+        tu_vec.pop_back(); // remove unnecessary afp
+    }
+
+    /* clean up */
+    seqM_char.clear();
+    di_vec.clear();
+    DeleteArray(&xt, xlen);
+    delete[] invmap;
+    return tu_vec.size();
+}
+
+/* Write one rotation matrix per entry of tu_vec to fname_matrix, or to
+ * stdout when fname_matrix is "-". An existing file is truncated; if it
+ * cannot be opened a message is printed to stdout and nothing is written.
+ * t and u are scratch space: tu2t_u() unpacks each tu_vec entry into them,
+ * so the caller's arrays are overwritten. */
+void output_flexalign_rotation_matrix(const char* fname_matrix,
+    const vector<vector<double> >&tu_vec, double t[3], double u[3][3])
+{
+    stringstream ss;
+    char dest[1000];
+    for (size_t hinge=0;hinge<tu_vec.size();hinge++)
+    {
+        tu2t_u(tu_vec[hinge],t,u);
+        ss << "------ The rotation matrix to rotate Structure_1 to Structure_2 ------\n";
+        /* bounded writes: "%f" of a huge value must not overrun dest */
+        snprintf(dest, sizeof(dest), "m %18s %14s %14s %14s\n", "t[m]", "u[m][0]", "u[m][1]", "u[m][2]");
+        ss << dest;
+        for (int k = 0; k < 3; k++)
+        {
+            snprintf(dest, sizeof(dest), "%d %18.10f %14.10f %14.10f %14.10f\n", k, t[k], u[k][0], u[k][1], u[k][2]);
+            ss << dest;
+        }
+    }
+    /* usage snippet is emitted once, after all matrices */
+    ss << "\nCode for rotating Structure 1 from (x,y,z) to (X,Y,Z):\n"
+            "for(i=0; i<L; i++)\n"
+            "{\n"
+            "   X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
+            "   Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
+            "   Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
+            "}\n";
+    if (strcmp(fname_matrix,"-")==0) cout<<ss.str();
+    else
+    {
+        /* fout is flushed and closed by its destructor */
+        fstream fout(fname_matrix, ios::out | ios::trunc);
+        if (fout) fout<<ss.str();
+        else cout << "Open file to output rotation matrix fail.\n";
+    }
+}
+
+void output_flexalign_rasmol(const string xname, const string yname,
+    const string fname_super,const vector<vector<double> >&tu_vec,
+    double t[3], double u[3][3], const int ter_opt,
+    const int mm_opt, const int split_opt, const int mirror_opt,
+    const char *seqM, const char *seqxA, const char *seqyA,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2,
+    const string chainID1, const string chainID2,
+    const int xlen, const int ylen, const double d0A, const int n_ali8,
+    const double rmsd, const double TM1, const double Liden)
+{
+    stringstream buf;
+    stringstream buf_all;
+    stringstream buf_atm;
+    stringstream buf_all_atm;
+    stringstream buf_all_atm_lig;
+    //stringstream buf_pdb;
+    stringstream buf_tm;
+    string line;
+    double x[3];  // before transform
+    double x1[3]; // after transform
+    bool after_ter; // true if passed the "TER" line in PDB
+    string asym_id; // chain ID
+    
+    map<string,int> resi2hinge_dict;
+    int r,i,j;
+    j=-1;
+    char hinge_char=0;
+    int ali_len=strlen(seqM);
+    for (r=0;r<strlen(seqxA);r++)
+    {
+        if (seqxA[r]=='-') continue;
+        j++;
+        hinge_char=seqM[r];
+        if (hinge_char==' ')
+        {
+            for (i=1;i<ali_len;i++)
+            {
+                if (r-i>=0 && seqM[r-i]!=' ')
+                    hinge_char=seqM[r-i];
+                else if (r+i<xlen && seqM[r+i]!=' ')
+                    hinge_char=seqM[r+i];
+                if (hinge_char!=' ') break;
+            }
+        }
+        resi2hinge_dict[resi_vec1[j]]=hinge_char-'0';
+    }
+    string resi=resi_vec1[0];
+    int read_resi=resi.size()-4;
+
+    buf_tm<<"REMARK US-align"
+        <<"\nREMARK Structure 1:"<<setw(11)<<left<<xname+chainID1<<" Size= "<<xlen
+        <<"\nREMARK Structure 2:"<<setw(11)<<yname+chainID2<<right<<" Size= "<<ylen
+        <<" (TM-score is normalized by "<<setw(4)<<ylen<<", d0="
+        <<setiosflags(ios::fixed)<<setprecision(2)<<setw(6)<<d0A<<")"
+        <<"\nREMARK Aligned length="<<setw(4)<<n_ali8<<", RMSD="
+        <<setw(6)<<setiosflags(ios::fixed)<<setprecision(2)<<rmsd
+        <<", TM-score="<<setw(7)<<setiosflags(ios::fixed)<<setprecision(5)<<TM1
+        <<", ID="<<setw(5)<<setiosflags(ios::fixed)<<setprecision(3)
+        <<((n_ali8>0)?Liden/n_ali8:0)<<endl;
+    string rasmol_CA_header="load inline\nselect *A\nwireframe .45\nselect *B\nwireframe .20\nselect all\ncolor white\n";
+    string rasmol_cartoon_header="load inline\nselect all\ncartoon\nselect *A\ncolor blue\nselect *B\ncolor red\nselect ligand\nwireframe 0.25\nselect solvent\nspacefill 0.25\nselect all\nexit\n"+buf_tm.str();
+    if (!mm_opt) buf<<rasmol_CA_header;
+    buf_all<<rasmol_CA_header;
+    if (!mm_opt) buf_atm<<rasmol_cartoon_header;
+    buf_all_atm<<rasmol_cartoon_header;
+    buf_all_atm_lig<<rasmol_cartoon_header;
+
+    /* selecting chains for -mol */
+    string chain1_sele;
+    string chain2_sele;
+    if (!mm_opt)
+    {
+        if (split_opt==2 && ter_opt>=1) // align one chain from model 1
+        {
+            chain1_sele=chainID1.substr(1);
+            chain2_sele=chainID2.substr(1);
+        }
+        else if (split_opt==2 && ter_opt==0) // align one chain from each model
+        {
+            for (i=1;i<chainID1.size();i++) if (chainID1[i]==',') break;
+            chain1_sele=chainID1.substr(i+1);
+            for (i=1;i<chainID2.size();i++) if (chainID2[i]==',') break;
+            chain2_sele=chainID2.substr(i+1);
+        }
+    }
+
+
+    /* for PDBx/mmCIF only */
+    map<string,int> _atom_site;
+    int atom_site_pos;
+    vector<string> line_vec;
+    string atom; // 4-character atom name
+    string AA;   // 3-character residue name
+    string inscode; // 1-character insertion code
+    string model_index; // model index
+    bool is_mmcif=false;
+
+    /* used for CONECT record of chain1 */
+    int ca_idx1=0; // all CA atoms
+    int lig_idx1=0; // all atoms
+    vector <int> idx_vec;
+
+    /* used for CONECT record of chain2 */
+    int ca_idx2=0; // all CA atoms
+    int lig_idx2=0; // all atoms
+
+    /* extract aligned region */
+    vector<string> resi_aln1;
+    vector<string> resi_aln2;
+    int i1=-1;
+    int i2=-1;
+    if (!mm_opt)
+    {
+        for (i=0;i<strlen(seqM);i++)
+        {
+            i1+=(seqxA[i]!='-');
+            i2+=(seqyA[i]!='-');
+            if (seqM[i]==' ') continue;
+            resi_aln1.push_back(resi_vec1[i1].substr(0,4));
+            resi_aln2.push_back(resi_vec2[i2].substr(0,4));
+            if (seqM[i]!=':') continue;
+            buf    <<"select "<<resi_aln1.back()<<":A,"
+                   <<resi_aln2.back()<<":B\ncolor red\n";
+            buf_all<<"select "<<resi_aln1.back()<<":A,"
+                   <<resi_aln2.back()<<":B\ncolor red\n";
+        }
+        buf<<"select all\nexit\n"<<buf_tm.str();
+    }
+    buf_all<<"select all\nexit\n"<<buf_tm.str();
+
+    ifstream fin;
+    /* read first file */
+    after_ter=false;
+    asym_id="";
+    fin.open(xname.c_str());
+    int hinge=0;
+    while (fin.good())
+    {
+        getline(fin, line);
+        if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
+        if (is_mmcif==false && line.size()>=54 &&
+           (line.compare(0, 6, "ATOM  ")==0 ||
+            line.compare(0, 6, "HETATM")==0)) // PDB format
+        {
+            if (line[16]!='A' && line[16]!=' ') continue;
+            x[0]=atof(line.substr(30,8).c_str());
+            x[1]=atof(line.substr(38,8).c_str());
+            x[2]=atof(line.substr(46,8).c_str());
+            if (mirror_opt) x[2]=-x[2];
+            if (read_resi==1) resi=line.substr(22,5);
+            else resi=line.substr(22,5)+line[21];
+            hinge=0;
+            if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi];
+            tu2t_u(tu_vec[hinge],t,u);
+            transform(t, u, x, x1);
+            //buf_pdb<<line.substr(0,30)<<setiosflags(ios::fixed)
+                //<<setprecision(3)
+                //<<setw(8)<<x1[0] <<setw(8)<<x1[1] <<setw(8)<<x1[2]
+                //<<line.substr(54)<<'\n';
+
+            if (after_ter && line.compare(0,6,"ATOM  ")==0) continue;
+            lig_idx1++;
+            buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1
+                <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            if (chain1_sele.size() && line[21]!=chain1_sele[0]) continue;
+            if (after_ter || line.compare(0,6,"ATOM  ")) continue;
+            if (ter_opt>=2)
+            {
+                if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) 
+                {
+                    after_ter=true;
+                    continue;
+                }
+                asym_id=line[21];
+            }
+            buf_all_atm<<"ATOM  "<<setw(5)<<lig_idx1
+                <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            if (!mm_opt && find(resi_aln1.begin(),resi_aln1.end(),
+                line.substr(22,4))!=resi_aln1.end())
+            {
+                buf_atm<<"ATOM  "<<setw(5)<<lig_idx1
+                    <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                    <<setiosflags(ios::fixed)<<setprecision(3)
+                    <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            }
+            if (line.substr(12,4)!=" CA " && line.substr(12,4)!=" C3'") continue;
+            ca_idx1++;
+            buf_all<<"ATOM  "<<setw(5)<<ca_idx1<<' '
+                <<line.substr(12,4)<<' '<<line.substr(17,3)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
+            if (find(resi_aln1.begin(),resi_aln1.end(),
+                line.substr(22,4))==resi_aln1.end()) continue;
+            if (!mm_opt) buf<<"ATOM  "<<setw(5)<<ca_idx1<<' '
+                <<line.substr(12,4)<<' '<<line.substr(17,3)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
+            idx_vec.push_back(ca_idx1);
+        }
+        else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+        {
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                if (line.size()) break;
+            }
+            if (line.compare(0,11,"_atom_site.")) continue;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                if (line.size()==0) continue;
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+            }
+
+            if (is_mmcif==false)
+            {
+                //buf_pdb.str(string());
+                is_mmcif=true;
+            }
+
+            while(1)
+            {
+                line_vec.clear();
+                split(line,line_vec);
+                if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                    line_vec[_atom_site["group_PDB"]]!="HETATM") break;
+                if (_atom_site.count("pdbx_PDB_model_num"))
+                {
+                    if (model_index.size() && model_index!=
+                        line_vec[_atom_site["pdbx_PDB_model_num"]])
+                        break;
+                    model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                }
+
+                x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str());
+                x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str());
+                x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str());
+                if (mirror_opt) x[2]=-x[2];
+
+
+                if (_atom_site.count("auth_seq_id"))
+                    resi=line_vec[_atom_site["auth_seq_id"]];
+                else resi=line_vec[_atom_site["label_seq_id"]];
+                if (_atom_site.count("pdbx_PDB_ins_code") && 
+                    line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                    resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+                else resi+=" ";
+                if (read_resi>=2)
+                {
+                    if (_atom_site.count("auth_asym_id"))
+                        asym_id=line_vec[_atom_site["auth_asym_id"]];
+                    else asym_id=line_vec[_atom_site["label_asym_id"]];
+                    if (asym_id==".") asym_id=" ";
+                    resi+=asym_id[0];
+                }
+                hinge=0;
+                if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi];
+                tu2t_u(tu_vec[hinge],t,u);
+                transform(t, u, x, x1);
+
+                if (_atom_site.count("label_alt_id")==0 || 
+                    line_vec[_atom_site["label_alt_id"]]=="." ||
+                    line_vec[_atom_site["label_alt_id"]]=="A")
+                {
+                    atom=line_vec[_atom_site["label_atom_id"]];
+                    if (atom[0]=='"') atom=atom.substr(1);
+                    if (atom.size() && atom[atom.size()-1]=='"')
+                        atom=atom.substr(0,atom.size()-1);
+                    if      (atom.size()==0) atom="    ";
+                    else if (atom.size()==1) atom=" "+atom+"  ";
+                    else if (atom.size()==2) atom=" "+atom+" ";
+                    else if (atom.size()==3) atom=" "+atom;
+                    else if (atom.size()>=5) atom=atom.substr(0,4);
+            
+                    AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+                    if      (AA.size()==1) AA="  "+AA;
+                    else if (AA.size()==2) AA=" " +AA;
+                    else if (AA.size()>=4) AA=AA.substr(0,3);
+                
+                    if (_atom_site.count("auth_seq_id"))
+                        resi=line_vec[_atom_site["auth_seq_id"]];
+                    else resi=line_vec[_atom_site["label_seq_id"]];
+                    while (resi.size()<4) resi=' '+resi;
+                    if (resi.size()>4) resi=resi.substr(0,4);
+                
+                    inscode=' ';
+                    if (_atom_site.count("pdbx_PDB_ins_code") && 
+                        line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                        inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+
+                    if (_atom_site.count("auth_asym_id"))
+                    {
+                        if (chain1_sele.size()) after_ter
+                            =line_vec[_atom_site["auth_asym_id"]]!=chain1_sele;
+                        else if (ter_opt>=2 && ca_idx1 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["auth_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["auth_asym_id"]];
+                    }
+                    else if (_atom_site.count("label_asym_id"))
+                    {
+                        if (chain1_sele.size()) after_ter
+                            =line_vec[_atom_site["label_asym_id"]]!=chain1_sele;
+                        if (ter_opt>=2 && ca_idx1 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["label_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["label_asym_id"]];
+                    }
+                    //buf_pdb<<left<<setw(6)
+                        //<<line_vec[_atom_site["group_PDB"]]<<right
+                        //<<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                        //<<AA<<" "<<asym_id[asym_id.size()-1]
+                        //<<resi<<inscode<<"   "
+                        //<<setiosflags(ios::fixed)<<setprecision(3)
+                        //<<setw(8)<<x1[0]
+                        //<<setw(8)<<x1[1]
+                        //<<setw(8)<<x1[2]<<'\n';
+
+                    if (after_ter==false ||
+                        line_vec[_atom_site["group_pdb"]]=="HETATM")
+                    {
+                        lig_idx1++;
+                        buf_all_atm_lig<<left<<setw(6)
+                            <<line_vec[_atom_site["group_PDB"]]<<right
+                            <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                            <<AA<<" A"<<resi<<inscode<<"   "
+                            <<setiosflags(ios::fixed)<<setprecision(3)
+                            <<setw(8)<<x1[0]
+                            <<setw(8)<<x1[1]
+                            <<setw(8)<<x1[2]<<'\n';
+                        if (after_ter==false &&
+                            line_vec[_atom_site["group_PDB"]]=="ATOM")
+                        {
+                            buf_all_atm<<"ATOM  "<<setw(6)
+                                <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                                <<AA<<" A"<<resi<<inscode<<"   "
+                                <<setiosflags(ios::fixed)<<setprecision(3)
+                                <<setw(8)<<x1[0]
+                                <<setw(8)<<x1[1]
+                                <<setw(8)<<x1[2]<<'\n';
+                            if (!mm_opt && find(resi_aln1.begin(),
+                                resi_aln1.end(),resi)!=resi_aln1.end())
+                            {
+                                buf_atm<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<lig_idx1%100000<<' '
+                                    <<atom<<' '<<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                            }
+                            if (atom==" CA " || atom==" C3'")
+                            {
+                                ca_idx1++;
+            //mm_opt, split_opt, mirror_opt, chainID1,chainID2);
+                                buf_all<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<ca_idx1%100000<<' '<<atom<<' '
+                                    <<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                                if (!mm_opt && find(resi_aln1.begin(),
+                                    resi_aln1.end(),resi)!=resi_aln1.end())
+                                {
+                                    buf<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<ca_idx1%100000<<' '<<atom<<' '
+                                    <<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                                    idx_vec.push_back(ca_idx1);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                while(1)
+                {
+                    if (fin.good()) getline(fin, line);
+                    else break;
+                    if (line.size()) break;
+                }
+            }
+        }
+        else if (line.size() && is_mmcif==false)
+        {
+            //buf_pdb<<line<<'\n';
+            if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+        }
+    }
+    fin.close();
+    if (!mm_opt) buf<<"TER\n";
+    buf_all<<"TER\n";
+    if (!mm_opt) buf_atm<<"TER\n";
+    buf_all_atm<<"TER\n";
+    buf_all_atm_lig<<"TER\n";
+    for (i=1;i<ca_idx1;i++) buf_all<<"CONECT"
+        <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
+    if (!mm_opt) for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
+        <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
+    idx_vec.clear();
+
+    /* read second file */
+    after_ter=false;
+    asym_id="";
+    fin.open(yname.c_str());
+    while (fin.good())
+    {
+        getline(fin, line);
+        if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
+        if (line.size()>=54 && (line.compare(0, 6, "ATOM  ")==0 ||
+            line.compare(0, 6, "HETATM")==0)) // PDB format
+        {
+            if (line[16]!='A' && line[16]!=' ') continue;
+            if (after_ter && line.compare(0,6,"ATOM  ")==0) continue;
+            lig_idx2++;
+            buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1+lig_idx2
+                <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            if (chain1_sele.size() && line[21]!=chain1_sele[0]) continue;
+            if (after_ter || line.compare(0,6,"ATOM  ")) continue;
+            if (ter_opt>=2)
+            {
+                if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1))
+                {
+                    after_ter=true;
+                    continue;
+                }
+                asym_id=line[21];
+            }
+            buf_all_atm<<"ATOM  "<<setw(5)<<lig_idx1+lig_idx2
+                <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            if (!mm_opt && find(resi_aln2.begin(),resi_aln2.end(),
+                line.substr(22,4))!=resi_aln2.end())
+            {
+                buf_atm<<"ATOM  "<<setw(5)<<lig_idx1+lig_idx2
+                    <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            }
+            if (line.substr(12,4)!=" CA " && line.substr(12,4)!=" C3'") continue;
+            ca_idx2++;
+            buf_all<<"ATOM  "<<setw(5)<<ca_idx1+ca_idx2<<' '<<line.substr(12,4)
+                <<' '<<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
+            if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
+                )==resi_aln2.end()) continue;
+            if (!mm_opt) buf<<"ATOM  "<<setw(5)<<ca_idx1+ca_idx2<<' '
+                <<line.substr(12,4)<<' '<<line.substr(17,3)<<" B"
+                <<line.substr(22,32)<<'\n';
+            idx_vec.push_back(ca_idx1+ca_idx2);
+        }
+        else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+        {
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
+                if (line.size()) break;
+            }
+            if (line.compare(0,11,"_atom_site.")) continue;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
+                if (line.size()==0) continue;
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+            }
+
+            while(1)
+            {
+                line_vec.clear();
+                split(line,line_vec);
+                if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                    line_vec[_atom_site["group_PDB"]]!="HETATM") break;
+                if (_atom_site.count("pdbx_PDB_model_num"))
+                {
+                    if (model_index.size() && model_index!=
+                        line_vec[_atom_site["pdbx_PDB_model_num"]])
+                        break;
+                    model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                }
+
+                if (_atom_site.count("label_alt_id")==0 || 
+                    line_vec[_atom_site["label_alt_id"]]=="." ||
+                    line_vec[_atom_site["label_alt_id"]]=="A")
+                {
+                    atom=line_vec[_atom_site["label_atom_id"]];
+                    if (atom[0]=='"') atom=atom.substr(1);
+                    if (atom.size() && atom[atom.size()-1]=='"')
+                        atom=atom.substr(0,atom.size()-1);
+                    if      (atom.size()==0) atom="    ";
+                    else if (atom.size()==1) atom=" "+atom+"  ";
+                    else if (atom.size()==2) atom=" "+atom+" ";
+                    else if (atom.size()==3) atom=" "+atom;
+                    else if (atom.size()>=5) atom=atom.substr(0,4);
+            
+                    AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+                    if      (AA.size()==1) AA="  "+AA;
+                    else if (AA.size()==2) AA=" " +AA;
+                    else if (AA.size()>=4) AA=AA.substr(0,3);
+                
+                    if (_atom_site.count("auth_seq_id"))
+                        resi=line_vec[_atom_site["auth_seq_id"]];
+                    else resi=line_vec[_atom_site["label_seq_id"]];
+                    while (resi.size()<4) resi=' '+resi;
+                    if (resi.size()>4) resi=resi.substr(0,4);
+                
+                    inscode=' ';
+                    if (_atom_site.count("pdbx_PDB_ins_code") && 
+                        line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                        inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+                    
+                    if (_atom_site.count("auth_asym_id"))
+                    {
+                        if (chain2_sele.size()) after_ter
+                            =line_vec[_atom_site["auth_asym_id"]]!=chain2_sele;
+                        if (ter_opt>=2 && ca_idx2 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["auth_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["auth_asym_id"]];
+                    }
+                    else if (_atom_site.count("label_asym_id"))
+                    {
+                        if (chain2_sele.size()) after_ter
+                            =line_vec[_atom_site["label_asym_id"]]!=chain2_sele;
+                        if (ter_opt>=2 && ca_idx2 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["label_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["label_asym_id"]];
+                    }
+                    if (after_ter==false || 
+                        line_vec[_atom_site["group_PDB"]]=="HETATM")
+                    {
+                        lig_idx2++;
+                        buf_all_atm_lig<<left<<setw(6)
+                            <<line_vec[_atom_site["group_PDB"]]<<right
+                            <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                            <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                            <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                            <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                            <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                            <<'\n';
+                        if (after_ter==false &&
+                            line_vec[_atom_site["group_PDB"]]=="ATOM")
+                        {
+                            buf_all_atm<<"ATOM  "<<setw(6)
+                                <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                                <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                <<'\n';
+                            if (!mm_opt && find(resi_aln2.begin(),
+                                resi_aln2.end(),resi)!=resi_aln2.end())
+                            {
+                                buf_atm<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                                    <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                            }
+                            if (atom==" CA " || atom==" C3'")
+                            {
+                                ca_idx2++;
+                                buf_all<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(ca_idx1+ca_idx2)%100000
+                                    <<' '<<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                                if (!mm_opt && find(resi_aln2.begin(),
+                                    resi_aln2.end(),resi)!=resi_aln2.end())
+                                {
+                                    buf<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(ca_idx1+ca_idx2)%100000
+                                    <<' '<<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                                    idx_vec.push_back(ca_idx1+ca_idx2);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                if (fin.good()) getline(fin, line);
+                else break;
+            }
+        }
+        else if (line.size())
+        {
+            if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+        }
+    }
+    fin.close();
+    if (!mm_opt) buf<<"TER\n";
+    buf_all<<"TER\n";
+    if (!mm_opt) buf_atm<<"TER\n";
+    buf_all_atm<<"TER\n";
+    buf_all_atm_lig<<"TER\n";
+    for (i=ca_idx1+1;i<ca_idx1+ca_idx2;i++) buf_all<<"CONECT"
+        <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
+    for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
+        <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
+    idx_vec.clear();
+
+    /* write pymol script */
+    ofstream fp;
+    /*
+    stringstream buf_pymol;
+    vector<string> pml_list;
+    pml_list.push_back(fname_super+"");
+    pml_list.push_back(fname_super+"_atm");
+    pml_list.push_back(fname_super+"_all");
+    pml_list.push_back(fname_super+"_all_atm");
+    pml_list.push_back(fname_super+"_all_atm_lig");
+    for (i=0;i<pml_list.size();i++)
+    {
+        buf_pymol<<"#!/usr/bin/env pymol\n"
+            <<"load "<<pml_list[i]<<"\n"
+            <<"hide all\n"
+            <<((i==0 || i==2)?("show stick\n"):("show cartoon\n"))
+            <<"color blue, chain A\n"
+            <<"color red, chain B\n"
+            <<"set ray_shadow, 0\n"
+            <<"set stick_radius, 0.3\n"
+            <<"set sphere_scale, 0.25\n"
+            <<"show stick, not polymer\n"
+            <<"show sphere, not polymer\n"
+            <<"bg_color white\n"
+            <<"set transparency=0.2\n"
+            <<"zoom polymer\n"
+            <<endl;
+        fp.open((pml_list[i]+".pml").c_str());
+        fp<<buf_pymol.str();
+        fp.close();
+        buf_pymol.str(string());
+        pml_list[i].clear();
+    }
+    pml_list.clear();
+    */
+    
+    /* write rasmol script */
+    if (!mm_opt)
+    {
+        fp.open((fname_super).c_str());
+        fp<<buf.str();
+        fp.close();
+    }
+    fp.open((fname_super+"_all").c_str());
+    fp<<buf_all.str();
+    fp.close();
+    if (!mm_opt)
+    {
+        fp.open((fname_super+"_atm").c_str());
+        fp<<buf_atm.str();
+        fp.close();
+    }
+    fp.open((fname_super+"_all_atm").c_str());
+    fp<<buf_all_atm.str();
+    fp.close();
+    fp.open((fname_super+"_all_atm_lig").c_str());
+    fp<<buf_all_atm_lig.str();
+    fp.close();
+    //fp.open((fname_super+".pdb").c_str());
+    //fp<<buf_pdb.str();
+    //fp.close();
+
+    /* clear stream */
+    buf.str(string());
+    buf_all.str(string());
+    buf_atm.str(string());
+    buf_all_atm.str(string());
+    buf_all_atm_lig.str(string());
+    //buf_pdb.str(string());
+    buf_tm.str(string());
+    resi_aln1.clear();
+    resi_aln2.clear();
+    asym_id.clear();
+    line_vec.clear();
+    atom.clear();
+    AA.clear();
+    resi.clear();
+    inscode.clear();
+    model_index.clear();
+}
+
+/* Write structure 1 after hinge-wise superposition, plus PyMOL scripts.
+ *
+ * xname is read as PDB or PDBx/mmCIF. For every ATOM/HETATM record the hinge
+ * index of its residue is looked up (0 if unknown), tu2t_u(tu_vec[hinge],t,u)
+ * is called and the coordinates go through transform(); t and u are passed
+ * non-const and are presumably overwritten. Other non-empty lines are copied.
+ * Output goes to fname_super+".pdb" or ".cif" (no suffix if neither was seen).
+ *
+ * Hinge indices come from the characters of seqM; blank columns borrow the
+ * nearest non-blank one, and values that do not index tu_vec fall back to 0
+ * (tu_vec must therefore hold at least one entry).
+ *
+ * The scripts fname_super{,_atm,_all,_all_atm,_all_atm_lig}.pml load that file
+ * as structure1 and yname as structure2; the first two are skipped if mm_opt. */
+void output_flexalign_pymol(const string xname, const string yname,
+    const string fname_super, const vector<vector<double> >&tu_vec,
+    double t[3], double u[3][3], const int ter_opt,
+    const int mm_opt, const int split_opt, const int mirror_opt,
+    const char *seqM, const char *seqxA, const char *seqyA,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2,
+    const string chainID1, const string chainID2)
+{
+    int compress_type=0; // uncompressed file
+    ifstream fin;
+#ifndef REDI_PSTREAM_H_SEEN
+    ifstream fin_gz;
+#else
+    redi::ipstream fin_gz; // if file is compressed
+    if (xname.size()>=3 &&
+        xname.substr(xname.size()-3,3)==".gz")
+    {
+        fin_gz.open("gunzip -c "+xname);
+        compress_type=1;
+    }
+    else if (xname.size()>=4 &&
+        xname.substr(xname.size()-4,4)==".bz2")
+    {
+        fin_gz.open("bzcat "+xname);
+        compress_type=2;
+    }
+    else
+#endif
+        fin.open(xname.c_str());
+
+    map<string,int> resi2hinge_dict;
+    int r,i,j;
+    j=-1;
+    char hinge_char=0;
+    const int n_resi1=resi_vec1.size();
+    const int ali_len=strlen(seqM);
+    const int seqx_len=strlen(seqxA);
+    const int n_hinge=tu_vec.size();
+    for (r=0;r<seqx_len;r++)
+    {
+        if (seqxA[r]=='-') continue;
+        j++;
+        if (j>=n_resi1) break; // no residue key left for this column
+        hinge_char=seqM[r];
+        if (hinge_char==' ')
+        {
+            for (i=1;i<ali_len;i++)
+            {
+                // seqM is indexed by alignment column: the bound is ali_len
+                if (r-i>=0 && seqM[r-i]!=' ')
+                    hinge_char=seqM[r-i];
+                else if (r+i<ali_len && seqM[r+i]!=' ')
+                    hinge_char=seqM[r+i];
+                if (hinge_char!=' ') break;
+            }
+        }
+        // a blank or non-digit marker must not become an index into tu_vec
+        int hinge_idx=hinge_char-'0';
+        if (hinge_idx<0 || hinge_idx>=n_hinge) hinge_idx=0;
+        resi2hinge_dict[resi_vec1[j]]=hinge_idx;
+    }
+    string resi=(n_resi1>0)?resi_vec1[0]:string();
+    int read_resi=int(resi.size())-4;
+
+    stringstream buf;
+    stringstream buf_pymol;
+    string line;
+    double x[3];  // before transform
+    double x1[3]; // after transform
+
+    /* for PDBx/mmCIF only */
+    map<string,int> _atom_site;
+    size_t atom_site_pos;
+    size_t n_col=0; // number of _atom_site columns declared in the header
+    vector<string> line_vec;
+    int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF
+    int hinge=0;
+    string asym_id="."; // this is similar to chainID, except that
+                        // chainID is char while asym_id is a string
+                        // with possibly multiple char
+    while (compress_type?fin_gz.good():fin.good())
+    {
+        if (compress_type) getline(fin_gz, line);
+        else               getline(fin, line);
+        // shorter records lack full coordinates and are copied verbatim below
+        if (line.size()>=54 && (line.compare(0, 6, "ATOM  ")==0 ||
+            line.compare(0, 6, "HETATM")==0)) // PDB format
+        {
+            infmt=0;
+            x[0]=atof(line.substr(30,8).c_str());
+            x[1]=atof(line.substr(38,8).c_str());
+            x[2]=atof(line.substr(46,8).c_str());
+            if (mirror_opt) x[2]=-x[2];
+            if (read_resi==1) resi=line.substr(22,5);
+            else resi=line.substr(22,5)+line[21];
+            hinge=0;
+            if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi];
+            tu2t_u(tu_vec[hinge],t,u);
+            transform(t, u, x, x1);
+            buf<<line.substr(0,30)<<setiosflags(ios::fixed)
+                <<setprecision(3)
+                <<setw(8)<<x1[0] <<setw(8)<<x1[1] <<setw(8)<<x1[2]
+                <<line.substr(54)<<'\n';
+        }
+        else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+        {
+            infmt=3;
+            buf<<line<<'\n';
+            while(1)
+            {
+                if (compress_type)
+                {
+                    if (fin_gz.good()) getline(fin_gz, line);
+                    else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                }
+                else
+                {
+                    if (fin.good()) getline(fin, line);
+                    else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                }
+                if (line.size()) break;
+            }
+            buf<<line<<'\n';
+            if (line.compare(0,11,"_atom_site.")) continue;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[Trim(line.substr(11))]=atom_site_pos;
+            while(1)
+            {
+                while(1)
+                {
+                    if (compress_type)
+                    {
+                        if (fin_gz.good()) getline(fin_gz, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                    }
+                    else
+                    {
+                        if (fin.good()) getline(fin, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                    }
+                    if (line.size()) break;
+                }
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[Trim(line.substr(11))]=++atom_site_pos;
+                buf<<line<<'\n';
+            }
+            n_col=atom_site_pos+1; // fixed here, before operator[] can add keys
+
+            if (_atom_site.count("group_PDB")*
+                _atom_site.count("Cartn_x")*
+                _atom_site.count("Cartn_y")*
+                _atom_site.count("Cartn_z")==0)
+            {
+                buf<<line<<'\n';
+                cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, Cartn_x, Cartn_y, Cartn_z"<<endl;
+                continue;
+            }
+
+            while(1)
+            {
+                line_vec.clear();
+                split(line,line_vec);
+                if (line_vec.size()<n_col) break; // not a complete data row
+                if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                    line_vec[_atom_site["group_PDB"]]!="HETATM") break;
+
+                x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str());
+                x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str());
+                x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str());
+                if (mirror_opt) x[2]=-x[2];
+
+                // residue key: sequence id + insertion code (+ chain ID)
+                if (_atom_site.count("auth_seq_id"))
+                    resi=line_vec[_atom_site["auth_seq_id"]];
+                else resi=line_vec[_atom_site["label_seq_id"]];
+                if (_atom_site.count("pdbx_PDB_ins_code") &&
+                    line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                    resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+                else resi+=" ";
+                if (read_resi>=2)
+                {
+                    if (_atom_site.count("auth_asym_id"))
+                        asym_id=line_vec[_atom_site["auth_asym_id"]];
+                    else asym_id=line_vec[_atom_site["label_asym_id"]];
+                    if (asym_id==".") asym_id=" ";
+                    resi+=asym_id[0];
+                }
+                hinge=0;
+                if (resi2hinge_dict.count(resi)) hinge=resi2hinge_dict[resi];
+                tu2t_u(tu_vec[hinge],t,u);
+                transform(t, u, x, x1);
+
+                for (atom_site_pos=0; atom_site_pos<n_col; atom_site_pos++)
+                {
+                    if (atom_site_pos==_atom_site["Cartn_x"])
+                        buf<<setiosflags(ios::fixed)<<setprecision(3)
+                           <<setw(8)<<x1[0]<<' ';
+                    else if (atom_site_pos==_atom_site["Cartn_y"])
+                        buf<<setiosflags(ios::fixed)<<setprecision(3)
+                           <<setw(8)<<x1[1]<<' ';
+                    else if (atom_site_pos==_atom_site["Cartn_z"])
+                        buf<<setiosflags(ios::fixed)<<setprecision(3)
+                           <<setw(8)<<x1[2]<<' ';
+                    else buf<<line_vec[atom_site_pos]<<' ';
+                }
+                buf<<'\n';
+
+                if (compress_type && fin_gz.good()) getline(fin_gz, line);
+                else if (!compress_type && fin.good()) getline(fin, line);
+                else break;
+            }
+            if (compress_type?fin_gz.good():fin.good()) buf<<line<<'\n';
+        }
+        else if (line.size())
+        {
+            buf<<line<<'\n';
+            if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+        }
+    }
+    if (compress_type) fin_gz.close();
+    else               fin.close();
+
+    string fname_super_full=fname_super;
+    if (infmt==0)      fname_super_full+=".pdb";
+    else if (infmt==3) fname_super_full+=".cif";
+    ofstream fp;
+    fp.open(fname_super_full.c_str());
+    fp<<buf.str();
+    fp.close();
+    buf.str(string()); // clear stream
+
+    string chain1_sele;
+    string chain2_sele;
+    if (!mm_opt)
+    {
+        if (split_opt==2 && ter_opt>=1) // align one chain from model 1
+        {
+            chain1_sele=" and c. "+chainID1.substr(1);
+            chain2_sele=" and c. "+chainID2.substr(1);
+        }
+        else if (split_opt==2 && ter_opt==0) // align one chain from each model
+        {
+            for (i=1;i<chainID1.size();i++) if (chainID1[i]==',') break;
+            chain1_sele=" and c. "+chainID1.substr(i+1);
+            for (i=1;i<chainID2.size();i++) if (chainID2[i]==',') break;
+            chain2_sele=" and c. "+chainID2.substr(i+1);
+        }
+    }
+
+    /* extract aligned region */
+    int i1=-1;
+    int i2=-1;
+    string resi1_sele;
+    string resi2_sele;
+    string resi1_bond;
+    string resi2_bond;
+    string prev_resi1;
+    string prev_resi2;
+    string curr_resi1;
+    string curr_resi2;
+    if (!mm_opt)
+    {
+        for (i=0;i<ali_len;i++)
+        {
+            i1+=(seqxA[i]!='-' && seqxA[i]!='*');
+            i2+=(seqyA[i]!='-');
+            if (seqM[i]==' ' || seqxA[i]=='*') continue;
+            curr_resi1=resi_vec1[i1].substr(0,4);
+            curr_resi2=resi_vec2[i2].substr(0,4);
+            if (resi1_sele.size()==0)
+                resi1_sele =    "i. "+curr_resi1;
+            else
+            {
+                resi1_sele+=" or i. "+curr_resi1;
+                resi1_bond+="bond structure1 and i. "+prev_resi1+
+                                              ", i. "+curr_resi1+"\n";
+            }
+            if (resi2_sele.size()==0)
+                resi2_sele =    "i. "+curr_resi2;
+            else
+            {
+                resi2_sele+=" or i. "+curr_resi2;
+                resi2_bond+="bond structure2 and i. "+prev_resi2+
+                                              ", i. "+curr_resi2+"\n";
+            }
+            prev_resi1=curr_resi1;
+            prev_resi2=curr_resi2;
+        }
+        if (resi1_sele.size()) resi1_sele=" and ( "+resi1_sele+")";
+        if (resi2_sele.size()) resi2_sele=" and ( "+resi2_sele+")";
+    }
+
+    /* write pymol script */
+    vector<string> pml_list;
+    pml_list.push_back(fname_super+"");
+    pml_list.push_back(fname_super+"_atm");
+    pml_list.push_back(fname_super+"_all");
+    pml_list.push_back(fname_super+"_all_atm");
+    pml_list.push_back(fname_super+"_all_atm_lig");
+
+    for (size_t p=0;p<pml_list.size();p++)
+    {
+        if (mm_opt && p<=1) continue;
+        buf_pymol
+            <<"#!/usr/bin/env pymol\n"
+            <<"cmd.load(\""<<fname_super_full<<"\", \"structure1\")\n"
+            <<"cmd.load(\""<<yname<<"\", \"structure2\")\n"
+            <<"hide all\n"
+            <<"set all_states, "<<((ter_opt==0)?"on":"off")<<'\n';
+        if (p==0) // .pml
+        {
+            if (chain1_sele.size()) buf_pymol
+                <<"remove structure1 and not "<<chain1_sele.substr(4)<<"\n";
+            if (chain2_sele.size()) buf_pymol
+                <<"remove structure2 and not "<<chain2_sele.substr(4)<<"\n";
+            buf_pymol
+                <<"remove not n. CA and not n. C3'\n"
+                <<resi1_bond
+                <<resi2_bond
+                <<"show stick, structure1"<<chain1_sele<<resi1_sele<<"\n"
+                <<"show stick, structure2"<<chain2_sele<<resi2_sele<<"\n";
+        }
+        else if (p==1) // _atm.pml
+        {
+            buf_pymol
+                <<"show cartoon, structure1"<<chain1_sele<<resi1_sele<<"\n"
+                <<"show cartoon, structure2"<<chain2_sele<<resi2_sele<<"\n";
+        }
+        else if (p==2) // _all.pml
+        {
+            buf_pymol
+                <<"show ribbon, structure1"<<chain1_sele<<"\n"
+                <<"show ribbon, structure2"<<chain2_sele<<"\n";
+        }
+        else if (p==3) // _all_atm.pml
+        {
+            buf_pymol
+                <<"show cartoon, structure1"<<chain1_sele<<"\n"
+                <<"show cartoon, structure2"<<chain2_sele<<"\n";
+        }
+        else if (p==4) // _all_atm_lig.pml
+        {
+            buf_pymol
+                <<"show cartoon, structure1\n"
+                <<"show cartoon, structure2\n"
+                <<"show stick, not polymer\n"
+                <<"show sphere, not polymer\n";
+        }
+        buf_pymol
+            <<"color blue, structure1\n"
+            <<"color red, structure2\n"
+            <<"set ribbon_width, 6\n"
+            <<"set stick_radius, 0.3\n"
+            <<"set sphere_scale, 0.25\n"
+            <<"set ray_shadow, 0\n"
+            <<"bg_color white\n"
+            <<"set transparency=0.2\n"
+            <<"zoom polymer and ((structure1"<<chain1_sele
+            <<") or (structure2"<<chain2_sele<<"))\n"
+            <<endl;
+
+        fp.open((pml_list[p]+".pml").c_str());
+        fp<<buf_pymol.str();
+        fp.close();
+        buf_pymol.str(string());
+    }
+}
+
+/* Print the alignment report selected by outfmt_opt (<=0 full, 1 sequence
+ * records, 2 one tab-separated row); then write the rotation matrix if
+ * fname_matrix is non-empty and the o_opt==1 / o_opt==2 superposition files. */
+void output_flexalign_results(const string xname, const string yname,
+    const string chainID1, const string chainID2,
+    const int xlen, const int ylen, double t[3], double u[3][3],
+    const vector<vector<double> >&tu_vec, const double TM1, const double TM2,
+    const double TM3, const double TM4, const double TM5,
+    const double rmsd, const double d0_out, const char *seqM,
+    const char *seqxA, const char *seqyA, const double Liden,
+    const int n_ali8, const int L_ali, const double TM_ali,
+    const double rmsd_ali, const double TM_0, const double d0_0,
+    const double d0A, const double d0B, const double Lnorm_ass,
+    const double d0_scale, const double d0a, const double d0u,
+    const char* fname_matrix, const int outfmt_opt, const int ter_opt,
+    const int mm_opt, const int split_opt, const int o_opt,
+    const string fname_super, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const int mirror_opt,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
+{
+    const char *name1=xname.c_str(), *chain1=chainID1.c_str();
+    const char *name2=yname.c_str(), *chain2=chainID2.c_str();
+    const double seqID_ali=(n_ali8>0)?Liden/n_ali8:0; // 0 if nothing aligned
+    if (outfmt_opt==2) // row is terminated by the cout<<endl further down
+    {
+        printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d",
+            name1, chain1, name2, chain2, TM2, TM1, rmsd,
+            Liden/xlen, Liden/ylen, seqID_ali, xlen, ylen, n_ali8);
+    }
+    else if (outfmt_opt==1)
+    {
+        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
+            name1, chain1, xlen, d0B, Liden/xlen, TM2);
+        printf("%s\n", seqxA);
+        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
+            name2, chain2, ylen, d0A, Liden/ylen, TM1);
+        printf("%s\n", seqyA);
+        printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", n_ali8, rmsd, seqID_ali);
+        if (i_opt) printf(
+            "# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n",
+            TM_ali, L_ali, rmsd_ali);
+        if (a_opt) printf(
+            "# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n",
+            TM3, (xlen+ylen)*0.5, d0a);
+        if (u_opt) printf(
+            "# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n",
+            TM4, Lnorm_ass, d0u);
+        if (d_opt) printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n",
+            TM5, d0_scale, ylen);
+        printf("$$$$\n");
+    }
+    else if (outfmt_opt<=0)
+    {
+        printf("\nName of Structure_1: %s%s (to be superimposed onto Structure_2)\n",
+            name1, chain1);
+        printf("Name of Structure_2: %s%s\n", name2, chain2);
+        printf("Length of Structure_1: %d residues\n", xlen);
+        printf("Length of Structure_2: %d residues\n\n", ylen);
+        if (i_opt) printf(
+            "User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n",
+            TM_ali, L_ali, rmsd_ali);
+        printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n",
+            n_ali8, rmsd, seqID_ali);
+        printf("TM-score= %6.5f (normalized by length of Structure_1: L=%d, d0=%.2f)\n",
+            TM2, xlen, d0B);
+        printf("TM-score= %6.5f (normalized by length of Structure_2: L=%d, d0=%.2f)\n",
+            TM1, ylen, d0A);
+        if (a_opt==1) printf(
+            "TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n",
+            TM3, (xlen+ylen)*0.5, d0a);
+        if (u_opt) printf(
+            "TM-score= %6.5f (normalized by user-specified L=%.2f and d0=%.2f)\n",
+            TM4, Lnorm_ass, d0u);
+        if (d_opt) printf(
+            "TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n",
+            TM5, d0_scale, ylen);
+        printf("(You should use TM-score normalized by length of the reference structure)\n");
+        printf("\n([0-9] denote different aligned fragment pairs separated by different hinges)\n");
+        printf("%s\n%s\n%s\n", seqxA, seqM, seqyA); // structure 1, hinge markers, structure 2
+    }
+    cout << endl;
+
+    if (fname_matrix[0]!='\0')
+        output_flexalign_rotation_matrix(fname_matrix, tu_vec, t, u);
+    if (o_opt==2)
+        output_flexalign_rasmol(xname, yname, fname_super, tu_vec,
+            t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA,
+            resi_vec1, resi_vec2, chainID1, chainID2,
+            xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden);
+    else if (o_opt==1)
+        output_flexalign_pymol(xname, yname, fname_super, tu_vec,
+            t, u, ter_opt, mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA,
+            resi_vec1, resi_vec2, chainID1, chainID2);
+}
+
+#endif
diff --git a/modules/bindings/src/tmalign/param_set.h b/modules/bindings/src/USalign/param_set.h
similarity index 100%
rename from modules/bindings/src/tmalign/param_set.h
rename to modules/bindings/src/USalign/param_set.h
diff --git a/modules/bindings/src/tmalign/pdb2fasta.cpp b/modules/bindings/src/USalign/pdb2fasta.cpp
similarity index 79%
rename from modules/bindings/src/tmalign/pdb2fasta.cpp
rename to modules/bindings/src/USalign/pdb2fasta.cpp
index 7c94206ff..e0fc71206 100644
--- a/modules/bindings/src/tmalign/pdb2fasta.cpp
+++ b/modules/bindings/src/USalign/pdb2fasta.cpp
@@ -20,16 +20,21 @@ void print_help()
 "             Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n"
 "             (note the spaces before and after CA).\n"
 "\n"
+"    -mol     Type of molecule(s) to align.\n"
+"             auto: (default) align both protein and nucleic acids.\n"
+"             prot: only align proteins in a structure.\n"
+"             RNA : only align RNA and DNA in a structure.\n"
+"\n"
 "    -ter     Strings to mark the end of a chain\n"
-"             3: (default) TER, ENDMDL, END or different chain ID\n"
+"             3: TER, ENDMDL, END or different chain ID\n"
 "             2: ENDMDL, END, or different chain ID\n"
-"             1: ENDMDL or END\n"
+"             1: (default) ENDMDL or END\n"
 "             0: end of file\n"
 "\n"
 "    -split   Whether to split PDB file into multiple chains\n"
-"             0: (default) treat the whole structure as one single chain\n"
+"             0: treat the whole structure as one single chain\n"
 "             1: treat each MODEL as a separate chain (-ter should be 0)\n"
-"             2: treat each chain as a seperate chain (-ter should be <=1)\n"
+"             2: (default) treat each chain as a seperate chain (-ter should be <=1)\n"
 "\n"
 "    -het     Whether to read residues marked as 'HETATM' in addition to 'ATOM  '\n"
 "             0: (default) only align 'ATOM  ' residues\n"
@@ -53,11 +58,12 @@ int main(int argc, char *argv[])
     /*    get argument    */
     /**********************/
     string xname     = "";
-    int    ter_opt   =3;     // TER, END, or different chainID
+    int    ter_opt   =1;     // TER, END, or different chainID
     int    infmt_opt =-1;    // PDB or PDBx/mmCIF format
-    int    split_opt =0;     // do not split chain
+    int    split_opt =2;     // do not split chain
     int    het_opt=0;        // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
+    string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
     string suffix_opt="";    // set -suffix to empty
     string dir_opt   ="";    // set -dir to empty
     vector<string> chain_list; // only when -dir1 is set
@@ -77,6 +83,12 @@ int main(int argc, char *argv[])
         {
             atom_opt=argv[i + 1]; i++;
         }
+        else if ( !strcmp(argv[i],"-mol") )
+        {
+            if (i>=(argc-1)) 
+                PrintErrorAndQuit("ERROR! Missing value for -mol");
+            mol_opt=argv[i + 1]; i++;
+        }
         else if ( !strcmp(argv[i],"-dir") && i < (argc-1) )
         {
             dir_opt=argv[i + 1]; i++;
@@ -108,6 +120,16 @@ int main(int argc, char *argv[])
         PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1");
     if (split_opt<0 || split_opt>2)
         PrintErrorAndQuit("-split can only be 0, 1 or 2");
+    if (mol_opt=="prot") mol_opt="protein";
+    else if (mol_opt=="DNA") mol_opt="RNA";
+    if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
+        PrintErrorAndQuit("ERROR! Molecule type must be one of the"
+            "following:\nauto, prot (the same as 'protein'), and "
+            "RNA (the same as 'DNA').");
+    if (mol_opt=="protein" && atom_opt=="auto")
+        atom_opt=" CA ";
+    else if (mol_opt=="RNA" && atom_opt=="auto")
+        atom_opt=" C3'";
 
     /* parse file list */
     if (dir_opt.size()==0)
diff --git a/modules/bindings/src/tmalign/pdb2ss.cpp b/modules/bindings/src/USalign/pdb2ss.cpp
similarity index 100%
rename from modules/bindings/src/tmalign/pdb2ss.cpp
rename to modules/bindings/src/USalign/pdb2ss.cpp
diff --git a/modules/bindings/src/tmalign/pdb2xyz.cpp b/modules/bindings/src/USalign/pdb2xyz.cpp
similarity index 100%
rename from modules/bindings/src/tmalign/pdb2xyz.cpp
rename to modules/bindings/src/USalign/pdb2xyz.cpp
diff --git a/modules/bindings/src/USalign/pdbAtomName.cpp b/modules/bindings/src/USalign/pdbAtomName.cpp
new file mode 100644
index 000000000..d65c576d2
--- /dev/null
+++ b/modules/bindings/src/USalign/pdbAtomName.cpp
@@ -0,0 +1,232 @@
+#include <fstream>
+#include <map>
+#include <sstream>
+#include <iostream>
+#include <string>
+#include <vector>
+#include "pstream.h"
+
+using namespace std;
+
+void print_help() // Print the usage text to stdout, then terminate with EXIT_SUCCESS.
+{
+    cout <<
+"Fix atom name justification in PDB format file.\n"
+"\n"
+"Usage: pdbAtomName input.pdb output.pdb\n"
+    <<endl;
+    exit(EXIT_SUCCESS); // never returns to the caller
+}
+
+void splitlines(const string &line, vector<string> &lines, // Append each non-empty delimiter-separated piece of
+    const char delimiter='\n')                              // 'line' to 'lines'; 'lines' is not cleared first.
+{
+    bool within_word = false; // true while the current run of characters is being accumulated
+    for (size_t pos=0;pos<line.size();pos++)
+    {
+        if (line[pos]==delimiter)
+        {
+            within_word = false; // a delimiter only ends the run, so repeated delimiters add no empty entries
+            continue;
+        }
+        if (!within_word)
+        {
+            within_word = true;
+            lines.push_back(""); // open a new entry at the first character of a run
+        }
+        lines.back()+=line[pos];
+    }
+}
+
+size_t pdbAtomName(const string &infile,const string &outfile) // Re-justify residue/atom name fields of ATOM/HETATM lines; returns the change count.
+{
+    stringstream buf; // holds the raw input first, then is reused to collect the output text
+    if (infile=="-") buf<<cin.rdbuf(); // "-" means read from stdin
+#if defined(REDI_PSTREAM_H_SEEN)
+    else if (infile.size()>3 && infile.substr(infile.size()-3)==".gz")
+    {
+        redi::ipstream fp_gz; // if file is compressed
+        fp_gz.open("gunzip -c "+infile); // NOTE(review): infile is pasted into the command string unquoted
+        buf<<fp_gz.rdbuf();
+        fp_gz.close();
+    }
+#endif
+    else
+    {
+        ifstream fp;
+        fp.open(infile.c_str(),ios::in); //ifstream fp(filename,ios::in);
+        buf<<fp.rdbuf(); // NOTE(review): the result of open() is not checked before reading
+        fp.close();
+    }
+    vector<string> lines;
+    splitlines(buf.str(),lines);
+    buf.str(string()); // empty the buffer so it can collect the rewritten lines
+
+    map<string,string> aa3to1; // only key membership is consulted below (aa3to1.count); the mapped values are not read
+    aa3to1["  A"]=aa3to1[" DA"]='a';
+    aa3to1["  C"]=aa3to1[" DC"]='c';
+    aa3to1["  G"]=aa3to1[" DG"]='g';
+    aa3to1["  U"]=aa3to1["PSU"]='u';
+    aa3to1["  I"]=aa3to1[" DI"]='i';
+    aa3to1["  T"]='t';
+    aa3to1["ALA"]='A';
+    aa3to1["CYS"]='C';
+    aa3to1["ASP"]='D';
+    aa3to1["GLU"]='E';
+    aa3to1["PHE"]='F';
+    aa3to1["GLY"]='G';
+    aa3to1["HIS"]='H';
+    aa3to1["ILE"]='I';
+    aa3to1["LYS"]='K';
+    aa3to1["LEU"]='L';
+    aa3to1["MET"]=aa3to1["MSE"]='M';
+    aa3to1["ASN"]='N';
+    aa3to1["PRO"]='P';
+    aa3to1["GLN"]='Q';
+    aa3to1["ARG"]='R';
+    aa3to1["SER"]='S';
+    aa3to1["THR"]='T';
+    aa3to1["VAL"]='V'; 
+    aa3to1["TRP"]='W';
+    aa3to1["TYR"]='Y';
+    aa3to1["ASX"]='B';
+    aa3to1["GLX"]='Z';
+    aa3to1["SEC"]='U';
+    aa3to1["PYL"]='O';
+
+    size_t l=0;
+    string atom="    ";
+    string resn="   ";
+    int idxBegin = -1; // index of the first non-blank character of the atom field
+    int idxEnd = -1;   // index of the last non-blank character of the atom field
+    int i;
+    string msg;
+    map<string,int> msg_dict; // occurrence count per distinct change message; each message is printed once
+    size_t changeNum=0;
+    for (l=0;l<lines.size();l++)
+    {
+        if (lines[l].substr(0,6)=="ATOM  " ||
+            lines[l].substr(0,6)=="HETATM")
+        {
+            if (lines[l].size()<54)
+            {
+                cerr<<"incomplete:"<<lines[l]<<endl;
+                continue; // the short record is reported and not written to the output
+            }
+            resn=lines[l].substr(17,3);
+            if (resn[2]==' ') // name does not reach the last character of its field: shift it right
+            {
+                if (resn[1]==' ') resn="  "+resn.substr(0,1);
+                else resn=" "+resn.substr(0,2);
+                msg=lines[l].substr(17,3)+"=>"+resn;
+                if (msg_dict.count(msg)==0)
+                {
+                    cerr<<msg<<'.'<<endl;
+                    msg_dict[msg]=0;
+                }
+                msg_dict[msg]++;
+                changeNum++;
+            }
+            if (lines[l].size()<78 && aa3to1.count(resn)==0) // unknown residue name and line too short to hold characters 76-77
+            {
+                cerr<<"heteroatom:"<<lines[l]<<endl;
+                buf<<lines[l].substr(0,17)<<resn<<lines[l].substr(20)<<endl; // only the residue name is rewritten
+                continue;
+            }
+
+            atom=lines[l].substr(12,4);
+            idxBegin = idxEnd = -1;
+            for (i=0;i<4;i++)
+            {
+                if (atom[i]==' ') continue;
+                if (idxBegin==-1) idxBegin=i;
+                idxEnd=i;
+            }
+            if (idxBegin>=0 && (idxBegin>0 || idxEnd<3))
+                atom = atom.substr(idxBegin, idxEnd + 1 - idxBegin); // strip leading/trailing blanks
+            if (atom[atom.size()-1]=='*') // C3* (old) => C3' (new)
+                atom=atom.substr(0,atom.size()-1)+"'";
+            if (atom.size()==4) 
+            {
+                buf<<lines[l].substr(0,17)<<resn<<lines[l].substr(20)<<endl; // atom field is copied from the input line as is
+                continue;
+            }
+            if ((lines[l].size()>=78 && lines[l][76]!=' ' && lines[l][77]!=' ')||
+                ('0'<=atom[0] && atom[0]<='9')) // characters 76-77 both non-blank, or the name starts with a digit
+            {
+                if      (atom.size()==1) atom+="   "; // name starts at the first character of the field
+                else if (atom.size()==2) atom+="  ";
+                else if (atom.size()==3) atom+=" ";
+            }
+            else if (resn=="MSE" && atom=="SE") atom="SE  ";
+            else
+            {
+                if      (atom.size()==1) atom=" "+atom+"  "; // name starts at the second character of the field
+                else if (atom.size()==2) atom=" "+atom+" ";
+                else if (atom.size()==3) atom=" "+atom;
+            }
+            if (atom!=lines[l].substr(12,4))
+            {
+                msg=resn+":"+lines[l].substr(12,4)+"=>"+atom;
+                if (msg_dict.count(msg)==0)
+                {
+                    cerr<<msg<<'.'<<endl;
+                    msg_dict[msg]=0;
+                }
+                msg_dict[msg]++;
+                changeNum++;
+            }
+            buf<<lines[l].substr(0,12)<<atom<<lines[l].substr(16,1)
+               <<resn<<lines[l].substr(20)<<endl;
+        }
+        else if (lines[l].size())
+        {
+            buf<<lines[l]<<endl; // every other line is copied through unchanged
+        }
+        lines[l].clear();
+    }
+
+    if (outfile=="-")
+        cout<<buf.str();
+    else
+    {
+        ofstream fout;
+        fout.open(outfile.c_str(),ios::out);
+        fout<<buf.str();
+        fout.close();
+    }
+    buf.str(string());
+    vector<string>().swap(lines);
+    map<string,int>().swap(msg_dict);
+    map<string,string>().swap(aa3to1);
+    if (changeNum)
+        cerr<<"Update "<<changeNum<<" atom name in "<<infile<<endl;
+    return changeNum;
+}
+
+int main(int argc, char *argv[]) // Usage: pdbAtomName input.pdb [output.pdb]; the output name defaults to "-".
+{
+    if (argc < 2) print_help();
+
+    string infile ="";
+    string outfile="";
+
+    for (int i=1; i<argc; i++)
+    {
+        if (infile.size()==0) infile=argv[i]; // first argument: input file name
+        else if (outfile.size()==0) outfile=argv[i]; // second argument: output file name
+        else
+        {
+            cerr<<"ERROR! no such option "<<argv[i]<<endl;
+            exit(1);
+        }
+    }
+
+    if (outfile.size()==0) outfile="-";
+
+    pdbAtomName(infile,outfile); // the returned change count is not used here
+    
+    infile.clear();
+    outfile.clear();
+    return 0;
+}
diff --git a/modules/bindings/src/tmalign/pstream.h b/modules/bindings/src/USalign/pstream.h
similarity index 99%
rename from modules/bindings/src/tmalign/pstream.h
rename to modules/bindings/src/USalign/pstream.h
index 28cbeadb1..12c759874 100644
--- a/modules/bindings/src/tmalign/pstream.h
+++ b/modules/bindings/src/USalign/pstream.h
@@ -15,6 +15,11 @@
  * and redi::rpstream.
  */
 
+/* Skip pstream on Windows builds without Cygwin; the Cygwin exemption covers all three macros. */
+#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32__)) && !defined(__CYGWIN__)
+#define NO_PSTREAM
+#else
+
 #ifndef REDI_PSTREAM_H_SEEN
 #define REDI_PSTREAM_H_SEEN
 
@@ -2250,6 +2255,6 @@ namespace redi
  */
 
 #endif  // REDI_PSTREAM_H_SEEN
-
+#endif  // WIN32
 // vim: ts=2 sw=2 expandtab
 
diff --git a/modules/bindings/src/USalign/qTMclust.cpp b/modules/bindings/src/USalign/qTMclust.cpp
new file mode 100644
index 000000000..08fc64b68
--- /dev/null
+++ b/modules/bindings/src/USalign/qTMclust.cpp
@@ -0,0 +1,723 @@
+/* Different filters are used when different header files are included.
+ * At least one of HwRMSD.h and TMalign.h should be included.
+ * HwRMSD.h implements the HwRMSD filter.
+ * No filter will be used if only TMalign.h is included. */
+
+#include "HwRMSD.h"
+#include "TMalign.h"
+
+using namespace std;
+
+void print_extra_help() // Print the "Additional options" section of the help text to stdout; does not exit.
+{
+    cout <<
+"Additional options:\n"
+"    -fast    Fast but slightly inaccurate final alignment\n"
+"\n"
+"    -atom    4-character atom name used to represent a residue.\n"
+"             Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n"
+"             (note the spaces before and after CA).\n"
+"\n"
+"    -mol     Molecule type: RNA or protein\n"
+"             Default is detect molecule type automatically\n"
+"\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
+"    -infmt   Input format\n"
+"            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
+"             0: PDB format\n"
+"             1: SPICKER format\n"
+"             2: xyz format\n"
+"             3: PDBx/mmCIF format\n"
+    <<endl;
+}
+
+void print_help(bool h_opt=false) // Print the qTMclust usage text to stdout and exit; h_opt adds the extra options.
+{
+    cout << "\n"
+"qTMclust: Structure Clustering by Sequence-Independent Structure Alignment\n"
+"\n"
+"Usage 1: (alignment within a folder of PDB files)\n"
+"    qTMclust -dir chain_folder/ chain_list -TMcut 0.5 -o cluster.txt\n"
+"\n"
+"Usage 2: (alignment within chains or within models of a single PDB file)\n"
+"    qTMclust -split 2 -ter 1 multichain.pdb -TMcut 0.5 -o cluster.txt\n"
+"    qTMclust -split 1 -ter 0 multimodel.pdb -TMcut 0.5 -o cluster.txt\n"
+"\n"
+"Options:\n"
+"    -TMcut   TM-score cutoff in the range of [0.45,1) for considering two\n"
+"             structures being similar. Default is 0.5.\n"
+"\n"
+"    -s       Which TM-score to use when aligning structures with different lengths?\n"
+"             1: the larger TM-score, i.e. normalized by shorter length\n"
+"             2: (default) the smaller TM-score, i.e. normalized by longer length\n"
+"             3: average of the two TM-scores\n"
+"             4: harmonic average of the two TM-scores\n"
+"             5: geometric average of the two TM-scores\n"
+"             6: root mean square of the two TM-scores\n"
+"\n"
+"    -o       Output the cluster result to file.\n"
+"             Default is print result to screen.\n"
+"\n"
+"    -dir     Perform all-against-all alignment among the list of PDB\n"
+"             chains listed by 'chain_list' under 'chain_folder'. Note\n"
+"             that the slash is necessary.\n"
+"             $ qTMclust -dir chain_folder/ chain_list\n"
+"\n"
+"    -suffix  (Only when -dir is set, default is empty)\n"
+"             add file name suffix to files listed by chain_list\n"
+"\n"
+"    -ter     Strings to mark the end of a chain\n"
+"             3: (default) TER, ENDMDL, END or different chain ID\n"
+"             2: ENDMDL, END, or different chain ID\n"
+"             1: ENDMDL or END\n"
+"             0: end of file\n"
+"\n"
+"    -split   Whether to split PDB file into multiple chains\n"
+"             0: (default) treat the whole structure as one single chain\n"
+"             1: treat each MODEL as a separate chain (-ter should be 0)\n"
+"             2: treat each chain as a separate chain (-ter should be <=1)\n"
+"\n"
+"    -h       Print the full help message, including additional options.\n"
+"\n"
+    <<endl;
+
+    if (h_opt) print_extra_help(); // append the additional options only when the full help was requested
+
+    exit(EXIT_SUCCESS); // never returns to the caller
+}
+
+void filter_lower_bound(double &lb_HwRMSD, double &lb_TMfast, // Write the two lower-bound thresholds, derived from TMcut,
+    const double TMcut, const int s_opt,const int mol_type)   // into lb_HwRMSD and lb_TMfast.
+{
+    lb_HwRMSD=0.5*TMcut; // these two values are kept when s_opt>1
+    lb_TMfast=0.9*TMcut;
+    if (s_opt<=1) // smaller fractions of TMcut are used in this case
+    {
+        if (mol_type>0) // RNA
+        {
+            lb_HwRMSD=0.02*TMcut;
+            lb_TMfast=0.60*TMcut;
+        }
+        else // protein
+        {
+            lb_HwRMSD=0.25*TMcut;
+            lb_TMfast=0.80*TMcut;
+        }
+    }
+    return;
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc < 2) print_help();
+
+
+    clock_t t1, t2;
+    t1 = clock();
+
+    /**********************/
+    /*    get argument    */
+    /**********************/
+    string xname       = "";
+    double TMcut       = 0.5;
+    string fname_clust = ""; // file name for output cluster result
+    string fname_lign  = ""; // file name for user alignment
+    vector<string> sequence; // get value from alignment file
+    double Lnorm_ass, d0_scale;
+
+    bool h_opt = false; // print full help message
+    int  i_opt = 0;     // 3 for -I, stick to user given alignment
+    int  a_opt = 0;     // flag for -a, do not normalized by average length
+    int  s_opt = 2;     // flag for -s, normalized by longer length
+    bool u_opt = false; // flag for -u, normalized by user specified length
+    bool d_opt = false; // flag for -d, user specified d0
+
+    int    infmt_opt =-1;    // PDB or PDBx/mmCIF format
+    int    ter_opt   =3;     // TER, END, or different chainID
+    int    split_opt =0;     // do not split chain
+    bool   fast_opt  =false; // flags for -fast, fTM-align algorithm
+    int    het_opt   =0;     // do not read HETATM residues
+    string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
+    string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
+    string suffix_opt="";    // set -suffix to empty
+    string dir_opt   ="";    // set -dir to empty
+    int    byresi_opt=0;     // set -byresi to 0
+    vector<string> chain_list;
+
+    for(int i = 1; i < argc; i++)
+    {
+        if ( (!strcmp(argv[i],"-u")||!strcmp(argv[i],"-L")) && i < (argc-1) )
+        {
+            PrintErrorAndQuit("Sorry! -u has not been implemented yet");
+            Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-d") && i < (argc-1) )
+        {
+            PrintErrorAndQuit("Sorry! -d has not been implemented yet");
+            d0_scale = atof(argv[i + 1]); d_opt = true; i++;
+        }
+        else if (!strcmp(argv[i], "-I") && i < (argc-1) )
+        {
+            fname_lign = argv[i + 1];      i_opt = 3; i++;
+        }
+        else if ( !strcmp(argv[i],"-o") && i < (argc-1) )
+        {
+            fname_clust = argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-a") && i < (argc-1))
+        {
+            PrintErrorAndQuit("Sorry! -a is not used for clustering");
+        }
+        else if ( !strcmp(argv[i],"-s") && i < (argc-1) )
+        {
+            s_opt=atoi(argv[i + 1]); i++;
+            if (s_opt<1 || s_opt>6)
+                PrintErrorAndQuit("-s must be within 1 to 6");
+        }
+        else if ( !strcmp(argv[i],"-h") )
+        {
+            h_opt = true;
+        }
+        else if (!strcmp(argv[i], "-fast"))
+        {
+            fast_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-infmt") && i < (argc-1) )
+        {
+            infmt_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-ter") && i < (argc-1) )
+        {
+            ter_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-split") && i < (argc-1) )
+        {
+            split_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-atom") && i < (argc-1) )
+        {
+            atom_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-mol") && i < (argc-1) )
+        {
+            mol_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir") && i < (argc-1) )
+        {
+            dir_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) )
+        {
+            suffix_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) )
+        {
+            TMcut=atof(argv[i + 1]); i++;
+            if (TMcut>1 or TMcut<0.45)
+                PrintErrorAndQuit("TMcut must be in the range of [0.45,1)");
+        }
+        else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) )
+        {
+            PrintErrorAndQuit("Sorry! -byresi has not been implemented yet");
+            byresi_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
+        else if (xname.size() == 0) xname=argv[i];
+        else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
+    }
+
+    if(xname.size()==0) print_help(h_opt);
+
+    if (suffix_opt.size() && dir_opt.size()==0)
+        PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set");
+    if (atom_opt.size()!=4)
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
+    if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
+    else if (mol_opt=="protein" && atom_opt=="auto")
+        atom_opt=" CA ";
+    else if (mol_opt=="RNA" && atom_opt=="auto")
+        atom_opt=" C3'";
+
+    if (u_opt && Lnorm_ass<=0)
+        PrintErrorAndQuit("Wrong value for option -u!  It should be >0");
+    if (d_opt && d0_scale<=0)
+        PrintErrorAndQuit("Wrong value for option -d!  It should be >0");
+    if (split_opt==1 && ter_opt!=0)
+        PrintErrorAndQuit("-split 1 should be used with -ter 0");
+    else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
+        PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1");
+    if (split_opt<0 || split_opt>2)
+        PrintErrorAndQuit("-split can only be 0, 1 or 2");
+
+    /* read initial alignment file from 'align.txt' */
+    if (i_opt) read_user_alignment(sequence, fname_lign, i_opt);
+
+    if (byresi_opt) i_opt=3;
+
+    /* parse file list */
+    if (dir_opt.size()==0) chain_list.push_back(xname);
+    else file2chainlist(chain_list, xname, dir_opt, suffix_opt);
+
+    /* declare previously global variables */
+    vector<vector<string> >PDB_lines; // text of chain
+    vector<int>    mol_vec;           // molecule type of chain1, RNA if >0
+    vector<string> chainID_list;      // list of chainID
+    size_t xchainnum=0;         // number of chains in a PDB file
+    size_t i,j;                 // number of residues/chains in a PDB is
+                                // usually quite limited. Yet, the number of
+                                // files can be very large. size_t is safer
+                                // than int for very long list of files
+    int    xlen,ylen;           // chain length
+    double **xa,**ya;           // xyz coordinate
+    vector<string> resi_vec;    // residue index for chain, dummy variable
+    vector<pair<int,size_t> >chainLen_list; // vector of (length,index) pair
+    vector<vector<char> > seq_vec;
+    vector<vector<char> > sec_vec;
+    vector<vector<vector<float> > >xyz_vec;
+
+    /* parse files */
+    string chain_name;
+    vector<char>  seq_tmp;
+    vector<char>  sec_tmp;
+    vector<float> flt_tmp(3,0);
+    vector<vector<float> >xyz_tmp;
+    int r; // residue index
+    size_t newchainnum;
+    double ub_HwRMSD=0.90*TMcut+0.10;
+    double lb_HwRMSD=0.5*TMcut;
+    double ub_TMfast=0.90*TMcut+0.10;
+    double lb_TMfast=0.9*TMcut;
+    if      (s_opt==2 || s_opt==4 || s_opt==5) a_opt=-2; // normalized by longer length, i.e. smaller TM
+    else if (s_opt==1 || s_opt==5) a_opt=-1; // normalized by shorter length, i.e. larger TM
+    else if (s_opt==3) a_opt= 1; // normalized by average length
+
+#ifdef TMalign_HwRMSD_h
+    /* These parameters controls HwRMSD filter. iter_opt typically should be
+     * >=3. Many alignments converge within iter_opt=5. Occassionally
+     * some alignments require iter_opt=10. Higher iter_opt takes more time,
+     * even though HwRMSD iter_opt 10 still takes far less time than TMalign
+     * -fast -TMcut 0.5.
+     * After HwRMSD filter, at least min_repr_num and at most max_repr_num
+     * are used for subsequent TMalign. The actual number of representatives
+     * are decided by xlen */
+    const int glocal    =0; // global alignment
+    const int iter_opt  =10;
+    const int min_repr_num=10;
+    const int max_repr_num=50;
+#endif
+
+    for (i=0;i<chain_list.size();i++)
+    {
+        xname=chain_list[i];
+        newchainnum=get_PDB_lines(xname, PDB_lines, chainID_list,
+            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt, het_opt);
+        if (!newchainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<xname
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        chain_name=xname.substr(dir_opt.size(),
+            xname.size()-dir_opt.size()-suffix_opt.size());
+        for (j=0;j<newchainnum;j++)
+        {
+            chainID_list[j+xchainnum]=chain_name+chainID_list[j+xchainnum];
+            xlen=PDB_lines[j].size();
+            cout<<"Parsing "<<xname<<'\t'<<chainID_list[j+xchainnum]
+                <<" ("<<xlen<<" residues)."<<endl;
+            if (mol_opt=="RNA") mol_vec[j+xchainnum]=1;
+            else if (mol_opt=="protein") mol_vec[j+xchainnum]=-1;
+
+            NewArray(&xa, xlen, 3);
+            seq_tmp.assign(xlen+1,'A');
+            sec_tmp.assign(xlen+1,0);
+
+            read_PDB(PDB_lines[j], xa, &seq_tmp[0], resi_vec, byresi_opt);
+
+            if (mol_vec[j]<=0) make_sec(xa, xlen, &sec_tmp[0]);
+            else make_sec(&seq_tmp[0],xa,xlen,&sec_tmp[0],atom_opt);
+
+            xyz_tmp.assign(xlen,flt_tmp);
+            for (r=0;r<xlen;r++)
+            {
+                xyz_tmp[r][0]=xa[r][0];
+                xyz_tmp[r][1]=xa[r][1];
+                xyz_tmp[r][2]=xa[r][2];
+            }
+
+            seq_vec.push_back(seq_tmp);
+            sec_vec.push_back(sec_tmp);
+            xyz_vec.push_back(xyz_tmp);
+
+            chainLen_list.push_back(
+                make_pair(PDB_lines[j].size(),j+xchainnum));
+
+            seq_tmp.clear();
+            sec_tmp.clear();
+            xyz_tmp.clear();
+            DeleteArray(&xa, xlen);
+            PDB_lines[j].clear();
+        }
+        PDB_lines.clear();
+        xchainnum+=newchainnum;
+    }
+    flt_tmp.clear();
+    chain_list.clear();
+
+    // swap completely destroy the vector and free up the memory capacity
+    vector<vector<string> >().swap(PDB_lines);
+    size_t Nstruct=chainLen_list.size();
+
+    /* sort by chain length */
+    stable_sort(chainLen_list.begin(),chainLen_list.end(),
+        greater<pair<int,int> >());
+    cout<<"Clustering "<<chainLen_list.size()
+        <<" chains with TM-score cutoff >="<<TMcut<<'\n'
+        <<"Longest chain "<<chainID_list[chainLen_list[0].second]<<'\t'
+        <<chainLen_list[0].first<<" residues.\n"
+        <<"Shortest chain "<<chainID_list[chainLen_list.back().second]<<'\t'
+        <<chainLen_list.back().first<<" residues."<<endl;
+
+    /* set the first cluster */
+    vector<size_t> clust_mem_vec(Nstruct,-1); // cluster membership
+    vector<size_t> clust_repr_vec; // the same as number of clusters
+    size_t chain_i=chainLen_list[0].second;
+    clust_repr_vec.push_back(chain_i);
+    clust_mem_vec[chain_i]=0;
+    map<size_t,size_t> clust_repr_map;
+
+    /* perform alignment */
+    size_t chain_j;
+    const double fast_lb=50.;  // proteins shorter than fast_lb never use -fast
+    const double fast_ub=1000.;// proteins longer than fast_ub always use -fast
+    double Lave;               // average protein length for chain_i and chain_j
+    size_t sizePROT;           // number of representatives for current chain
+    vector<size_t> index_vec;  // index of cluster representatives for the chain
+    bool found_clust;          // whether current chain hit previous cluster
+
+    for (i=1;i<Nstruct;i++)
+    {
+        chain_i=chainLen_list[i].second;
+        xlen=xyz_vec[chain_i].size();
+        if (xlen<=5) // TMalign cannot handle L<=5
+        {
+            clust_mem_vec[chain_i]=clust_repr_vec.size();
+            clust_repr_vec.push_back(clust_repr_vec.size());
+            continue;
+        }
+
+        NewArray(&xa, xlen, 3);
+        for (r=0;r<xlen;r++)
+        {
+            xa[r][0]=xyz_vec[chain_i][r][0];
+            xa[r][1]=xyz_vec[chain_i][r][1];
+            xa[r][2]=xyz_vec[chain_i][r][2];
+        }
+
+        // j-1 is index of old cluster. here, we starts from the latest
+        // cluster because proteins with similar length are more likely
+        // to be similar. we cannot use j as index because size_t j cannot
+        // be negative at the end of this loop
+        for (j=clust_repr_vec.size();j>0;j--)
+        {
+            chain_j=clust_repr_vec[j-1];
+            ylen=xyz_vec[chain_j].size();
+            if (mol_vec[chain_i]*mol_vec[chain_j]<0)    continue;
+            else if (s_opt==2 && xlen<TMcut*ylen)       continue;
+            else if (s_opt==3 && xlen<(2*TMcut-1)*ylen) continue;
+            else if (s_opt==4 && xlen*(2/TMcut-1)<ylen) continue;
+            else if (s_opt==5 && xlen<TMcut*TMcut*ylen) continue;
+            else if (s_opt==6 && xlen*xlen<(2*TMcut*TMcut-1)*ylen*ylen) continue;
+            index_vec.push_back(chain_j);
+        }
+        sizePROT=index_vec.size();
+
+        cout<<'>'<<chainID_list[chain_i]<<'\t'<<xlen<<'\t'
+            <<setiosflags(ios::fixed)<<setprecision(2)
+            <<100.*i/Nstruct<<"%(#"<<i<<")\t"
+            <<"#repr="<<sizePROT<<"/"<<clust_repr_vec.size()<<endl;
+
+#ifdef TMalign_HwRMSD_h
+        vector<pair<double,size_t> > HwRMSDscore_list;
+        double TM;
+        for (j=0;j<sizePROT;j++)
+        {
+            chain_j=index_vec[j];
+            ylen=xyz_vec[chain_j].size();
+            if (mol_vec[chain_i]*mol_vec[chain_j]<0)    continue;
+            else if (s_opt==2 && xlen<TMcut*ylen)       continue;
+            else if (s_opt==3 && xlen<(2*TMcut-1)*ylen) continue;
+            else if (s_opt==4 && xlen*(2/TMcut-1)<ylen) continue;
+            else if (s_opt==5 && xlen<TMcut*TMcut*ylen) continue;
+            else if (s_opt==6 && xlen*xlen<(2*TMcut*TMcut-1)*ylen*ylen) continue;
+
+            if (s_opt<=1) filter_lower_bound(lb_HwRMSD, lb_TMfast, 
+                TMcut, s_opt, mol_vec[chain_i]+mol_vec[chain_j]);
+            
+            NewArray(&ya, ylen, 3);
+            for (r=0;r<ylen;r++)
+            {
+                ya[r][0]=xyz_vec[chain_j][r][0];
+                ya[r][1]=xyz_vec[chain_j][r][1];
+                ya[r][2]=xyz_vec[chain_j][r][2];
+            }
+
+            /* declare variable specific to this pair of HwRMSD */
+            double t0[3], u0[3][3];
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for s_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+            int *invmap = new int[ylen+1];
+
+            /* entry function for structure alignment */
+            HwRMSD_main(
+                xa, ya, &seq_vec[chain_i][0], &seq_vec[chain_j][0],
+                &sec_vec[chain_i][0], &sec_vec[chain_j][0], t0, u0,
+                TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u,
+                d0a, d0_out, seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali,
+                rmsd_ali, n_ali, n_ali8, xlen, ylen,
+                sequence, Lnorm_ass,
+                d0_scale, i_opt,
+                a_opt, u_opt, d_opt, mol_vec[chain_i]+mol_vec[chain_j],
+                invmap, glocal, iter_opt);
+
+            TM=TM3; // average length
+            if      (s_opt==1) TM=TM2; // shorter length
+            else if (s_opt==2) TM=TM1; // longer length
+            else if (s_opt==3) TM=(TM1+TM2)/2;     // average TM
+            else if (s_opt==4) TM=2/(1/TM1+1/TM2); // harmonic average
+            else if (s_opt==5) TM=sqrt(TM1*TM2);   // geometric average
+            else if (s_opt==6) TM=sqrt((TM1*TM1+TM2*TM2)/2); // root mean square
+
+            Lave=sqrt(xlen*ylen); // geometry average because O(L1*L2)
+            if (TM>=lb_HwRMSD || Lave<=fast_lb)
+                HwRMSDscore_list.push_back(make_pair(TM,index_vec[j]));
+
+            /* clean up after each HwRMSD */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+            DeleteArray(&ya, ylen);
+            delete [] invmap;
+
+            /* if a good hit is guaranteed to be found, stop the loop */
+            if (TM>=ub_HwRMSD) break;
+        }
+
+        stable_sort(HwRMSDscore_list.begin(),HwRMSDscore_list.end(),
+            greater<pair<double,size_t> >());
+
+        int cur_repr_num_cutoff=min_repr_num;
+        if (xlen<=fast_lb) cur_repr_num_cutoff=max_repr_num;
+        else if (xlen>fast_lb && xlen<fast_ub) cur_repr_num_cutoff+=
+            (fast_ub-xlen)/(fast_ub-fast_lb)*(max_repr_num-min_repr_num);
+
+        index_vec.clear();
+        for (j=0;j<HwRMSDscore_list.size();j++)
+        {
+            TM=HwRMSDscore_list[j].first;
+            chain_j=HwRMSDscore_list[j].second;
+            ylen=xyz_vec[chain_j].size();
+            Lave=sqrt(xlen*ylen); // geometry average because O(L1*L2)
+            if (Lave>fast_lb && TM<TMcut*0.5 && 
+                index_vec.size()>=cur_repr_num_cutoff) break;
+            index_vec.push_back(chain_j);
+            cout<<"#"<<chain_j<<"\t"<<chainID_list[chain_j]<<"\t"
+                <<setiosflags(ios::fixed)<<setprecision(4)<<TM<<endl;
+        }
+        cout<<index_vec.size()<<" out of "
+            <<HwRMSDscore_list.size()<<" entries"<<endl;
+        HwRMSDscore_list.clear();
+#endif
+
+        found_clust=false;
+        for (j=0;j<index_vec.size();j++)
+        {
+            chain_j=index_vec[j];
+            ylen=xyz_vec[chain_j].size();
+            if (mol_vec[chain_i]*mol_vec[chain_j]<0)    continue;
+            else if (s_opt==2 && xlen<TMcut*ylen)       continue;
+            else if (s_opt==3 && xlen<(2*TMcut-1)*ylen) continue;
+            else if (s_opt==4 && xlen*(2/TMcut-1)<ylen) continue;
+            else if (s_opt==5 && xlen<TMcut*TMcut*ylen) continue;
+            else if (s_opt==6 && xlen*xlen<(2*TMcut*TMcut-1)*ylen*ylen) continue;
+            if (s_opt<=1) filter_lower_bound(lb_HwRMSD, lb_TMfast,
+                TMcut, s_opt, mol_vec[chain_i]+mol_vec[chain_j]);
+
+            NewArray(&ya, ylen, 3);
+            for (r=0;r<ylen;r++)
+            {
+                ya[r][0]=xyz_vec[chain_j][r][0];
+                ya[r][1]=xyz_vec[chain_j][r][1];
+                ya[r][2]=xyz_vec[chain_j][r][2];
+            }
+
+            Lave=sqrt(xlen*ylen); // geometry average because O(L1*L2)
+            bool overwrite_fast_opt=(fast_opt==true || Lave>=fast_ub);
+            
+            /* declare variable specific to this pair of TMalign */
+            double t0[3], u0[3][3];
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for s_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+            
+            /* entry function for structure alignment */
+            int status=TMalign_main(
+                xa, ya, &seq_vec[chain_i][0], &seq_vec[chain_j][0],
+                &sec_vec[chain_i][0], &sec_vec[chain_j][0],
+                t0, u0, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                i_opt, a_opt, u_opt, d_opt, overwrite_fast_opt,
+                mol_vec[chain_i]+mol_vec[chain_j],TMcut);
+
+            cout<<status<<'\t'<<chainID_list[chain_j]<<'\t'
+                <<setiosflags(ios::fixed)<<setprecision(4)
+                <<TM2<<'\t'<<TM1<<'\t'<<overwrite_fast_opt<<endl;
+
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            double TM=TM3; // average length
+            if      (s_opt==1) TM=TM2; // shorter length
+            else if (s_opt==2) TM=TM1; // longer length
+            else if (s_opt==3) TM=(TM1+TM2)/2;     // average TM
+            else if (s_opt==4) TM=2/(1/TM1+1/TM2); // harmonic average
+            else if (s_opt==5) TM=sqrt(TM1*TM2);   // geometric average
+            else if (s_opt==6) TM=sqrt((TM1*TM1+TM2*TM2)/2); // root mean square
+
+            if (TM<lb_TMfast || 
+               (TM<TMcut && (fast_opt || overwrite_fast_opt==false)))
+            {
+                DeleteArray(&ya, ylen);
+                continue;
+            }
+
+            if (TM>=ub_TMfast || 
+               (TM>=TMcut && (fast_opt || overwrite_fast_opt==false)))
+            {
+                clust_mem_vec[chain_i]=clust_repr_map[chain_j];
+                DeleteArray(&ya, ylen);
+                found_clust=true;
+                break;
+            }
+
+            if (TM<lb_TMfast && overwrite_fast_opt==false)
+            {
+                TMalign_main(
+                    xa, ya, &seq_vec[chain_i][0], &seq_vec[chain_j][0],
+                    &sec_vec[chain_i][0], &sec_vec[chain_j][0],
+                    t0, u0, TM1, TM2, TM3, TM4, TM5,
+                    d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                    seqM, seqxA, seqyA,
+                    rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                    xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                    i_opt, a_opt, u_opt, d_opt, false,
+                    mol_vec[chain_i]+mol_vec[chain_j],TMcut);
+                seqM.clear();
+                seqxA.clear();
+                seqyA.clear();
+                DeleteArray(&ya, ylen);
+                
+                TM=TM3;                // average length
+                if      (s_opt==1) TM=TM2; // shorter length
+                else if (s_opt==2) TM=TM1; // longer length
+                else if (s_opt==3) TM=(TM1+TM2)/2;     // average TM
+                else if (s_opt==4) TM=2/(1/TM1+1/TM2); // harmonic average
+                else if (s_opt==5) TM=sqrt(TM1*TM2);   // geometric average
+                else if (s_opt==6) TM=sqrt((TM1*TM1+TM2*TM2)/2); // root mean square
+                cout<<"*\t"<<chainID_list[chain_j]<<'\t'<<TM2<<'\t'<<TM1<<endl;
+                if (TM>=TMcut)
+                {
+                    clust_mem_vec[chain_i]=clust_repr_map[chain_j];
+                    found_clust=true;
+                    break;
+                }
+            }
+        }
+        DeleteArray(&xa, xlen);
+        index_vec.clear();
+
+        if (!found_clust) // new cluster
+        {
+            clust_mem_vec[chain_i]=clust_repr_vec.size();
+            clust_repr_map[chain_i]=clust_repr_vec.size();
+            clust_repr_vec.push_back(chain_i);
+        }
+        else // member structures are not used further
+        {
+            vector<char> ().swap(seq_vec[chain_i]);
+            vector<char> ().swap(sec_vec[chain_i]);
+            vector<vector<float> > ().swap(xyz_vec[chain_i]);
+        }
+    }
+
+    /* clean up */
+    mol_vec.clear();
+    xyz_vec.clear();
+    seq_vec.clear();
+    sec_vec.clear();
+
+    /* print out cluster */
+    stringstream txt;
+    for (j=0;j<clust_repr_vec.size();j++)
+    {
+        chain_j=clust_repr_vec[j]; // cluster representative
+        txt<<chainID_list[chain_j];
+        for (chain_i=0;chain_i<clust_mem_vec.size();chain_i++)
+        {
+            if (chain_i!=chain_j && clust_mem_vec[chain_i]==j)
+                txt<<'\t'<<chainID_list[chain_i];
+        }
+        txt<<'\n';
+    }
+    if (fname_clust.size() && fname_clust!="-")
+    {
+        ofstream fp(fname_clust.c_str());
+        fp<<txt.str();
+        fp.close();
+    }
+    else cout<<txt.str()<<endl;
+
+    /* clean up */
+    txt.str(string());
+    clust_repr_vec.clear();
+    clust_mem_vec.clear();
+    chainID_list.clear();
+    clust_repr_map.clear();
+
+    t2 = clock();
+    float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
+    printf("#Total CPU time is %5.2f seconds\n", diff);
+    return 0;
+}
diff --git a/modules/bindings/src/tmalign/readme.txt b/modules/bindings/src/USalign/readme.txt
similarity index 66%
rename from modules/bindings/src/tmalign/readme.txt
rename to modules/bindings/src/USalign/readme.txt
index 3249215e8..2a0330252 100644
--- a/modules/bindings/src/tmalign/readme.txt
+++ b/modules/bindings/src/USalign/readme.txt
@@ -1,15 +1,11 @@
 ==============================================================================
-   TM-align: protein and RNA structure alignment by TM-score superposition.
-
-   This program was written by (in reverse chronological order)
-   Chengxin Zhang, Sha Gong, Jianjie Wu, and Jianyi Yang
-   at Yang Zhang lab, Department of Computational Medicine and Bioinformatics,
-   University of Michigan, 100 Washtenaw Ave, Ann Arbor, MI 48109-2218.
-   Please report issues to yangzhanglab@umich.edu
+   US-align: universal structure alignment of monomeric and complex proteins
+   and nucleic acids
 
    References to cite:
-   S Gong, C Zhang, Y Zhang. Bioinformatics, btz282 (2019)
-   Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005)
+   (1) Chengxin Zhang, Morgan Shine, Anna Marie Pyle, Yang Zhang
+       (2022) Nat Methods
+   (2) Chengxin Zhang, Anna Marie Pyle (2022) iScience
 
    DISCLAIMER:
      Permission to use, copy, modify, and distribute this program for 
@@ -61,38 +57,53 @@
    2021/01/07: Fixed bug in TMscore -c
    2021/05/29: Remove unnecessary depedency on malloc.h, which prevent
                compilation on Mac OS
+   2021/08/17: Complete implementation of MMalign
+   2021/10/03: Support Windows
+   2022/02/27: Add -seq (-byresi 4 & 5) for TM-score superimposition guided by
+               sequence alignment.
+   2022/04/12: Support AlphaFold CIF
+   2022/05/11: Update -mm 4 output format
+   2022/05/24: Limited support for sequence order independent alignment
+   2022/05/30: Correct atom pair output for -mm 5
+   2022/06/07: Sequence order semi-independent alignment
+   2022/06/20: Sequentiality within SSE in sequence order semi-independent
+               alignment
+   2022/06/22: Fix infinite loop for malformed PDB
+   2022/06/23: Fix -m for Windows. Add pymol plugin.
+   2022/06/26: Add -full option for -mm 2 and 4
+   2022/09/24: Support -TMscore for complex when the chain order is different
 ===============================================================================
 
 =========================
- How to install TM-align
+ How to install US-align
 =========================
 To compile the program in your Linux computer, simply enter
 
- make
+    make
 
 or
 
- g++ -static -O3 -ffast-math -lm -o TMalign TMalign.cpp
+    g++ -static -O3 -ffast-math -lm -o USalign USalign.cpp
 
 The '-static' flag should be removed on Mac OS, which does not support
 building static executables.
 
+USalign compiled on Linux, Mac OS and Windows Subsystem for Linux (WSL2) on
+Windows 10 onwards can read both uncompressed files and gz compressed
+files, provided that the "gunzip" command is available. On the other hand, due
+to the lack of POSIX support on Windows, US-align natively compiled on Windows
+without WSL2 cannot parse gz compressed files.
+
+US-align is known to be compilable by g++ version 4.8.5 or later, clang++
+version 12.0.5 or later and mingw-w64 version 9.3 or later.
+
 =====================
- How to use TM-align
+ How to use US-align
 =====================
 You can run the program without arguments to obtain a brief instruction
 
- ./TMalign structure1.pdb structure2.pdb
-
-===================
- Fortran version
-===================
-You can download the fortran version of TM-align from
-https://zhanglab.ccmb.med.umich.edu/TM-align/
+    ./USalign structure1.pdb structure2.pdb
 
-This C++ version of TM-align implemented several features not available in the
-fortran version, including RNA alignment and batch alignment of multiple 
-structures. A full list of available options can be explored by:
-  ./TMalign -h
+A full list of available options can be explored by:
 
-2021/05/20
+    ./USalign -h
diff --git a/modules/bindings/src/tmalign/se.cpp b/modules/bindings/src/USalign/se.cpp
similarity index 94%
rename from modules/bindings/src/tmalign/se.cpp
rename to modules/bindings/src/USalign/se.cpp
index c4d760681..af24ae78c 100644
--- a/modules/bindings/src/tmalign/se.cpp
+++ b/modules/bindings/src/USalign/se.cpp
@@ -48,12 +48,17 @@ void print_extra_help()
 "             2: tabular format very compact output\n"
 "\n"
 "    -byresi  Whether to align two structures by residue index.\n"
+"             The same as -TMscore.\n"
 "             0: (default) do not align by residue index\n"
 "             1: (same as TMscore program) align by residue index\n"
 "             2: (same as TMscore -c, should be used with -ter <=1)\n"
 "                align by residue index and chain ID\n"
 "             3: (similar to TMscore -c, should be used with -ter <=1)\n"
 "                align by residue index and order of chain\n"
+"             4: sequence dependent alignment: perform Needleman-Wunsch\n"
+"                global sequence alignment\n"
+"             5: sequence dependent alignment: perform glocal sequence\n"
+"                alignment\n"
 "\n"
 "    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
 "             0: (default) only align 'ATOM  ' residues\n"
@@ -208,7 +213,8 @@ int main(int argc, char *argv[])
         {
             outfmt_opt=atoi(argv[i + 1]); i++;
         }
-        else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) )
+        else if ( (!strcmp(argv[i],"-byresi") || !strcmp(argv[i],"-TMscore") ||
+                   !strcmp(argv[i],"-tmscore") ) && i < (argc-1) )
         {
             byresi_opt=atoi(argv[i + 1]); i++;
         }
@@ -255,10 +261,10 @@ int main(int argc, char *argv[])
     {
         if (i_opt)
             PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I");
-        if (byresi_opt<0 || byresi_opt>3)
-            PrintErrorAndQuit("-byresi can only be 0, 1, 2 or 3");
-        if (byresi_opt>=2 && ter_opt>=2)
-            PrintErrorAndQuit("-byresi >=2 should be used with -ter <=1");
+        if (byresi_opt<0 || byresi_opt>5)
+            PrintErrorAndQuit("-byresi can only be 0, 1, 2, 3, 4, or 5");
+        if (byresi_opt>=2 && byresi_opt<=3 && ter_opt>=2)
+            PrintErrorAndQuit("-byresi 2 and -byresi 3 should be used with -ter <=1");
     }
     if (split_opt==1 && ter_opt!=0)
         PrintErrorAndQuit("-split 1 should be used with -ter 0");
@@ -398,7 +404,7 @@ int main(int argc, char *argv[])
                         outfmt_opt, invmap);
 
                     if (outfmt_opt>=2) 
-                        get_seqID(invmap, seqx, seqy, ylen, Liden, n_ali8);
+                        get_seqID(invmap, seqx, seqy, ylen, Liden, n_ali);
 
                     /* print result */
                     output_results(
diff --git a/modules/bindings/src/tmalign/se.h b/modules/bindings/src/USalign/se.h
similarity index 73%
rename from modules/bindings/src/tmalign/se.h
rename to modules/bindings/src/USalign/se.h
index 6ccc84132..27eb3b48c 100644
--- a/modules/bindings/src/tmalign/se.h
+++ b/modules/bindings/src/USalign/se.h
@@ -1,7 +1,10 @@
 #include "TMalign.h"
 
 /* entry function for se
- * outfmt_opt>=2 should not parse sequence alignment */
+ * outfmt_opt>=2 should not parse sequence alignment 
+ * u_opt corresponds to option -L
+ *       if u_opt==2, use d0 from Lnorm_ass for alignment
+ * if hinge>0, append to original invmap */
 int se_main(
     double **xa, double **ya, const char *seqx, const char *seqy,
     double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
@@ -12,8 +15,8 @@ int se_main(
     double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
     const int xlen, const int ylen, const vector<string> &sequence,
     const double Lnorm_ass, const double d0_scale, const bool i_opt,
-    const bool a_opt, const bool u_opt, const bool d_opt, const int mol_type,
-    const int outfmt_opt, int *invmap)
+    const bool a_opt, const int u_opt, const bool d_opt, const int mol_type,
+    const int outfmt_opt, int *invmap, const int hinge=0)
 {
     double D0_MIN;        //for d0
     double Lnorm;         //normalization length
@@ -37,7 +40,21 @@ int se_main(
     NewArray(&score, xlen+1, ylen+1);
     NewArray(&path, xlen+1, ylen+1);
     NewArray(&val, xlen+1, ylen+1);
-    //int *invmap          = new int[ylen+1];
+    int *invmap0          = new int[ylen+1];
+    int i,j;
+    if (hinge==0) for (j=0;j<=ylen;j++) invmap0[j]=-1;
+    else for (j=0;j<ylen;j++) invmap0[j]=invmap[j];
+    vector<char> seqM_char;
+    if (hinge)
+    {
+        seqM_char.assign(ylen,hinge+'0');
+        j=-1;
+        for (int r=0;r<seqM.size();r++)
+        {
+            j+=seqyA[r]!='-';
+            if (seqM[r]!=' ') seqM_char[j]=seqM[r];
+        }
+    }
 
     /* set d0 */
     parameter_set4search(xlen, ylen, D0_MIN, Lnorm,
@@ -50,12 +67,19 @@ int se_main(
         parameter_set4final((xlen+ylen)*0.5, D0_MIN, Lnorm,
             d0a, d0_search, mol_type); // set d0a
     if (u_opt)
+    {
         parameter_set4final(Lnorm_ass, D0_MIN, Lnorm,
             d0u, d0_search, mol_type); // set d0u
+        if (u_opt==2)
+        {
+            parameter_set4search(Lnorm_ass, Lnorm_ass, D0_MIN, Lnorm,
+                score_d8, d0, d0_search, dcu0); // set score_d8
+        }
+    }
 
     /* perform alignment */
-    for(int j=0; j<ylen; j++) invmap[j]=-1;
-    if (!i_opt) NWDP_SE(path, val, xa, ya, xlen, ylen, d0*d0, 0, invmap);
+    if (hinge==0) for(j=0; j<ylen; j++) invmap[j]=-1;
+    if (!i_opt) NWDP_SE(path, val, xa, ya, xlen, ylen, d0*d0, 0, invmap, hinge);
     else
     {
         int i1 = -1;// in C version, index starts from zero, not from one
@@ -74,8 +98,17 @@ int se_main(
             }
         }
     }
-
-    rmsd0=TM1=TM2=TM3=TM4=TM5=0;
+    
+    if (hinge==0) rmsd0=TM1=TM2=TM3=TM4=TM5=0;
+    else
+    {
+        TM2*=xlen;
+        TM1*=ylen;
+        TM3*=(xlen+ylen)*0.5;
+        TM4*=Lnorm_ass;
+        TM5*=ylen;
+        rmsd0=rmsd0*rmsd0*n_ali8;
+    }
     int k=0;
     n_ali=0;
     n_ali8=0;
@@ -86,7 +119,7 @@ int se_main(
         {
             n_ali++;
             d=sqrt(dist(&xa[i][0], &ya[j][0]));
-            if (d <= score_d8 || i_opt)
+            if (d <= score_d8 || i_opt || invmap0[j]==i)
             {
                 if (outfmt_opt<2)
                 {
@@ -94,6 +127,7 @@ int se_main(
                     m2[k]=j;
                 }
                 k++;
+                if (invmap0[j]==i) continue;
                 TM2+=1/(1+(d/d0B)*(d/d0B)); // chain_1
                 TM1+=1/(1+(d/d0A)*(d/d0A)); // chain_2
                 if (a_opt) TM3+=1/(1+(d/d0a)*(d/d0a)); // -a
@@ -101,6 +135,7 @@ int se_main(
                 if (d_opt) TM5+=1/(1+(d/d0_scale)*(d/d0_scale)); // -d
                 rmsd0+=d*d;
             }
+            else if (hinge) invmap[j]=-1;
         }
     }
     n_ali8=k;
@@ -113,6 +148,8 @@ int se_main(
 
     if (outfmt_opt>=2)
     {
+        if (hinge) seqM_char.clear();    
+        delete []invmap0;
         DeleteArray(&score, xlen+1);
         DeleteArray(&path, xlen+1);
         DeleteArray(&val, xlen+1);
@@ -179,9 +216,18 @@ int se_main(
     seqxA=seqxA.substr(0,kk);
     seqyA=seqyA.substr(0,kk);
     seqM =seqM.substr(0,kk);
+    if (hinge)
+    {
+        j=-1;
+        for (int r=0;r<seqM.size();r++)
+        {
+            j+=seqyA[r]!='-';
+            if (seqM[r]!=' ') seqM[r]=seqM_char[j];
+        }
+    }
 
     /* free memory */
-    //delete [] invmap;
+    delete [] invmap0;
     delete [] m1;
     delete [] m2;
     DeleteArray(&score, xlen+1);
diff --git a/modules/bindings/src/USalign/usalign.py b/modules/bindings/src/USalign/usalign.py
new file mode 100644
index 000000000..fc9ddd3df
--- /dev/null
+++ b/modules/bindings/src/USalign/usalign.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env pymol
+'''
+PyMOL plugin for US-align
+
+USAGE: 
+
+    usalign mobile, target [,args [,exe]]
+
+INSTALLATION
+
+    Install this script as a PyMOL plugin by 
+    "Plugin" - "Plugin Manager" - "Install New Plugin"
+
+    This plugin depends on the binary executable of US-align, which must be
+    available within a directory specified by PATH. You can get the PATH
+    value within PyMOL by the following command:
+    
+    print(os.getenv('PATH'))
+'''
+#This script is partly based on tmalign plugin by Thomas Holder available at
+#https://github.com/Pymol-Scripts/Pymol-script-repo/blob/master/tmalign.py
+
+from __future__ import print_function
+
+__author__ = 'Chengxin Zhang'
+__version__ = '20220924'
+__license__ = 'BSD-2-Clause'
+
+from pymol import cmd, CmdException
+import subprocess
+import tempfile
+import os
+import platform
+
+def get_usalign_path(exe="USalign"):
+    '''Locate the US-align binary: beside this plugin first, then in each PATH entry.'''
+    if platform.system().lower().startswith("win"):
+        exe+=".exe"
+    plugin_dir = os.path.dirname(os.path.abspath(__file__))
+    path_env = os.getenv("PATH")  # None when the variable is unset
+    path_dirs = path_env.split(os.pathsep) if path_env is not None else []
+    for p in [plugin_dir] + path_dirs:
+        filename=os.path.join(p,exe)
+        if os.path.isfile(filename):
+            return filename
+    print("ERROR! Cannot locate %s at %s or at %s"%(exe,
+        plugin_dir,path_env))
+    print("Please put the USalign executable at one of the aforementioned paths")
+    return exe  # not found: hand back the bare name after reporting the error
+
+def usalign(mobile, target, args='', exe='', transform=1):
+    '''
+USAGE
+
+    usalign mobile, target [, args [, exe [, transform ]]]
+
+ARGUMENTS
+
+    mobile, target = string: atom selections
+
+    args = string: Extra arguments such as -mm and -byresi
+
+    exe = string: Path to USalign executable {default: USalign}
+
+    transform = int: if non-zero, apply the superposition to mobile {default: 1}
+
+CITATION
+
+    Zhang C, Shine M, Pyle AM, Zhang Y. bioRxiv 2022.04.18.488565.
+    https://github.com/pylelab/USalign
+    '''
+    # mkstemp creates both files up front; mktemp only returned an unused name.
+    tmp_files = [tempfile.mkstemp('.pdb', prefix) for prefix in ('mobile', 'target')]
+    for fd, _ in tmp_files:
+        os.close(fd)
+    mobile_filename, target_filename = [name for _, name in tmp_files]
+    mobile_ca_sele = '(%s) and (not hetatm) and alt +A' % (mobile)
+    target_ca_sele = '(%s) and (not hetatm) and alt +A' % (target)
+    if "-atom" not in args:
+        mobile_ca_sele+=" and (name CA or name C3')"
+        target_ca_sele+=" and (name CA or name C3')"
+
+    try:
+        cmd.save(mobile_filename, mobile_ca_sele)
+        cmd.save(target_filename, target_ca_sele)
+        exe = exe or get_usalign_path("USalign")
+        if len(args)>=2 and args[0]=='"' and args[-1]=='"':  # also covers a bare '""'
+            args=args[1:-1]
+        if "-outfmt" not in args:
+            args+=" -outfmt -1"
+        args = ' '.join([exe, mobile_filename, target_filename, args, '-m -'])
+        print(args)
+        try:
+            process = subprocess.Popen(args, stdout=subprocess.PIPE, shell=True,
+                    universal_newlines=True)
+            lines = process.stdout.readlines()
+            process.wait()  # reap the child once its output has been drained
+        except OSError:
+            print('Cannot execute "%s", please provide full path to USalign executable' % (args))
+            raise CmdException
+    finally:
+        os.remove(mobile_filename)
+        os.remove(target_filename)
+
+    rowcount = 0
+    matrix = []
+    for line in lines:
+        print(line.rstrip())
+        if line.strip().startswith('------ The rotation matrix to rotate '):
+            rowcount = 1
+        elif 4 >= rowcount and rowcount> 0:
+            if rowcount >= 2:  # the first line after the marker is skipped
+                a = list(map(float, line.split()))
+                matrix.extend(a[2:5])
+                matrix.append(a[1])
+            rowcount += 1
+
+    assert len(matrix) == 3 * 4, 'no rotation matrix found in USalign output'
+    matrix.extend([0, 0, 0, 1])
+
+    if int(transform):
+        cmd.transform_selection('byobject (%s)' % (mobile), matrix, homogenous=1)
+
+# pymol commands
+cmd.extend('usalign', usalign)
+cmd.extend('USalign', usalign)
+
+# autocompletion
+cmd.auto_arg[0].update({ 'usalign': cmd.auto_arg[0]['align'], })
+cmd.auto_arg[1].update({ 'usalign': cmd.auto_arg[1]['align'], })
+cmd.auto_arg[0].update({ 'USalign': cmd.auto_arg[0]['align'], })
+cmd.auto_arg[1].update({ 'USalign': cmd.auto_arg[1]['align'], })
diff --git a/modules/bindings/src/tmalign/xyz_sfetch.cpp b/modules/bindings/src/USalign/xyz_sfetch.cpp
similarity index 83%
rename from modules/bindings/src/tmalign/xyz_sfetch.cpp
rename to modules/bindings/src/USalign/xyz_sfetch.cpp
index 5d413d5c5..4cf057605 100644
--- a/modules/bindings/src/tmalign/xyz_sfetch.cpp
+++ b/modules/bindings/src/USalign/xyz_sfetch.cpp
@@ -84,15 +84,27 @@ int main(int argc, char *argv[])
 
     /* read entry list */
     vector<string> chain_list;
-    ifstream fp(list_opt.c_str());
-    while (fp.good())
+    ifstream fp;
+    if (list_opt=="-")
     {
-        getline(fp, line);
-        for (i=0;i<line.size();i++)
-            if (line[i]==' '||line[i]=='\t') break;
-        if (line.size() && i) chain_list.push_back(line.substr(0,i));
+        while (cin.good())
+        {
+            getline(cin, line);
+            for (i=0;i<line.size();i++) if (line[i]==' '||line[i]=='\t') break;
+            if (line.size() && i) chain_list.push_back(line.substr(0,i));
+        }
+    }
+    else
+    {
+        fp.open(list_opt.c_str(),ios::in);
+        while (fp.good())
+        {
+            getline(fp, line);
+            for (i=0;i<line.size();i++) if (line[i]==' '||line[i]=='\t') break;
+            if (line.size() && i) chain_list.push_back(line.substr(0,i));
+        }
+        fp.close();
     }
-    fp.close();
 
     /* read xyz index */
     /* In xyz file, each line has 28 chacters plus an additional '\n'. In PDB
@@ -128,6 +140,6 @@ int main(int argc, char *argv[])
     delete[]buf;
     filename.clear();
     list_opt.clear();
-    chain_list.clear();
+    vector<string>().swap(chain_list);
     return 0;
 }
diff --git a/modules/bindings/src/tmalign/.gitignore b/modules/bindings/src/tmalign/.gitignore
deleted file mode 100644
index 4dbbc7f99..000000000
--- a/modules/bindings/src/tmalign/.gitignore
+++ /dev/null
@@ -1,17 +0,0 @@
-# compiled python code
-*.pyc
-
-# vim temporary backup
-.*.sw*
-
-# binary executables
-TMalign
-TMalignc
-pdb2xyz
-pdb2fasta
-pdb2ss
-xyz_sfetch
-se
-qTMclust
-NWalign
-HwRMSD
diff --git a/modules/bindings/src/tmalign/MMalign.h b/modules/bindings/src/tmalign/MMalign.h
deleted file mode 100644
index af9920a8c..000000000
--- a/modules/bindings/src/tmalign/MMalign.h
+++ /dev/null
@@ -1,1194 +0,0 @@
-#include "se.h"
-
-/* count the number of nucleic acid chains (na_chain_num) and
- * protein chains (aa_chain_num) in a complex */
-int count_na_aa_chain_num(int &na_chain_num,int &aa_chain_num,
-    const vector<int>&mol_vec)
-{
-    na_chain_num=0;
-    aa_chain_num=0;
-    for (size_t i=0;i<mol_vec.size();i++)
-    {
-        if (mol_vec[i]>0) na_chain_num++;
-        else              aa_chain_num++;
-    }
-    return na_chain_num+aa_chain_num;
-}
-
-/* adjust chain assignment for dimer-dimer alignment 
- * return true if assignment is adjusted */
-bool adjust_dimer_assignment(        
-    const vector<vector<vector<double> > >&xa_vec,
-    const vector<vector<vector<double> > >&ya_vec,
-    const vector<int>&xlen_vec, const vector<int>&ylen_vec,
-    const vector<int>&mol_vec1, const vector<int>&mol_vec2,
-    int *assign1_list, int *assign2_list,
-    const vector<vector<string> >&seqxA_mat,
-    const vector<vector<string> >&seqyA_mat)
-{
-    /* check currently assigned chains */
-    int i1,i2,j1,j2;
-    i1=i2=j1=j2=-1;    
-    int chain1_num=xa_vec.size();
-    int i,j;
-    for (i=0;i<chain1_num;i++)
-    {
-        if (assign1_list[i]>=0)
-        {
-            if (i1<0)
-            {
-                i1=i;
-                j1=assign1_list[i1];
-            }
-            else
-            {
-                i2=i;
-                j2=assign1_list[i2];
-            }
-        }
-    }
-
-    /* normalize d0 by L */
-    int xlen=xlen_vec[i1]+xlen_vec[i2];
-    int ylen=ylen_vec[j1]+ylen_vec[j2];
-    int mol_type=mol_vec1[i1]+mol_vec1[i2]+
-                 mol_vec2[j1]+mol_vec2[j2];
-    double D0_MIN, d0, d0_search;
-    double Lnorm=getmin(xlen,ylen);
-    parameter_set4final(getmin(xlen,ylen), D0_MIN, Lnorm, d0, 
-        d0_search, mol_type);
-
-    double **xa,**ya, **xt;
-    NewArray(&xa, xlen, 3);
-    NewArray(&ya, ylen, 3);
-    NewArray(&xt, xlen, 3);
-
-    double RMSD = 0;
-    double dd   = 0;
-    double t[3];
-    double u[3][3];
-    size_t L_ali=0; // index of residue in aligned region
-    size_t r=0;     // index of residue in full alignment
-
-    /* total score using current assignment */
-    L_ali=0;
-    i=j=-1;
-    for (r=0;r<seqxA_mat[i1][j1].size();r++)
-    {
-        i+=(seqxA_mat[i1][j1][r]!='-');
-        j+=(seqyA_mat[i1][j1][r]!='-');
-        if (seqxA_mat[i1][j1][r]=='-' || seqyA_mat[i1][j1][r]=='-') continue;
-        xa[L_ali][0]=xa_vec[i1][i][0];
-        xa[L_ali][1]=xa_vec[i1][i][1];
-        xa[L_ali][2]=xa_vec[i1][i][2];
-        ya[L_ali][0]=ya_vec[j1][j][0];
-        ya[L_ali][1]=ya_vec[j1][j][1];
-        ya[L_ali][2]=ya_vec[j1][j][2];
-        L_ali++;
-    }
-    i=j=-1;
-    for (r=0;r<seqxA_mat[i2][j2].size();r++)
-    {
-        i+=(seqxA_mat[i2][j2][r]!='-');
-        j+=(seqyA_mat[i2][j2][r]!='-');
-        if (seqxA_mat[i2][j2][r]=='-' || seqyA_mat[i2][j2][r]=='-') continue;
-        xa[L_ali][0]=xa_vec[i2][i][0];
-        xa[L_ali][1]=xa_vec[i2][i][1];
-        xa[L_ali][2]=xa_vec[i2][i][2];
-        ya[L_ali][0]=ya_vec[j2][j][0];
-        ya[L_ali][1]=ya_vec[j2][j][1];
-        ya[L_ali][2]=ya_vec[j2][j][2];
-        L_ali++;
-    }
-
-    Kabsch(xa, ya, L_ali, 1, &RMSD, t, u);
-    do_rotation(xa, xt, L_ali, t, u);
-
-    double total_score1=0;
-    for (r=0;r<L_ali;r++)
-    {
-        dd=dist(xt[r],ya[r]);
-        total_score1+=1/(1+dd/d0*d0);
-    }
-    total_score1/=Lnorm;
-
-    /* total score using reversed assignment */
-    L_ali=0;
-    i=j=-1;
-    for (r=0;r<seqxA_mat[i1][j2].size();r++)
-    {
-        i+=(seqxA_mat[i1][j2][r]!='-');
-        j+=(seqyA_mat[i1][j2][r]!='-');
-        if (seqxA_mat[i1][j2][r]=='-' || seqyA_mat[i1][j2][r]=='-') continue;
-        xa[L_ali][0]=xa_vec[i1][i][0];
-        xa[L_ali][1]=xa_vec[i1][i][1];
-        xa[L_ali][2]=xa_vec[i1][i][2];
-        ya[L_ali][0]=ya_vec[j2][j][0];
-        ya[L_ali][1]=ya_vec[j2][j][1];
-        ya[L_ali][2]=ya_vec[j2][j][2];
-        L_ali++;
-    }
-    i=j=-1;
-    for (r=0;r<seqxA_mat[i2][j1].size();r++)
-    {
-        i+=(seqxA_mat[i2][j1][r]!='-');
-        j+=(seqyA_mat[i2][j1][r]!='-');
-        if (seqxA_mat[i2][j1][r]=='-' || seqyA_mat[i2][j1][r]=='-') continue;
-        xa[L_ali][0]=xa_vec[i2][i][0];
-        xa[L_ali][1]=xa_vec[i2][i][1];
-        xa[L_ali][2]=xa_vec[i2][i][2];
-        ya[L_ali][0]=ya_vec[j1][j][0];
-        ya[L_ali][1]=ya_vec[j1][j][1];
-        ya[L_ali][2]=ya_vec[j1][j][2];
-        L_ali++;
-    }
-
-    Kabsch(xa, ya, L_ali, 1, &RMSD, t, u);
-    do_rotation(xa, xt, L_ali, t, u);
-
-    double total_score2=0;
-    for (r=0;r<L_ali;r++)
-    {
-        dd=dist(xt[r],ya[r]);
-        total_score2+=1/(1+dd/d0*d0);
-    }
-    total_score2/=Lnorm;
-
-    /* swap chain assignment */
-    if (total_score1<total_score2)
-    {
-        assign1_list[i1]=j2;
-        assign1_list[i2]=j1;
-        assign2_list[j1]=i2;
-        assign2_list[j2]=i1;
-    }
-
-    /* clean up */
-    DeleteArray(&xa, xlen);
-    DeleteArray(&ya, ylen);
-    DeleteArray(&xt, xlen);
-    return total_score1<total_score2;
-}
-
-/* assign chain-chain correspondence */
-double enhanced_greedy_search(double **TMave_mat,int *assign1_list,
-    int *assign2_list, const int chain1_num, const int chain2_num)
-{
-    double total_score=0;
-    double tmp_score=0;
-    int i,j;
-    int maxi=0;
-    int maxj=0;
-
-    /* initialize parameters */
-    for (i=0;i<chain1_num;i++) assign1_list[i]=-1;
-    for (j=0;j<chain2_num;j++) assign2_list[j]=-1;
-
-    /* greedy assignment: in each iteration, the highest chain pair is
-     * assigned, until no assignable chain is left */
-    while(1)
-    {
-        tmp_score=-1;
-        for (i=0;i<chain1_num;i++)
-        {
-            if (assign1_list[i]>=0) continue;
-            for (j=0;j<chain2_num;j++)
-            {
-                if (assign2_list[j]>=0 || TMave_mat[i][j]<=0) continue;
-                if (TMave_mat[i][j]>tmp_score) 
-                {
-                    maxi=i;
-                    maxj=j;
-                    tmp_score=TMave_mat[i][j];
-                }
-            }
-        }
-        if (tmp_score<=0) break; // error: no assignable chain
-        assign1_list[maxi]=maxj;
-        assign2_list[maxj]=maxi;
-        total_score+=tmp_score;
-    }
-    if (total_score<=0) return total_score; // error: no assignable chain
-    //cout<<"assign1_list={";
-    //for (i=0;i<chain1_num;i++) cout<<assign1_list[i]<<","; cout<<"}"<<endl;
-    //cout<<"assign2_list={";
-    //for (j=0;j<chain2_num;j++) cout<<assign2_list[j]<<","; cout<<"}"<<endl;
-
-    /* iterative refinemnt */
-    double delta_score;
-    int *assign1_tmp=new int [chain1_num];
-    int *assign2_tmp=new int [chain2_num];
-    for (i=0;i<chain1_num;i++) assign1_tmp[i]=assign1_list[i];
-    for (j=0;j<chain2_num;j++) assign2_tmp[j]=assign2_list[j];
-    int old_i=-1;
-    int old_j=-1;
-
-    for (int iter=0;iter<getmin(chain1_num,chain2_num)*5;iter++)
-    {
-        delta_score=-1;
-        for (i=0;i<chain1_num;i++)
-        {
-            old_j=assign1_list[i];
-            for (j=0;j<chain2_num;j++)
-            {
-                // attempt to swap (i,old_j=assign1_list[i]) with (i,j)
-                if (j==assign1_list[i] || TMave_mat[i][j]<=0) continue;
-                old_i=assign2_list[j];
-
-                assign1_tmp[i]=j;
-                if (old_i>=0) assign1_tmp[old_i]=old_j;
-                assign2_tmp[j]=i;
-                if (old_j>=0) assign2_tmp[old_j]=old_i;
-
-                delta_score=TMave_mat[i][j];
-                if (old_j>=0) delta_score-=TMave_mat[i][old_j];
-                if (old_i>=0) delta_score-=TMave_mat[old_i][j];
-                if (old_i>=0 && old_j>=0) delta_score+=TMave_mat[old_i][old_j];
-
-                if (delta_score>0) // successful swap
-                {
-                    assign1_list[i]=j;
-                    if (old_i>=0) assign1_list[old_i]=old_j;
-                    assign2_list[j]=i;
-                    if (old_j>=0) assign2_list[old_j]=old_i;
-                    total_score+=delta_score;
-                    break;
-                }
-                else
-                {
-                    assign1_tmp[i]=assign1_list[i];
-                    if (old_i>=0) assign1_tmp[old_i]=assign1_list[old_i];
-                    assign2_tmp[j]=assign2_list[j];
-                    if (old_j>=0) assign2_tmp[old_j]=assign2_list[old_j];
-                }
-            }
-            if (delta_score>0) break;
-        }
-        if (delta_score<=0) break; // cannot swap any chain pair
-    }
-
-    /* clean up */
-    delete[]assign1_tmp;
-    delete[]assign2_tmp;
-    return total_score;
-}
-
-double calculate_centroids(const vector<vector<vector<double> > >&a_vec,
-    const int chain_num, double ** centroids)
-{
-    int L=0;
-    int c,r; // index of chain and residue
-    for (c=0; c<chain_num; c++)
-    {
-        centroids[c][0]=0;
-        centroids[c][1]=0;
-        centroids[c][2]=0;
-        L=a_vec[c].size();
-        for (r=0; r<L; r++)
-        {
-            centroids[c][0]+=a_vec[c][r][0];
-            centroids[c][1]+=a_vec[c][r][1];
-            centroids[c][2]+=a_vec[c][r][2];
-        }
-        centroids[c][0]/=L;
-        centroids[c][1]/=L;
-        centroids[c][2]/=L;
-        //cout<<centroids[c][0]<<'\t'
-            //<<centroids[c][1]<<'\t'
-            //<<centroids[c][2]<<endl;
-    }
-
-    vector<double> d0_vec(chain_num,-1);
-    int c2=0;
-    double d0MM=0;
-    for (c=0; c<chain_num; c++)
-    {
-        for (c2=0; c2<chain_num; c2++)
-        {
-            if (c2==c) continue;
-            d0MM=sqrt(dist(centroids[c],centroids[c2]));
-            if (d0_vec[c]<=0) d0_vec[c]=d0MM;
-            else d0_vec[c]=getmin(d0_vec[c], d0MM);
-        }
-    }
-    d0MM=0;
-    for (c=0; c<chain_num; c++) d0MM+=d0_vec[c];
-    d0MM/=chain_num;
-    d0_vec.clear();
-    //cout<<d0MM<<endl;
-    return d0MM;
-}
-
-/* calculate MMscore of aligned chains
- * MMscore = sum(TMave_mat[i][j]) * sum(1/(1+dij^2/d0MM^2)) 
- *         / (L* getmin(chain1_num,chain2_num))
- * dij is the centroid distance between chain pair i and j
- * d0MM is scaling factor. TMave_mat[i][j] is the TM-score between
- * chain pair i and j multiple by getmin(Li*Lj) */
-double calMMscore(double **TMave_mat,int *assign1_list,
-    const int chain1_num, const int chain2_num, double **xcentroids,
-    double **ycentroids, const double d0MM, double **r1, double **r2,
-    double **xt, double t[3], double u[3][3], const int L)
-{
-    int Nali=0; // number of aligned chain
-    int i,j;
-    double MMscore=0;
-    for (i=0;i<chain1_num;i++)
-    {
-        j=assign1_list[i];
-        if (j<0) continue;
-
-        r1[Nali][0]=xcentroids[i][0];
-        r1[Nali][1]=xcentroids[i][1];
-        r1[Nali][2]=xcentroids[i][2];
-
-        r2[Nali][0]=ycentroids[j][0];
-        r2[Nali][1]=ycentroids[j][1];
-        r2[Nali][2]=ycentroids[j][2];
-
-        Nali++;
-        MMscore+=TMave_mat[i][j];
-    }
-    MMscore/=L;
-
-    double RMSD = 0;
-    double TMscore=0;
-    if (Nali>=3)
-    {
-        /* Kabsch superposition */
-        Kabsch(r1, r2, Nali, 1, &RMSD, t, u);
-        do_rotation(r1, xt, Nali, t, u);
-
-        /* calculate pseudo-TMscore */
-        double dd=0;
-        for (i=0;i<Nali;i++)
-        {
-            dd=dist(xt[i], r2[i]);
-            TMscore+=1/(1+dd/(d0MM*d0MM));
-        }
-    }
-    else if (Nali==2)
-    {
-        double dd=dist(r1[0],r2[0]);
-        TMscore=1/(1+dd/(d0MM*d0MM));
-    }
-    else TMscore=1; // only one aligned chain.
-    TMscore/=getmin(chain1_num,chain2_num);
-    MMscore*=TMscore;
-    return MMscore;
-}
-
-/* check if this is alignment of heterooligomer or homooligomer
- * return het_deg, which ranges from 0 to 1.
- * The larger the value, the more "hetero"; 
- * Tthe smaller the value, the more "homo" */
-double check_heterooligomer(double **TMave_mat, const int chain1_num,
-    const int chain2_num)
-{
-    double het_deg=0;
-    double min_TM=-1;
-    double max_TM=-1;
-    int i,j;
-    for (i=0;i<chain1_num;i++)
-    {
-        for (j=0;j<chain2_num;j++)
-        {
-            if (min_TM<0 || TMave_mat[i][j] <min_TM) min_TM=TMave_mat[i][j];
-            if (max_TM<0 || TMave_mat[i][j]>=max_TM) max_TM=TMave_mat[i][j];
-        }
-    }
-    het_deg=(max_TM-min_TM)/max_TM;
-    //cout<<"min_TM="<<min_TM<<endl;
-    //cout<<"max_TM="<<max_TM<<endl;
-    return het_deg;
-}
-
-/* reassign chain-chain correspondence, specific for homooligomer */
-double homo_refined_greedy_search(double **TMave_mat,int *assign1_list,
-    int *assign2_list, const int chain1_num, const int chain2_num,
-    double **xcentroids, double **ycentroids, const double d0MM,
-    const int L, double **ut_mat)
-{
-    double MMscore_max=0;
-    double MMscore=0;
-    int i,j;
-    int c1,c2;
-    int max_i=-1; // the chain pair whose monomer u t yields highest MMscore
-    int max_j=-1;
-
-    int chain_num=getmin(chain1_num,chain2_num);
-    int *assign1_tmp=new int [chain1_num];
-    int *assign2_tmp=new int [chain2_num];
-    double **xt;
-    NewArray(&xt, chain1_num, 3);
-    double t[3];
-    double u[3][3];
-    int ui,uj,ut_idx;
-    double TMscore=0; // pseudo TM-score
-    double TMsum  =0;
-    double TMnow  =0;
-    double TMmax  =0;
-    double dd=0;
-
-    size_t  total_pair=chain1_num*chain2_num; // total pair
-    double *ut_tmc_mat=new double [total_pair]; // chain level TM-score
-    vector<pair<double,int> > ut_tm_vec(total_pair,make_pair(0.0,0)); // product of both
-
-    for (c1=0;c1<chain1_num;c1++)
-    {
-        for (c2=0;c2<chain2_num;c2++)
-        {
-            if (TMave_mat[c1][c2]<=0) continue;
-            ut_idx=c1*chain2_num+c2;
-            for (ui=0;ui<3;ui++)
-                for (uj=0;uj<3;uj++) u[ui][uj]=ut_mat[ut_idx][ui*3+uj];
-            for (uj=0;uj<3;uj++) t[uj]=ut_mat[ut_idx][9+uj];
-            
-            do_rotation(xcentroids, xt, chain1_num, t, u);
-
-            for (i=0;i<chain1_num;i++) assign1_tmp[i]=-1;
-            for (j=0;j<chain2_num;j++) assign2_tmp[j]=-1;
-
-
-            for (i=0;i<chain1_num;i++)
-            {
-                for (j=0;j<chain2_num;j++)
-                {
-                    ut_idx=i*chain2_num+j;
-                    ut_tmc_mat[ut_idx]=0;
-                    ut_tm_vec[ut_idx].first=-1;
-                    ut_tm_vec[ut_idx].second=ut_idx;
-                    if (TMave_mat[i][j]<=0) continue;
-                    dd=dist(xt[i],ycentroids[j]);
-                    ut_tmc_mat[ut_idx]=1/(1+dd/(d0MM*d0MM));
-                    ut_tm_vec[ut_idx].first=
-                        ut_tmc_mat[ut_idx]*TMave_mat[i][j];
-                    //cout<<"TM["<<ut_idx<<"]="<<ut_tm_vec[ut_idx].first<<endl;
-                }
-            }
-            //cout<<"sorting "<<total_pair<<" chain pairs"<<endl;
-
-            /* initial assignment */
-            assign1_tmp[c1]=c2;
-            assign2_tmp[c2]=c1;
-            TMsum=TMave_mat[c1][c2];
-            TMscore=ut_tmc_mat[c1*chain2_num+c2];
-
-            /* further assignment */
-            sort(ut_tm_vec.begin(), ut_tm_vec.end()); // sort in ascending order
-            for (ut_idx=total_pair-1;ut_idx>=0;ut_idx--)
-            {
-                j=ut_tm_vec[ut_idx].second % chain2_num;
-                i=int(ut_tm_vec[ut_idx].second / chain2_num);
-                if (TMave_mat[i][j]<=0) break;
-                if (assign1_tmp[i]>=0 || assign2_tmp[j]>=0) continue;
-                assign1_tmp[i]=j;
-                assign2_tmp[j]=i;
-                TMsum+=TMave_mat[i][j];
-                TMscore+=ut_tmc_mat[i*chain2_num+j];
-                //cout<<"ut_idx="<<ut_tm_vec[ut_idx].second
-                    //<<"\ti="<<i<<"\tj="<<j<<"\ttm="<<ut_tm_vec[ut_idx].first<<endl;
-            }
-
-            /* final MMscore */
-            MMscore=(TMsum/L)*(TMscore/chain_num);
-            if (max_i<0 || max_j<0 || MMscore>MMscore_max)
-            {
-                max_i=c1;
-                max_j=c2;
-                MMscore_max=MMscore;
-                for (i=0;i<chain1_num;i++) assign1_list[i]=assign1_tmp[i];
-                for (j=0;j<chain2_num;j++) assign2_list[j]=assign2_tmp[j];
-                //cout<<"TMsum/L="<<TMsum/L<<endl;
-                //cout<<"TMscore/chain_num="<<TMscore/chain_num<<endl;
-                //cout<<"MMscore="<<MMscore<<endl;
-                //cout<<"assign1_list={";
-                //for (i=0;i<chain1_num;i++) 
-                    //cout<<assign1_list[i]<<","; cout<<"}"<<endl;
-                //cout<<"assign2_list={";
-                //for (j=0;j<chain2_num;j++)
-                    //cout<<assign2_list[j]<<","; cout<<"}"<<endl;
-            }
-        }
-    }
-
-    /* clean up */
-    delete[]assign1_tmp;
-    delete[]assign2_tmp;
-    delete[]ut_tmc_mat;
-    ut_tm_vec.clear();
-    DeleteArray(&xt, chain1_num);
-    return MMscore;
-}
-
-/* reassign chain-chain correspondence, specific for heterooligomer */
-double hetero_refined_greedy_search(double **TMave_mat,int *assign1_list,
-    int *assign2_list, const int chain1_num, const int chain2_num,
-    double **xcentroids, double **ycentroids, const double d0MM, const int L)
-{
-    double MMscore_old=0;
-    double MMscore=0;
-    int i,j;
-
-    double **r1;
-    double **r2;
-    double **xt;
-    int chain_num=getmin(chain1_num,chain2_num);
-    NewArray(&r1, chain_num, 3);
-    NewArray(&r2, chain_num, 3);
-    NewArray(&xt, chain_num, 3);
-    double t[3];
-    double u[3][3];
-
-    /* calculate MMscore */
-    MMscore=MMscore_old=calMMscore(TMave_mat, assign1_list, chain1_num,
-        chain2_num, xcentroids, ycentroids, d0MM, r1, r2, xt, t, u, L);
-    //cout<<"MMscore="<<MMscore<<endl;
-    //cout<<"TMave_mat="<<endl;
-    //for (i=0;i<chain1_num;i++)
-    //{
-        //for (j=0; j<chain2_num; j++)
-        //{
-            //if (j<chain2_num-1) cout<<TMave_mat[i][j]<<'\t';
-            //else                cout<<TMave_mat[i][j]<<endl;
-        //}
-    //}
-
-    /* iteratively refine chain assignment. in each iteration, attempt
-     * to swap (i,old_j=assign1_list[i]) with (i,j) */
-    double delta_score=-1;
-    int *assign1_tmp=new int [chain1_num];
-    int *assign2_tmp=new int [chain2_num];
-    for (i=0;i<chain1_num;i++) assign1_tmp[i]=assign1_list[i];
-    for (j=0;j<chain2_num;j++) assign2_tmp[j]=assign2_list[j];
-    int old_i=-1;
-    int old_j=-1;
-
-    //cout<<"assign1_list={";
-    //for (i=0;i<chain1_num;i++) cout<<assign1_list[i]<<","; cout<<"}"<<endl;
-    //cout<<"assign2_list={";
-    //for (j=0;j<chain2_num;j++) cout<<assign2_list[j]<<","; cout<<"}"<<endl;
-
-    for (int iter=0;iter<chain1_num*chain2_num;iter++)
-    {
-        delta_score=-1;
-        for (i=0;i<chain1_num;i++)
-        {
-            old_j=assign1_list[i];
-            for (j=0;j<chain2_num;j++)
-            {
-                if (j==assign1_list[i] || TMave_mat[i][j]<=0) continue;
-                old_i=assign2_list[j];
-
-                assign1_tmp[i]=j;
-                if (old_i>=0) assign1_tmp[old_i]=old_j;
-                assign2_tmp[j]=i;
-                if (old_j>=0) assign2_tmp[old_j]=old_i;
-                
-                MMscore=calMMscore(TMave_mat, assign1_tmp, chain1_num,
-                    chain2_num, xcentroids, ycentroids, d0MM,
-                    r1, r2, xt, t, u, L);
-
-                //cout<<"(i,j,old_i,old_j,MMscore)=("<<i<<","<<j<<","
-                    //<<old_i<<","<<old_j<<","<<MMscore<<")"<<endl;
-
-                if (MMscore>MMscore_old) // successful swap
-                {
-                    assign1_list[i]=j;
-                    if (old_i>=0) assign1_list[old_i]=old_j;
-                    assign2_list[j]=i;
-                    if (old_j>=0) assign2_list[old_j]=old_i;
-                    delta_score=(MMscore-MMscore_old);
-                    MMscore_old=MMscore;
-                    //cout<<"MMscore="<<MMscore<<endl;
-                    break;
-                }
-                else
-                {
-                    assign1_tmp[i]=assign1_list[i];
-                    if (old_i>=0) assign1_tmp[old_i]=assign1_list[old_i];
-                    assign2_tmp[j]=assign2_list[j];
-                    if (old_j>=0) assign2_tmp[old_j]=assign2_list[old_j];
-                }
-            }
-        }
-        //cout<<"iter="<<iter<<endl;
-        //cout<<"assign1_list={";
-        //for (i=0;i<chain1_num;i++) cout<<assign1_list[i]<<","; cout<<"}"<<endl;
-        //cout<<"assign2_list={";
-        //for (j=0;j<chain2_num;j++) cout<<assign2_list[j]<<","; cout<<"}"<<endl;
-        if (delta_score<=0) break; // cannot swap any chain pair
-    }
-    MMscore=MMscore_old;
-    //cout<<"MMscore="<<MMscore<<endl;
-
-    /* clean up */
-    delete[]assign1_tmp;
-    delete[]assign2_tmp;
-    DeleteArray(&r1, chain_num);
-    DeleteArray(&r2, chain_num);
-    DeleteArray(&xt, chain_num);
-    return MMscore;
-}
-
-void copy_chain_data(const vector<vector<double> >&a_vec_i,
-    const vector<char>&seq_vec_i,const vector<char>&sec_vec_i,
-    const int len,double **a,char *seq,char *sec)
-{
-    int r;
-    for (r=0;r<len;r++)
-    {
-        a[r][0]=a_vec_i[r][0];
-        a[r][1]=a_vec_i[r][1];
-        a[r][2]=a_vec_i[r][2];
-        seq[r]=seq_vec_i[r];
-        sec[r]=sec_vec_i[r];
-    }
-    seq[len]=0;
-    sec[len]=0;
-}
-
-void parse_chain_list(const vector<string>&chain_list,
-    vector<vector<vector<double> > >&a_vec, vector<vector<char> >&seq_vec,
-    vector<vector<char> >&sec_vec, vector<int>&mol_vec, vector<int>&len_vec,
-    vector<string>&chainID_list, const int ter_opt, const int split_opt,
-    const string mol_opt, const int infmt_opt, const string atom_opt,
-    const int mirror_opt, const int het_opt, int &len_aa, int &len_na,  
-    const int o_opt, vector<string>&resi_vec)
-{
-    size_t i;
-    int chain_i,r;
-    string name;
-    int chainnum;
-    double **xa;
-    int len;
-    char *seq,*sec;
-
-    vector<vector<string> >PDB_lines;
-    vector<double> tmp_atom_array(3,0);
-    vector<vector<double> > tmp_chain_array;
-    vector<char>tmp_seq_array;
-    vector<char>tmp_sec_array;
-    //vector<string> resi_vec;
-    int read_resi=0;
-    if (o_opt) read_resi=2;
-
-    for (i=0;i<chain_list.size();i++)
-    {
-        name=chain_list[i];
-        chainnum=get_PDB_lines(name, PDB_lines, chainID_list,
-            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt, het_opt);
-        if (!chainnum)
-        {
-            cerr<<"Warning! Cannot parse file: "<<name
-                <<". Chain number 0."<<endl;
-            continue;
-        }
-        for (chain_i=0;chain_i<chainnum;chain_i++)
-        {
-            len=PDB_lines[chain_i].size();
-            if (!len)
-            {
-                cerr<<"Warning! Cannot parse file: "<<name
-                    <<". Chain length 0."<<endl;
-                continue;
-            }
-            else if (len<3)
-            {
-                cerr<<"Sequence is too short <3!: "<<name<<endl;
-                continue;
-            }
-            NewArray(&xa, len, 3);
-            seq = new char[len + 1];
-            sec = new char[len + 1];
-            len = read_PDB(PDB_lines[chain_i], xa, seq, resi_vec, read_resi);
-            if (mirror_opt) for (r=0;r<len;r++) xa[r][2]=-xa[r][2];
-            if (mol_vec[chain_i]>0 || mol_opt=="RNA")
-                make_sec(seq, xa, len, sec,atom_opt);
-            else make_sec(xa, len, sec); // secondary structure assignment
-            
-            /* store in vector */
-            tmp_chain_array.assign(len,tmp_atom_array);
-            vector<char>tmp_seq_array(len+1,0);
-            vector<char>tmp_sec_array(len+1,0);
-            for (r=0;r<len;r++)
-            {
-                tmp_chain_array[r][0]=xa[r][0];
-                tmp_chain_array[r][1]=xa[r][1];
-                tmp_chain_array[r][2]=xa[r][2];
-                tmp_seq_array[r]=seq[r];
-                tmp_sec_array[r]=sec[r];
-            }
-            a_vec.push_back(tmp_chain_array);
-            seq_vec.push_back(tmp_seq_array);
-            sec_vec.push_back(tmp_sec_array);
-            len_vec.push_back(len);
-
-            /* clean up */
-            tmp_chain_array.clear();
-            tmp_seq_array.clear();
-            tmp_sec_array.clear();
-            PDB_lines[chain_i].clear();
-            DeleteArray(&xa, len);
-            delete [] seq;
-            delete [] sec;
-        } // chain_i
-        name.clear();
-        PDB_lines.clear();
-        mol_vec.clear();
-    } // i
-    tmp_atom_array.clear();
-
-    if (mol_opt=="RNA") mol_vec.assign(a_vec.size(),1);
-    else if (mol_opt=="protein") mol_vec.assign(a_vec.size(),-1);
-    else
-    {
-        mol_vec.assign(a_vec.size(),0);
-        for (i=0;i<a_vec.size();i++)
-        {
-            for (r=0;r<len_vec[i];r++)
-            {
-                if (seq_vec[i][r]>='a' && seq_vec[i][r]<='z') mol_vec[i]++;
-                else mol_vec[i]--;
-            }
-        }
-    }
-
-    len_aa=0;
-    len_na=0;
-    for (i=0;i<a_vec.size();i++)
-    {
-        if (mol_vec[i]>0) len_na+=len_vec[i];
-        else              len_aa+=len_vec[i];
-    }
-}
-
-int copy_chain_pair_data(
-    const vector<vector<vector<double> > >&xa_vec,
-    const vector<vector<vector<double> > >&ya_vec,
-    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
-    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
-    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
-    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
-    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
-    int chain1_num, int chain2_num,
-    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
-    int *assign1_list, int *assign2_list, vector<string>&sequence)
-{
-    int i,j,r;
-    sequence.clear();
-    sequence.push_back("");
-    sequence.push_back("");
-    int mol_type=0;
-    int xlen=0;
-    int ylen=0;
-    for (i=0;i<chain1_num;i++)
-    {
-        j=assign1_list[i];
-        if (j<0) continue;
-        for (r=0;r<xlen_vec[i];r++)
-        {
-            seqx[xlen]=seqx_vec[i][r];
-            secx[xlen]=secx_vec[i][r];
-            xa[xlen][0]= xa_vec[i][r][0];
-            xa[xlen][1]= xa_vec[i][r][1];
-            xa[xlen][2]= xa_vec[i][r][2];
-            xlen++;
-        }
-        sequence[0]+=seqxA_mat[i][j];
-        for (r=0;r<ylen_vec[j];r++)
-        {
-            seqy[ylen]=seqy_vec[j][r];
-            secy[ylen]=secy_vec[j][r];
-            ya[ylen][0]= ya_vec[j][r][0];
-            ya[ylen][1]= ya_vec[j][r][1];
-            ya[ylen][2]= ya_vec[j][r][2];
-            ylen++;
-        }
-        sequence[1]+=seqyA_mat[i][j];
-        mol_type+=mol_vec1[i]+mol_vec2[j];
-    }
-    seqx[xlen]=0;
-    secx[xlen]=0;
-    seqy[ylen]=0;
-    secy[ylen]=0;
-    return mol_type;
-}
-
-double MMalign_search(
-    const vector<vector<vector<double> > >&xa_vec,
-    const vector<vector<vector<double> > >&ya_vec,
-    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
-    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
-    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
-    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
-    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
-    int len_aa, int len_na, int chain1_num, int chain2_num,
-    double **TM1_mat, double **TM2_mat, double **TMave_mat,
-    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
-    int *assign1_list, int *assign2_list, vector<string>&sequence,
-    double d0_scale, bool fast_opt)
-{
-    double total_score=0;
-    int i,j;
-    int xlen=0;
-    int ylen=0;
-    for (i=0;i<chain1_num;i++)
-    {
-        if (assign1_list[i]<0) continue;
-        xlen+=xlen_vec[i];
-        ylen+=ylen_vec[assign1_list[i]];
-    }
-    if (xlen<=3 || ylen<=3) return total_score;
-
-    seqx = new char[xlen+1];
-    secx = new char[xlen+1];
-    NewArray(&xa, xlen, 3);
-    seqy = new char[ylen+1];
-    secy = new char[ylen+1];
-    NewArray(&ya, ylen, 3);
-
-    int mol_type=copy_chain_pair_data(xa_vec, ya_vec, seqx_vec, seqy_vec,
-        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
-        xa, ya, seqx, seqy, secx, secy, chain1_num, chain2_num,
-        seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence);
-
-    /* declare variable specific to this pair of TMalign */
-    double t0[3], u0[3][3];
-    double TM1, TM2;
-    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
-    double d0_0, TM_0;
-    double d0A, d0B, d0u, d0a;
-    double d0_out=5.0;
-    string seqM, seqxA, seqyA;// for output alignment
-    double rmsd0 = 0.0;
-    int L_ali;                // Aligned length in standard_TMscore
-    double Liden=0;
-    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
-    int n_ali=0;
-    int n_ali8=0;
-
-    double Lnorm_ass=len_aa+len_na;
-
-    /* entry function for structure alignment */
-    TMalign_main(xa, ya, seqx, seqy, secx, secy,
-        t0, u0, TM1, TM2, TM3, TM4, TM5,
-        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
-        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
-        xlen, ylen, sequence, Lnorm_ass, d0_scale,
-        3, false, true, false, fast_opt, mol_type, -1);
-
-    /* clean up */
-    delete [] seqx;
-    delete [] seqy;
-    delete [] secx;
-    delete [] secy;
-    DeleteArray(&xa,xlen);
-    DeleteArray(&ya,ylen);
-
-    /* re-compute chain level alignment */
-    for (i=0;i<chain1_num;i++)
-    {
-        xlen=xlen_vec[i];
-        if (xlen<3)
-        {
-            for (j=0;j<chain2_num;j++)
-                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
-            continue;
-        }
-        seqx = new char[xlen+1];
-        secx = new char[xlen+1];
-        NewArray(&xa, xlen, 3);
-        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
-            xlen,xa,seqx,secx);
-
-        double **xt;
-        NewArray(&xt, xlen, 3);
-        do_rotation(xa, xt, xlen, t0, u0);
-
-        for (j=0;j<chain2_num;j++)
-        {
-            if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
-            {
-                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
-                continue;
-            }
-
-            ylen=ylen_vec[j];
-            if (ylen<3)
-            {
-                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
-                continue;
-            }
-            seqy = new char[ylen+1];
-            secy = new char[ylen+1];
-            NewArray(&ya, ylen, 3);
-            copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
-                ylen,ya,seqy,secy);
-
-            /* declare variable specific to this pair of TMalign */
-            d0_out=5.0;
-            seqM.clear();
-            seqxA.clear();
-            seqyA.clear();
-            rmsd0 = 0.0;
-            Liden=0;
-            int *invmap = new int[ylen+1];
-
-            double Lnorm_ass=len_aa;
-            if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_ass=len_na;
-
-            /* entry function for structure alignment */
-            se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
-                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
-                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
-                xlen, ylen, sequence, Lnorm_ass, d0_scale,
-                0, false, true, false,
-                mol_vec1[i]+mol_vec2[j], 1, invmap);
-
-            /* print result */
-            TM1_mat[i][j]=TM2; // normalized by chain1
-            TM2_mat[i][j]=TM1; // normalized by chain2
-            seqxA_mat[i][j]=seqxA;
-            seqyA_mat[i][j]=seqyA;
-
-            TMave_mat[i][j]=TM4*Lnorm_ass;
-
-            /* clean up */
-            seqM.clear();
-            seqxA.clear();
-            seqyA.clear();
-
-            delete[]seqy;
-            delete[]secy;
-            DeleteArray(&ya,ylen);
-        }
-        delete[]seqx;
-        delete[]secx;
-        DeleteArray(&xa,xlen);
-        DeleteArray(&xt,xlen);
-    }
-    return total_score;
-}
-
-void MMalign_final(
-    const string xname, const string yname,
-    const vector<string> chainID_list1, const vector<string> chainID_list2,
-    string fname_super, string fname_lign, string fname_matrix,
-    const vector<vector<vector<double> > >&xa_vec,
-    const vector<vector<vector<double> > >&ya_vec,
-    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
-    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
-    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
-    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
-    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
-    int len_aa, int len_na, int chain1_num, int chain2_num,
-    double **TM1_mat, double **TM2_mat, double **TMave_mat,
-    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqM_mat,
-    vector<vector<string> >&seqyA_mat, int *assign1_list, int *assign2_list,
-    vector<string>&sequence, const double d0_scale, const bool m_opt,
-    const int o_opt, const int outfmt_opt, const int ter_opt,
-    const int split_opt, const bool a_opt, const bool d_opt,
-    const bool fast_opt, const bool full_opt, const int mirror_opt,
-    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
-{
-    int i,j;
-    int xlen=0;
-    int ylen=0;
-    for (i=0;i<chain1_num;i++) xlen+=xlen_vec[i];
-    for (j=0;j<chain2_num;j++) ylen+=ylen_vec[j];
-    if (xlen<=3 || ylen<=3) return;
-
-    seqx = new char[xlen+1];
-    secx = new char[xlen+1];
-    NewArray(&xa, xlen, 3);
-    seqy = new char[ylen+1];
-    secy = new char[ylen+1];
-    NewArray(&ya, ylen, 3);
-
-    int mol_type=copy_chain_pair_data(xa_vec, ya_vec, seqx_vec, seqy_vec,
-        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
-        xa, ya, seqx, seqy, secx, secy, chain1_num, chain2_num,
-        seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence);
-
-    /* declare variable specific to this pair of TMalign */
-    double t0[3], u0[3][3];
-    double TM1, TM2;
-    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
-    double d0_0, TM_0;
-    double d0A, d0B, d0u, d0a;
-    double d0_out=5.0;
-    string seqM, seqxA, seqyA;// for output alignment
-    double rmsd0 = 0.0;
-    int L_ali;                // Aligned length in standard_TMscore
-    double Liden=0;
-    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
-    int n_ali=0;
-    int n_ali8=0;
-
-    double Lnorm_ass=len_aa+len_na;
-
-    /* entry function for structure alignment */
-    TMalign_main(xa, ya, seqx, seqy, secx, secy,
-        t0, u0, TM1, TM2, TM3, TM4, TM5,
-        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
-        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
-        xlen, ylen, sequence, Lnorm_ass, d0_scale,
-        3, a_opt, false, d_opt, fast_opt, mol_type, -1);
-
-    /* prepare full complex alignment */
-    string chainID1="";
-    string chainID2="";
-    sequence.clear();
-    sequence.push_back(""); // seqxA
-    sequence.push_back(""); // seqyA
-    sequence.push_back(""); // seqM
-    int aln_start=0;
-    int aln_end=0;
-    for (i=0;i<chain1_num;i++)
-    {
-        j=assign1_list[i];
-        if (j<0) continue;
-        chainID1+=chainID_list1[i];
-        chainID2+=chainID_list2[j];
-        sequence[0]+=seqxA_mat[i][j]+'*';
-        sequence[1]+=seqyA_mat[i][j]+'*';
-
-        aln_end+=seqxA_mat[i][j].size();
-        seqM_mat[i][j]=seqM.substr(aln_start,aln_end-aln_start);
-        sequence[2]+=seqM_mat[i][j]+'*';
-        aln_start=aln_end;
-    }
-
-    /* prepare unaligned region */
-    for (i=0;i<chain1_num;i++)
-    {
-        if (assign1_list[i]>=0) continue;
-        chainID1+=chainID_list1[i];
-        chainID2+=':';
-        string s(seqx_vec[i].begin(),seqx_vec[i].end());
-        sequence[0]+=s.substr(0,xlen_vec[i])+'*';
-        sequence[1]+=string(xlen_vec[i],'-')+'*';
-        s.clear();
-        sequence[2]+=string(xlen_vec[i],' ')+'*';
-    }
-    for (j=0;j<chain2_num;j++)
-    {
-        if (assign2_list[j]>=0) continue;
-        chainID1+=':';
-        chainID2+=chainID_list2[j];
-        string s(seqy_vec[j].begin(),seqy_vec[j].end());
-        sequence[0]+=string(ylen_vec[j],'-')+'*';
-        sequence[1]+=s.substr(0,ylen_vec[j])+'*';
-        s.clear();
-        sequence[2]+=string(ylen_vec[j],' ')+'*';
-    }
-
-    /* print alignment */
-    output_results(xname, yname, chainID1.c_str(), chainID2.c_str(),
-        xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
-        sequence[2].c_str(), sequence[0].c_str(), sequence[1].c_str(),
-        Liden, n_ali8, L_ali, TM_ali, rmsd_ali,
-        TM_0, d0_0, d0A, d0B, 0, d0_scale, d0a, d0u, 
-        (m_opt?fname_matrix:"").c_str(), outfmt_opt, ter_opt, true,
-        split_opt, o_opt, fname_super,
-        false, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2);
-
-    /* clean up */
-    seqM.clear();
-    seqxA.clear();
-    seqyA.clear();
-    delete [] seqx;
-    delete [] seqy;
-    delete [] secx;
-    delete [] secy;
-    DeleteArray(&xa,xlen);
-    DeleteArray(&ya,ylen);
-    sequence[0].clear();
-    sequence[1].clear();
-    sequence[2].clear();
-
-    if (!full_opt) return;
-
-    cout<<"# End of alignment for full complex. The following blocks list alignments for individual chains."<<endl;
-
-    /* re-compute chain level alignment */
-    for (i=0;i<chain1_num;i++)
-    {
-        j=assign1_list[i];
-        if (j<0) continue;
-        xlen=xlen_vec[i];
-        seqx = new char[xlen+1];
-        secx = new char[xlen+1];
-        NewArray(&xa, xlen, 3);
-        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
-            xlen,xa,seqx,secx);
-
-        double **xt;
-        NewArray(&xt, xlen, 3);
-        do_rotation(xa, xt, xlen, t0, u0);
-
-        ylen=ylen_vec[j];
-        if (ylen<3)
-        {
-            TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
-            continue;
-        }
-        seqy = new char[ylen+1];
-        secy = new char[ylen+1];
-        NewArray(&ya, ylen, 3);
-        copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
-            ylen,ya,seqy,secy);
-
-        /* declare variable specific to this pair of TMalign */
-        d0_out=5.0;
-        rmsd0 = 0.0;
-        Liden=0;
-        int *invmap = new int[ylen+1];
-        seqM="";
-        seqxA="";
-        seqyA="";
-        double Lnorm_ass=len_aa;
-        if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_ass=len_na;
-        sequence[0]=seqxA_mat[i][j];
-        sequence[1]=seqyA_mat[i][j];
-
-        /* entry function for structure alignment */
-        se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
-            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
-            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
-            xlen, ylen, sequence, Lnorm_ass, d0_scale,
-            1, a_opt, true, d_opt, mol_vec1[i]+mol_vec2[j], 1, invmap);
-
-        //TM2=TM4*Lnorm_ass/xlen;
-        //TM1=TM4*Lnorm_ass/ylen;
-        //d0A=d0u;
-        //d0B=d0u;
-
-        /* print result */
-        output_results(xname, yname,
-            chainID_list1[i].c_str(), chainID_list2[j].c_str(),
-            xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
-            seqM_mat[i][j].c_str(), seqxA_mat[i][j].c_str(),
-            seqyA_mat[i][j].c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali,
-            TM_0, d0_0, d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, 
-            "", outfmt_opt, ter_opt, false, split_opt, 0,
-            "", false, a_opt, false, d_opt, 0, resi_vec1, resi_vec2);
-
-        /* clean up */
-        seqxA.clear();
-        seqM.clear();
-        seqyA.clear();
-        sequence[0].clear();
-        sequence[1].clear();
-        delete[]seqy;
-        delete[]secy;
-        DeleteArray(&ya,ylen);
-        delete[]seqx;
-        delete[]secx;
-        DeleteArray(&xa,xlen);
-        DeleteArray(&xt,xlen);
-    }
-    sequence.clear();
-    return;
-}
diff --git a/modules/bindings/src/tmalign/OST_INFO b/modules/bindings/src/tmalign/OST_INFO
deleted file mode 100644
index 16ce11569..000000000
--- a/modules/bindings/src/tmalign/OST_INFO
+++ /dev/null
@@ -1,7 +0,0 @@
-Source code has been cloned August 2 2022 from:
-
-https://github.com/kad-ecoli/TMalign
-
-last commit:
-f0824499d8ab4fa84b2e75d253de80ab2c894c56
-
diff --git a/modules/bindings/src/wrap_tmalign.cc b/modules/bindings/src/wrap_tmalign.cc
index cefbe1a44..8c05e0228 100644
--- a/modules/bindings/src/wrap_tmalign.cc
+++ b/modules/bindings/src/wrap_tmalign.cc
@@ -17,7 +17,7 @@
 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 //------------------------------------------------------------------------------
 
-#include "tmalign/TMalign.h" // include for the external TMalign
+#include "USalign/TMalign.h" // include for the external TMalign
 
 #include <ost/mol/atom_view.hh>
 #include <ost/message.hh>
@@ -29,7 +29,8 @@ TMAlignResult WrappedTMAlign(const geom::Vec3List& pos_one,
                              const geom::Vec3List& pos_two, 
                              const ost::seq::SequenceHandle& seq1,
                              const ost::seq::SequenceHandle& seq2,
-                             bool fast) {
+                             bool fast,
+                             bool rna) {
 
   int xlen = pos_one.size();
   int ylen = pos_two.size();  
@@ -99,12 +100,13 @@ TMAlignResult WrappedTMAlign(const geom::Vec3List& pos_one,
   double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
   int n_ali=0;
   int n_ali8=0;
+  int mol_type=static_cast<int>(rna); // Treated as RNA if mol_type > 0
 
   TMalign_main(xa, ya, seqx, seqy, secx, secy, t0, u0, TM1, TM2, TM3, TM4, TM5,
                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, 
                sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, 
-               fast, 0, TMcut);
+               fast, mol_type, TMcut);
 
   // cleanup
   DeleteArray(&xa, xlen);
@@ -131,60 +133,74 @@ TMAlignResult WrappedTMAlign(const geom::Vec3List& pos_one,
   return res;
 }
 
+// Extracts the data required by TMalign from a single chain: one trace
+// atom position per residue (CA for peptide-linking, C3' for nucleotide-
+// linking residues) stored in pos and the matching one letter codes stored
+// in s (named after the chain). Residues with one letter code '?', without
+// the trace atom or of any other chemical type are skipped. rna_mode is
+// true on return if nucleotide residues were collected. Throws ost::Error
+// if the chain contributes both peptide and nucleotide residues.
+void ExtractChainInfo(const ost::mol::ChainView& chain, geom::Vec3List& pos,
+                      ost::seq::SequenceHandle& s, bool& rna_mode) {
 
-TMAlignResult WrappedTMAlign(const ost::mol::ChainView& chain1,
-                             const ost::mol::ChainView& chain2,
-                             bool fast) {
-
-  geom::Vec3List pos1;
-  geom::Vec3List pos2;
-  std::vector<char> s1;
-  std::vector<char> s2;
-
-  ost::mol::ResidueViewList res_list_1 = chain1.GetResidueList();
-  ost::mol::ResidueViewList res_list_2 = chain2.GetResidueList();
+  rna_mode = false;
+  pos.clear();
+  std::vector<char> seq_chars;
+  ost::mol::ResidueViewList residues = chain.GetResidueList();
 
-  for(ost::mol::ResidueViewList::iterator it = res_list_1.begin(); 
-      it != res_list_1.end(); ++it) {
-    if(!it->IsPeptideLinking()) {
-      continue;
-    }
-    ost::mol::AtomView ca = it->FindAtom("CA");
-    if(!ca.IsValid()) {
-      continue;
-    }
+  for(auto it = residues.begin(); it != residues.end(); ++it) {
     char olc = it->GetOneLetterCode();
     if(olc == '?') {
       continue;
     }
-    pos1.push_back(ca.GetPos());
-    s1.push_back(olc);
-  }
-
-  for(ost::mol::ResidueViewList::iterator it = res_list_2.begin(); 
-      it != res_list_2.end(); ++it) {
-    if(!it->IsPeptideLinking()) {
-      continue;
-    }
-    ost::mol::AtomView ca = it->FindAtom("CA");
-    if(!ca.IsValid()) {
-      continue;
+    // peptide takes precedence, anything else than peptide/nucleotide
+    // linking residues is ignored
+    const bool is_peptide = it->IsPeptideLinking();
+    const bool is_nucleotide = !is_peptide && it->IsNucleotideLinking();
+    if(!is_peptide && !is_nucleotide) {
+      continue;
     }
-    char olc = it->GetOneLetterCode();
-    if(olc == '?') {
-      continue;
+    ost::mol::AtomView trace = it->FindAtom(is_peptide ? "CA" : "C3'");
+    if(!trace.IsValid()) {
+      continue;
     }
-    pos2.push_back(ca.GetPos());
-    s2.push_back(olc);
+    // rna_mode reflects the type of everything collected so far, a
+    // deviating residue type means that the chain is mixed
+    if(!pos.empty() && rna_mode != is_nucleotide) {
+      std::stringstream ss;
+      ss << "Error in WrappedTMAlign: Chains cannot have peptide and RNA ";
+      ss << "residues. Problematic chain: "<<chain.GetName();
+      throw ost::Error(ss.str());
+    }
+    rna_mode = is_nucleotide;
+    seq_chars.push_back(olc);
+    pos.push_back(trace.GetPos());
   }
+  s = ost::seq::CreateSequence(chain.GetName(),
+                               String(seq_chars.begin(), seq_chars.end()));
+}
 
-  String str_s1(s1.begin(), s1.end());
-  String str_s2(s2.begin(), s2.end());
 
-  ost::seq::SequenceHandle seq_s1 = ost::seq::CreateSequence("one", str_s1);
-  ost::seq::SequenceHandle seq_s2 = ost::seq::CreateSequence("two", str_s2);
+// Chain based overload: extracts trace atom positions and sequences with
+// ExtractChainInfo and throws ost::Error if only one of the chains is RNA.
+TMAlignResult WrappedTMAlign(const ost::mol::ChainView& chain1,
+                             const ost::mol::ChainView& chain2,
+                             bool fast) {
+  geom::Vec3List pos1;
+  ost::seq::SequenceHandle s1;
+  bool rna_mode1 = false; // overwritten by ExtractChainInfo
+  ExtractChainInfo(chain1, pos1, s1, rna_mode1);
+
+  geom::Vec3List pos2;
+  ost::seq::SequenceHandle s2;
+  bool rna_mode2 = false; // overwritten by ExtractChainInfo
+  ExtractChainInfo(chain2, pos2, s2, rna_mode2);
+  if(rna_mode1 != rna_mode2) {
+    throw ost::Error("Error in WrappedTMAlign: Cannot compare peptide with "
+                     "RNA chains");
+  }
 
-  return WrappedTMAlign(pos1, pos2, seq_s1, seq_s2, fast);
+  return WrappedTMAlign(pos1, pos2, s1, s2, fast, rna_mode1);
 }
 
 }} //ns
diff --git a/modules/bindings/src/wrap_tmalign.hh b/modules/bindings/src/wrap_tmalign.hh
index 4163d4644..fcd126e9e 100644
--- a/modules/bindings/src/wrap_tmalign.hh
+++ b/modules/bindings/src/wrap_tmalign.hh
@@ -56,7 +56,8 @@ TMAlignResult WrappedTMAlign(const geom::Vec3List& pos_one,
                              const geom::Vec3List& pos_two, 
                              const ost::seq::SequenceHandle& seq1,
                              const ost::seq::SequenceHandle& seq2,
-                             bool fast = false);
+                             bool fast = false,
+                             bool rna = false);
 
 TMAlignResult WrappedTMAlign(const ost::mol::ChainView& ent1,
                              const ost::mol::ChainView& ent2,
-- 
GitLab