diff --git a/modules/bindings/src/tmalign/HwRMSD.cpp b/modules/bindings/src/tmalign/HwRMSD.cpp
index d50607f5bd31299248f66711ba31efc1f134c45b..651d82456462f0e1ced7970fd2d27c77d7dd3981 100644
--- a/modules/bindings/src/tmalign/HwRMSD.cpp
+++ b/modules/bindings/src/tmalign/HwRMSD.cpp
@@ -62,7 +62,16 @@ void print_extra_help()
 "             2: glocal-both alignment\n"
 "             3: Smith-Waterman algorithm for local alignment\n"
 "\n"
-"    -iter    ALignment-superposition iterations. Default is 1\n"
+"    -iter    Alignment-superposition iterations. Default is 10\n"
+"\n"
+"    -seq     Type of sequence used to make initial alignment\n"
+"             1: amino acid/nucleotide sequence\n"
+"             2: secondary structure\n"
+"             3: (default) sequence + secondary structure\n"
+"\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
 "\n"
 "    -infmt1  Input format for chain1\n"
 "    -infmt2  Input format for chain2\n"
@@ -91,7 +100,7 @@ void print_help(bool h_opt=false)
 "\n"
 "    -i    Start with an alignment specified in fasta file 'align.txt'\n"
 "\n"
-"    -I    Stick to the alignment 'align.txt'\n"
+"    -I    Stick to the alignment specified in 'align.txt'\n"
 "\n"
 "    -m    Output HwRMSD rotation matrix\n"
 "\n"
@@ -135,8 +144,7 @@ int main(int argc, char *argv[])
 
     bool h_opt = false; // print full help message
     bool m_opt = false; // flag for -m, output rotation matrix
-    bool i_opt = false; // flag for -i, with user given initial alignment
-    bool I_opt = false; // flag for -I, stick to user given alignment
+    int  i_opt = 0;     // 0: no user alignment, 1: -i, 3: -I
     bool o_opt = false; // flag for -o, output superposed structure
     bool a_opt = false; // flag for -a, normalized by average length
     bool u_opt = false; // flag for -u, normalized by user specified length
@@ -147,6 +155,7 @@ int main(int argc, char *argv[])
     int    ter_opt   =3;     // TER, END, or different chainID
     int    split_opt =0;     // do not split chain
     int    outfmt_opt=0;     // set -outfmt to full output
+    int    het_opt=0;        // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
     string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
     string suffix_opt="";    // set -suffix to empty
@@ -157,7 +166,9 @@ int main(int argc, char *argv[])
     vector<string> chain1_list; // only when -dir1 is set
     vector<string> chain2_list; // only when -dir2 is set
     int    glocal    =0;
-    int    iter_opt  =1;
+    int    iter_opt  =10;
+    double early_opt =0.01;
+    int    seq_opt   =3;
 
     for(int i = 1; i < argc; i++)
     {
@@ -187,16 +198,20 @@ int main(int argc, char *argv[])
         }
         else if ( !strcmp(argv[i],"-i") && i < (argc-1) )
         {
-            fname_lign = argv[i + 1];      i_opt = true; i++;
+            if (i_opt==3)
+                PrintErrorAndQuit("ERROR! -i and -I cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 1; i++;
+        }
+        else if (!strcmp(argv[i], "-I") && i < (argc-1) )
+        {
+            if (i_opt==1)
+                PrintErrorAndQuit("ERROR! -I and -i cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 3; i++;
         }
         else if (!strcmp(argv[i], "-m") && i < (argc-1) )
         {
             fname_matrix = argv[i + 1];    m_opt = true; i++;
         }// get filename for rotation matrix
-        else if (!strcmp(argv[i], "-I") && i < (argc-1) )
-        {
-            fname_lign = argv[i + 1];      I_opt = true; i++;
-        }
         else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) )
         {
             infmt1_opt=atoi(argv[i + 1]); i++;
@@ -253,6 +268,18 @@ int main(int argc, char *argv[])
         {
             iter_opt=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-early") && i < (argc-1) )
+        {
+            early_opt=atof(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-seq") && i < (argc-1) )
+        {
+            seq_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
         else if (xname.size() == 0) xname=argv[i];
         else if (yname.size() == 0) yname=argv[i];
         else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
@@ -280,16 +307,14 @@ int main(int argc, char *argv[])
             PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
     }
     if (atom_opt.size()!=4)
-        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
     if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
-        PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein.");
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
     else if (mol_opt=="protein" && atom_opt=="auto")
         atom_opt=" CA ";
     else if (mol_opt=="RNA" && atom_opt=="auto")
         atom_opt=" C3'";
 
-    if (i_opt && I_opt)
-        PrintErrorAndQuit("ERROR! -I and -i cannot be used together");
     if (u_opt && Lnorm_ass<=0)
         PrintErrorAndQuit("Wrong value for option -u!  It should be >0");
     if (d_opt && d0_scale<=0)
@@ -298,7 +323,7 @@ int main(int argc, char *argv[])
         PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d");
     if (byresi_opt!=0)
     {
-        if (i_opt || I_opt)
+        if (i_opt)
             PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I");
         if (byresi_opt<0 || byresi_opt>3)
             PrintErrorAndQuit("-byresi can only be 0, 1, 2 or 3");
@@ -312,11 +337,13 @@ int main(int argc, char *argv[])
     if (split_opt<0 || split_opt>2)
         PrintErrorAndQuit("-split can only be 0, 1 or 2");
     if (iter_opt<=0) PrintErrorAndQuit("-iter must be >0");
+    if (seq_opt!=1 && seq_opt!=2 && seq_opt!=3)
+        PrintErrorAndQuit("-seq must be 1, 2 or 3");
 
     /* read initial alignment file from 'align.txt' */
-    if (i_opt || I_opt) read_user_alignment(sequence, fname_lign, I_opt);
+    if (i_opt) read_user_alignment(sequence, fname_lign, i_opt);
 
-    if (byresi_opt) I_opt=true;
+    if (byresi_opt) i_opt=3;
 
     if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt
         PrintErrorAndQuit("ERROR! Please provide a file name for option -m!");
@@ -347,7 +374,7 @@ int main(int argc, char *argv[])
     int    xlen, ylen;         // chain length
     int    xchainnum,ychainnum;// number of chains in a PDB file
     char   *seqx, *seqy;       // for the protein sequence 
-    int    *secx, *secy;       // for the secondary structure 
+    char    *secx, *secy;       // for the secondary structure 
     double **xa, **ya;         // for input vectors xa[0...xlen-1][0..2] and
                                // ya[0...ylen-1][0..2], in general,
                                // ya is regarded as native structure 
@@ -361,7 +388,7 @@ int main(int argc, char *argv[])
         /* parse chain 1 */
         xname=chain1_list[i];
         xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
-            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt);
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
         if (!xchainnum)
         {
             cerr<<"Warning! Cannot parse file: "<<xname
@@ -379,18 +406,13 @@ int main(int argc, char *argv[])
                     <<". Chain length 0."<<endl;
                 continue;
             }
-            else if (xlen<=5)
-            {
-                cerr<<"Sequence is too short <=5!: "<<xname<<endl;
-                continue;
-            }
             NewArray(&xa, xlen, 3);
             seqx = new char[xlen + 1];
             xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, 
                 resi_vec1, byresi_opt);
-            if (iter_opt>=2)  // secondary structure assignment
+            if (seq_opt==2 || (seq_opt==3 && iter_opt>=2))  // SS assignment
             {
-                secx = new int[xlen];
+                secx = new char[xlen+1];
                 if (mol_vec1[chain_i]>0) 
                      make_sec(seqx, xa, xlen, secx,atom_opt);
                 else make_sec(xa, xlen, secx);
@@ -403,7 +425,8 @@ int main(int argc, char *argv[])
                 {
                     yname=chain2_list[j];
                     ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
-                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt);
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
                     if (!ychainnum)
                     {
                         cerr<<"Warning! Cannot parse file: "<<yname
@@ -422,18 +445,13 @@ int main(int argc, char *argv[])
                             <<". Chain length 0."<<endl;
                         continue;
                     }
-                    else if (ylen<=5)
-                    {
-                        cerr<<"Sequence is too short <=5!: "<<yname<<endl;
-                        continue;
-                    }
                     NewArray(&ya, ylen, 3);
                     seqy = new char[ylen + 1];
                     ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
                         resi_vec2, byresi_opt);
-                    if (iter_opt>=2)
+                    if (seq_opt==2 || (seq_opt==3 && iter_opt>=2))  // SS assignment
                     {
-                        secy = new int[ylen];
+                        secy = new char[ylen+1];
                         if (mol_vec2[chain_j]>0)
                              make_sec(seqy, ya, ylen, secy, atom_opt);
                         else make_sec(ya, ylen, secy);
@@ -456,6 +474,7 @@ int main(int argc, char *argv[])
                     double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
                     int n_ali=0;
                     int n_ali8=0;
+                    int *invmap = new int[ylen+1];
 
                     /* entry function for structure alignment */
                     HwRMSD_main(xa, ya, seqx, seqy, secx, secy, t0, u0,
@@ -463,25 +482,30 @@ int main(int argc, char *argv[])
                         d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
                         rmsd0, L_ali, Liden, TM_ali,
                         rmsd_ali, n_ali, n_ali8, xlen, ylen, sequence,
-                        Lnorm_ass, d0_scale, i_opt, I_opt, a_opt, u_opt, d_opt,
-                        mol_vec1[chain_i]+mol_vec2[chain_j], glocal, iter_opt);
+                        Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],
+                        invmap, glocal, iter_opt, seq_opt, early_opt);
+
+                    if (outfmt_opt>=2) 
+                        get_seqID(invmap, seqx, seqy, ylen, Liden, n_ali8);
 
                     /* print result */
                     output_results(
-                        xname.substr(dir1_opt.size()),
-                        yname.substr(dir2_opt.size()),
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
                         chainID_list1[chain_i].c_str(),
                         chainID_list2[chain_j].c_str(),
                         xlen, ylen, t0, u0, TM1, TM2, 
                         TM3, TM4, TM5, rmsd0, d0_out,
                         seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
-                        n_ali8, n_ali, L_ali, TM_ali, rmsd_ali,
+                        n_ali8, L_ali, TM_ali, rmsd_ali,
                         TM_0, d0_0, d0A, d0B,
                         Lnorm_ass, d0_scale, d0a, d0u, 
                         (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(),
-                        outfmt_opt, ter_opt, 
+                        outfmt_opt, ter_opt, false, split_opt, o_opt,
                         (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(),
-                        false, false, a_opt, u_opt, d_opt);
+                        false, a_opt, u_opt, d_opt, 0,
+                        resi_vec1, resi_vec2);
 
                     /* Done! Free memory */
                     seqM.clear();
@@ -489,6 +513,7 @@ int main(int argc, char *argv[])
                     seqyA.clear();
                     DeleteArray(&ya, ylen);
                     delete [] seqy;
+                    delete [] invmap;
                     if (iter_opt>=2) delete [] secy;
                     resi_vec2.clear();
                 } // chain_j
diff --git a/modules/bindings/src/tmalign/HwRMSD.h b/modules/bindings/src/tmalign/HwRMSD.h
index 312477af1e93cf339f32b76fcc187ff5dc79f4de..8a29399cdfaf9c8b55e3767508f3ff6d78a4cad4 100644
--- a/modules/bindings/src/tmalign/HwRMSD.h
+++ b/modules/bindings/src/tmalign/HwRMSD.h
@@ -3,9 +3,6 @@
 #include "NWalign.h"
 #include "se.h"
 
-const char* HwRMSD_SSmapProtein=" CHTE";
-const char* HwRMSD_SSmapRNA    =" .<> ";
-
 double Kabsch_Superpose(double **r1, double **r2, double **xt,
     double **xa, double **ya, int xlen, int ylen, int invmap[],
     int& L_ali, double t[3], double u[3][3], const int mol_type)
@@ -44,17 +41,41 @@ double Kabsch_Superpose(double **r1, double **r2, double **xt,
     return RMSD;
 }
 
+/* Convert a gapped alignment ('-' marks a gap) into invmap_tmp, where
+ * invmap_tmp[j] is the index in x aligned to residue j of y, or -1 if none.
+ * invmap_tmp is left untouched when seqxA_tmp is empty. */
+void parse_alignment_into_invmap(const string &seqxA_tmp,
+    const string &seqyA_tmp, const int xlen, const int ylen, int *invmap_tmp)
+{
+    if (seqxA_tmp.empty()) return;
+    const int L = static_cast<int>(min(seqxA_tmp.size(), seqyA_tmp.size()));
+    for (int j = 0; j < ylen; j++) invmap_tmp[j] = -1;
+    int i1 = -1; // index of the last residue consumed from x
+    int i2 = -1; // index of the last residue consumed from y
+    for (int j = 0; j < L; j++)
+    {
+        const bool x_res = (seqxA_tmp[j] != '-');
+        if (x_res) i1++;
+        if (seqyA_tmp[j] == '-') continue;
+        i2++;
+        if (i2 >= ylen || i1 >= xlen) break; // alignment exceeds chain length
+        if (x_res) invmap_tmp[i2] = i1;
+    }
+}
+
+/* outfmt_opt is disabled for alignment consistency */
 int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
-    const int *secx, const int *secy, double t0[3], double u0[3][3],
+    const char *secx, const char *secy, double t0[3], double u0[3][3],
     double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
     double &d0_0, double &TM_0, double &d0A, double &d0B, double &d0u,
     double &d0a, double &d0_out, string &seqM, string &seqxA, string &seqyA,
     double &rmsd0, int &L_ali, double &Liden, double &TM_ali,
     double &rmsd_ali, int &n_ali, int &n_ali8, const int xlen, const int ylen,
     const vector<string>&sequence, const double Lnorm_ass,
-    const double d0_scale, const bool i_opt, const bool I_opt,
-    const int a_opt, const bool u_opt, const bool d_opt,
-    const int mol_type, const int glocal=0, const int iter_opt=1)
+    const double d0_scale, const int i_opt,
+    const int a_opt, const bool u_opt, const bool d_opt, const int mol_type,
+    int *invmap, const int glocal=0, const int iter_opt=10,
+    const int seq_opt=3, const double early_opt=0.01)
 {
     /***********************/
     /* allocate memory     */
@@ -66,9 +87,7 @@ int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
     NewArray(&xt, xlen, 3);
     NewArray(&r1, minlen, 3);
     NewArray(&r2, minlen, 3);
-    int *invmap = new int[ylen+1];
-    char *ssx;
-    char *ssy;
+    int *invmap_tmp = new int[ylen+1];
 
     int i, j, i1, i2, L;
     double TM1_tmp,TM2_tmp,TM3_tmp,TM4_tmp,TM5_tmp,TM_ali_tmp;
@@ -77,72 +96,80 @@ int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
     int L_ali_tmp,n_ali_tmp,n_ali8_tmp;
     double Liden_tmp;
     double rmsd_ali_tmp;
+    double max_TM=0;
+    double cur_TM=0;
 
     /* initialize alignment */
     TM1=TM2=TM1_tmp=TM2_tmp=L_ali=-1;
-    if (I_opt || i_opt)
+
+    if (i_opt)
     {
         seqxA_tmp=sequence[0];
         seqyA_tmp=sequence[1];
     }
-    else
-        NWalign(seqx, seqy, xlen, ylen, seqxA_tmp, seqyA_tmp, mol_type, glocal);
-    int total_iter=(I_opt || iter_opt<1)?1:iter_opt;
+    else if (seq_opt==2) NWalign_main(secx, secy, xlen, ylen,
+            seqxA_tmp, seqyA_tmp, mol_type, invmap_tmp, 1, glocal);
+    else NWalign_main(seqx, seqy, xlen, ylen,
+            seqxA_tmp, seqyA_tmp, mol_type, invmap_tmp, 1, glocal);
+    int total_iter=(i_opt==3 || iter_opt<1)?1:iter_opt;
 
     /*******************************/
     /* perform iterative alignment */
     /*******************************/
     for (int iter=0;iter<total_iter;iter++)
     {
+        n_ali_tmp=n_ali8_tmp=0;
         /* get ss alignment for the second iteration */
-        if (iter==1 && !i_opt)
-        {
-            ssx=new char[xlen+1];
-            ssy=new char[ylen+1];
-            for (i=0;i<xlen;i++)
-            {
-                if (mol_type>0) ssx[i]=HwRMSD_SSmapRNA[secx[i]];
-                else ssx[i]=HwRMSD_SSmapProtein[secx[i]];
-            }
-            for (i=0;i<ylen;i++)
-            {
-                if (mol_type>0) ssy[i]=HwRMSD_SSmapRNA[secy[i]];
-                else ssy[i]=HwRMSD_SSmapProtein[secy[i]];
-            }
-            ssx[xlen]=0;
-            ssy[ylen]=0;
-            NWalign(ssx, ssy, xlen, ylen, seqxA_tmp, seqyA_tmp,
-                mol_type, glocal);
-            delete [] ssx;
-            delete [] ssy;
-        }
+        if (iter==1 && !i_opt && seq_opt==3) NWalign_main(secx, secy, xlen,
+            ylen, seqxA_tmp, seqyA_tmp, mol_type, invmap_tmp, 1, glocal);
 
         /* parse initial alignment */
-        for (j = 0; j < ylen; j++) invmap[j] = -1;
-        i1 = -1;
-        i2 = -1;
-        L = min(seqxA_tmp.size(), seqyA_tmp.size());
-        for (j = 0; j<L; j++)
-        {
-            if (seqxA_tmp[j] != '-') i1++;
-            if (seqyA_tmp[j] != '-')
-            {
-                i2++;
-                if (i2 >= ylen || i1 >= xlen) j = L;
-                else if (seqxA_tmp[j] != '-') invmap[i2] = i1;
-            }
-        }
+        parse_alignment_into_invmap(seqxA_tmp, seqyA_tmp, xlen, ylen, invmap_tmp);
 
         /* superpose */
-        Kabsch_Superpose(r1, r2, xt, xa, ya, xlen, ylen, invmap,
+        Kabsch_Superpose(r1, r2, xt, xa, ya, xlen, ylen, invmap_tmp,
             L_ali, t, u, mol_type);
 
         /* derive new alignment */
-        se_main(xt, ya, seqx, seqy, TM1_tmp, TM2_tmp, TM3_tmp, TM4_tmp, TM5_tmp,
-            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+        se_main(xt, ya, seqx, seqy, TM1_tmp, TM2_tmp, TM3_tmp, TM4_tmp,
+            TM5_tmp, d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
             seqM_tmp, seqxA_tmp, seqyA_tmp, rmsd0_tmp, L_ali_tmp, Liden_tmp,
-            TM_ali_tmp, rmsd_ali_tmp, n_ali_tmp, n_ali8_tmp, xlen, ylen, sequence,
-            Lnorm_ass, d0_scale, I_opt, a_opt, u_opt, d_opt, mol_type);
+            TM_ali_tmp, rmsd_ali_tmp, n_ali_tmp, n_ali8_tmp, xlen, ylen,
+            sequence, Lnorm_ass, d0_scale, i_opt==3, a_opt, u_opt, d_opt,
+            mol_type, 1, invmap_tmp);
+
+        if (n_ali8_tmp==0)
+        {
+            cerr<<"WARNING! zero aligned residue in iteration "<<iter<<endl;
+            if (xlen>=ylen) seqxA_tmp=(string)(seqx);
+            if (xlen<=ylen) seqyA_tmp=(string)(seqy);
+            if (xlen<ylen)
+            {
+                seqxA_tmp.clear();
+                for (i1=0;i1<(int)((ylen-xlen)/2);i1++) seqxA_tmp+='-';
+                seqxA_tmp+=(string)(seqx);
+                for (i1=seqxA_tmp.size();i1<ylen;i1++) seqxA_tmp+='-';
+            }
+            if (xlen>ylen)
+            {
+                seqyA_tmp.clear();
+                for (i1=0;i1<(int)((xlen-ylen)/2);i1++) seqyA_tmp+='-';
+                seqyA_tmp+=(string)(seqy);
+                for (i1=seqyA_tmp.size();i1<xlen;i1++) seqyA_tmp+='-';
+            }
+        
+            parse_alignment_into_invmap(seqxA_tmp, seqyA_tmp, xlen, ylen, invmap_tmp);
+
+            Kabsch_Superpose(r1, r2, xt, xa, ya, xlen, ylen, invmap_tmp,
+                L_ali, t, u, mol_type);
+
+            se_main(xt, ya, seqx, seqy, TM1_tmp, TM2_tmp, TM3_tmp, TM4_tmp,
+                TM5_tmp, d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                seqM_tmp, seqxA_tmp, seqyA_tmp, rmsd0_tmp, L_ali_tmp, Liden_tmp,
+                TM_ali_tmp, rmsd_ali_tmp, n_ali_tmp, n_ali8_tmp, xlen, ylen,
+                sequence, Lnorm_ass, d0_scale, i_opt==3, a_opt, u_opt, d_opt,
+                mol_type, 1, invmap_tmp);
+        }
 
         /* accept new alignment */
         if (TM1_tmp>TM1 && TM2_tmp>TM2)
@@ -167,6 +194,7 @@ int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
             seqxA =seqxA_tmp;
             seqM  =seqM_tmp;
             seqyA =seqyA_tmp;
+            for (j=0; j<ylen; j++) invmap[j]=invmap_tmp[j];
 
             rmsd0 =rmsd0_tmp;
             Liden =Liden_tmp;
@@ -174,7 +202,7 @@ int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
             n_ali8=n_ali8_tmp;
 
             /* user specified initial alignment parameters */
-            if ((i_opt || I_opt) && L_ali==-1)
+            if (i_opt && L_ali==-1)
             {
                 L_ali=L_ali_tmp;
                 TM_ali=TM_ali_tmp;
@@ -184,8 +212,20 @@ int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
         else
         {
             if (iter>=2) break;
-            seqxA_tmp = seqxA;
-            seqyA_tmp = seqyA;
+            seqxA_tmp  = seqxA;
+            seqyA_tmp  = seqyA;
+            for (j=0; j<ylen; j++) invmap_tmp[j]=invmap[j];
+            rmsd0_tmp  = 0;
+            Liden_tmp  = 0;
+            n_ali_tmp  = 0;
+            n_ali8_tmp = 0;
+        }
+
+        if (iter>=2 && early_opt>0)
+        {
+            cur_TM=(TM1+TM2)/2;
+            if (cur_TM-max_TM<early_opt) break;
+            max_TM=cur_TM;
         }
     }
 
@@ -195,7 +235,7 @@ int HwRMSD_main(double **xa, double **ya, const char *seqx, const char *seqy,
     seqxA_tmp.clear();
     seqM_tmp.clear();
     seqyA_tmp.clear();
-    delete [] invmap;
+    delete [] invmap_tmp;
     DeleteArray(&xt, xlen);
     DeleteArray(&r1, minlen);
     DeleteArray(&r2, minlen);
diff --git a/modules/bindings/src/tmalign/Kabsch.h b/modules/bindings/src/tmalign/Kabsch.h
index a4c5d6f7e549eca28189c8bb534042ef62f80609..a12296d45b4e92cee5e7898e205f9b255b499a0c 100644
--- a/modules/bindings/src/tmalign/Kabsch.h
+++ b/modules/bindings/src/tmalign/Kabsch.h
@@ -26,7 +26,7 @@ bool Kabsch(double **x, double **y, int n, int mode, double *rms,
     int a_failed = 0, b_failed = 0;
     double epsilon = 0.00000001;
 
-    //initializtation
+    //initialization
     *rms = 0;
     rms1 = 0;
     e0 = 0;
@@ -99,7 +99,7 @@ bool Kabsch(double **x, double **y, int n, int mode, double *rms,
         r[j][2] = sz[j] - s1[2] * s2[j] / n;
     }
 
-    //compute determinat of matrix r
+    //compute determinant of matrix r
     det = r[0][0] * (r[1][1] * r[2][2] - r[1][2] * r[2][1])\
         - r[0][1] * (r[1][0] * r[2][2] - r[1][2] * r[2][0])\
         + r[0][2] * (r[1][0] * r[2][1] - r[1][1] * r[2][0]);
diff --git a/modules/bindings/src/tmalign/MMalign.cpp b/modules/bindings/src/tmalign/MMalign.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6cc485647ec95d9f6c581148fa9dd059981d6518
--- /dev/null
+++ b/modules/bindings/src/tmalign/MMalign.cpp
@@ -0,0 +1,680 @@
+/* command line argument parsing and document of MMalign main program */
+
+#include "MMalign.h"
+
+using namespace std;
+
+/* Print the MM-align version/citation banner to stdout. */
+void print_version()
+{
+    static const char banner[] = "\n"
+" **********************************************************************\n"
+" * MM-align (Version 20200519): complex structure alignment           *\n"
+" * References: S Mukherjee, Y Zhang. Nucl Acids Res 37(11):e83 (2009) *\n"
+" * Please email comments and suggestions to yangzhanglab@umich.edu    *\n"
+" **********************************************************************";
+    cout << banner << endl;
+}
+
+/* Print the advanced option reference (shown only when -h is given). */
+void print_extra_help()
+{
+    cout << "Additional options:\n"
+"    -fast    Fast but slightly inaccurate alignment\n"
+"\n"
+"    -dir1    Use a list of PDB chains listed by 'chain1_list' under\n"
+"             'chain1_folder' as all chains for the first complex.\n"
+"             Note that the slash is necessary.\n"
+"             $ MMalign -dir1 chain1_folder/ chain1_list complex2\n"
+"\n"
+"    -dir2    Use a list of PDB chains listed by 'chain2_list'\n"
+"             under 'chain2_folder' as all chains for the second complex.\n"
+"             $ MMalign complex1 -dir2 chain2_folder/ chain2_list\n"
+"\n"
+"    -suffix  (Only when -dir1 and/or -dir2 are set, default is empty)\n"
+"             add file name suffix to files listed by chain1_list or chain2_list\n"
+"\n"
+"    -atom    4-character atom name used to represent a residue.\n"
+"             Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n"
+"             (note the spaces before and after CA).\n"
+"\n"
+"    -mol     Types of molecules to align\n"
+"             auto   : (default) align both proteins and nucleic acids\n"
+"             protein: only align proteins\n"
+"             RNA    : only align nucleic acids (RNA and DNA)\n"
+"\n"
+"    -split   Whether to split PDB file into multiple chains\n"
+"             2: (default) treat each chain as a separate chain (-ter should be <=1)\n"
+"             1: treat each MODEL as a separate chain (-ter should be 0)\n"
+"                and joins all chains in a MODEL into a single chain.\n"
+"\n"
+"    -outfmt  Output format\n"
+"             0: (default) full output\n"
+"             1: fasta format compact output\n"
+"             2: tabular format very compact output\n"
+"            -1: full output, but without version or citation information\n"
+"\n"
+"    -TMcut   -1: (default) do not consider TMcut\n"
+"             Values in [0.5,1): Do not proceed with TM-align for this\n"
+"                 structure pair if TM-score is unlikely to reach TMcut.\n"
+"                 TMcut normalization is set by the -a option:\n"
+"                 -2: normalized by longer structure length\n"
+"                 -1: normalized by shorter structure length\n"
+"                  0: (default, same as F) normalized by second structure\n"
+"                  1: same as T, normalized by average structure length\n"
+"\n"
+"    -mirror  Whether to align the mirror image of input structure\n"
+"             0: (default) do not align mirrored structure\n"
+"             1: align mirror of chain1 to origin chain2\n"
+"\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
+"    -infmt1  Input format for complex1\n"
+"    -infmt2  Input format for complex2\n"
+"            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
+"             0: PDB format\n"
+"             1: SPICKER format\n"
+"             2: xyz format\n"
+"             3: PDBx/mmCIF format\n"
+    <<endl;
+}
+
+/* Print usage (and the advanced options if h_opt), then exit successfully. */
+void print_help(bool h_opt=false)
+{
+    print_version();
+    cout <<
+"\n"
+"Usage: MMalign complex1.pdb complex2.pdb [Options]\n"
+"\n"
+"Options:\n"
+"    -a    TM-score normalized by the average length of two structures\n"
+"          T or F, (default F)\n"
+"\n"
+"    -m    Output MM-align rotation matrix\n"
+"\n"
+"    -d    TM-score scaled by an assigned d0, e.g. 5 Angstroms\n"
+"\n"
+"    -o    Output the superposition of complex1.pdb to MM_sup.pdb\n"
+"          $ MMalign complex1.pdb complex2.pdb -o MM_sup.pdb\n"
+"          To view superposed full-atom structures:\n"
+"          $ pymol MM_sup.pdb complex2.pdb\n"
+"\n"
+"    -full Whether to show full alignment result, including alignment of\n"
+"          individual chains. T or F, (default F)\n"
+"\n"
+"    -ter  Whether to read all MODELs in a multi-model structure file\n"
+"          1: (default) only read the first model, recommended for alignment\n"
+"             of asymmetric units.\n"
+"          0: read all MODEL, recommended for alignment of biological\n"
+"             assemblies, i.e., biological units (biounits).\n"
+"\n"
+"    -v    Print the version of MM-align\n"
+"\n"
+"    -h    Print the full help message\n"
+"\n"
+"    (Options -a, -d, -m, -o won't change the final structure alignment)\n\n"
+"Example usages:\n"
+"    MMalign complex1.pdb complex2.pdb\n"
+"    MMalign complex1.pdb complex2.pdb -d 5.0\n"
+"    MMalign complex1.pdb complex2.pdb -a T -o complex1.sup\n"
+"    MMalign complex1.pdb complex2.pdb -m matrix.txt\n"
+    <<endl;
+    if (h_opt) print_extra_help(); // full option list only on -h
+
+    exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc < 2) print_help();
+
+
+    clock_t t1, t2;
+    t1 = clock();
+
+    /**********************/
+    /*    get argument    */
+    /**********************/
+    string xname       = "";
+    string yname       = "";
+    string fname_super = ""; // file name for superposed structure
+    string fname_lign  = ""; // file name for user alignment
+    string fname_matrix= ""; // file name for output matrix
+    vector<string> sequence; // get value from alignment file
+    double d0_scale    =0;
+
+    bool h_opt = false; // print full help message
+    bool v_opt = false; // print version
+    bool m_opt = false; // flag for -m, output rotation matrix
+    bool o_opt = false; // flag for -o, output superposed structure
+    int  a_opt = 0;     // flag for -a, do not normalized by average length
+    bool d_opt = false; // flag for -d, user specified d0
+
+    bool   full_opt  = false;// do not show chain level alignment
+    double TMcut     =-1;
+    int    infmt1_opt=-1;    // PDB or PDBx/mmCIF format for chain_1
+    int    infmt2_opt=-1;    // PDB or PDBx/mmCIF format for chain_2
+    int    ter_opt   =1;     // ENDMDL or END
+    int    split_opt =2;     // split by chain
+    int    outfmt_opt=0;     // set -outfmt to full output
+    bool   fast_opt  =false; // flags for -fast, fTM-align algorithm
+    int    mirror_opt=0;     // do not align mirror
+    int    het_opt   =0;     // do not read HETATM residues
+    string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
+    string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
+    string suffix_opt="";    // set -suffix to empty
+    string dir1_opt  ="";    // set -dir1 to empty
+    string dir2_opt  ="";    // set -dir2 to empty
+    vector<string> chain1_list; // only when -dir1 is set
+    vector<string> chain2_list; // only when -dir2 is set
+
+    for(int i = 1; i < argc; i++)
+    {
+        if ( !strcmp(argv[i],"-o") && i < (argc-1) )
+        {
+            fname_super = argv[i + 1];     o_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-a") && i < (argc-1) )
+        {
+            if (!strcmp(argv[i + 1], "T"))      a_opt=true;
+            else if (!strcmp(argv[i + 1], "F")) a_opt=false;
+            else 
+            {
+                a_opt=atoi(argv[i + 1]);
+                if (a_opt!=-2 && a_opt!=-1 && a_opt!=1)
+                    PrintErrorAndQuit("-a must be -2, -1, 1, T or F");
+            }
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-full") && i < (argc-1) )
+        {
+            if (!strcmp(argv[i + 1], "T"))      full_opt=true;
+            else if (!strcmp(argv[i + 1], "F")) full_opt=false;
+            else PrintErrorAndQuit("-full must be T or F");
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-d") && i < (argc-1) )
+        {
+            d0_scale = atof(argv[i + 1]); d_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-v") )
+        {
+            v_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-h") )
+        {
+            h_opt = true;
+        }
+        else if (!strcmp(argv[i], "-m") && i < (argc-1) )
+        {
+            fname_matrix = argv[i + 1];    m_opt = true; i++;
+        }// get filename for rotation matrix
+        else if (!strcmp(argv[i], "-fast"))
+        {
+            fast_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) )
+        {
+            infmt1_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) )
+        {
+            infmt2_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-ter") && i < (argc-1) )
+        {
+            ter_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-split") && i < (argc-1) )
+        {
+            split_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-atom") && i < (argc-1) )
+        {
+            atom_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-mol") && i < (argc-1) )
+        {
+            mol_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) )
+        {
+            dir1_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) )
+        {
+            dir2_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) )
+        {
+            suffix_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) )
+        {
+            outfmt_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) )
+        {
+            TMcut=atof(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
+        else if (xname.size() == 0) xname=argv[i];
+        else if (yname.size() == 0) yname=argv[i];
+        else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
+    }
+
+    if(yname.size()==0)
+    {
+        if (h_opt) print_help(h_opt);
+        if (v_opt)
+        {
+            print_version();
+            exit(EXIT_FAILURE);
+        }
+        if (xname.size()==0)
+            PrintErrorAndQuit("Please provide input structures");
+        PrintErrorAndQuit("Please provide the second input structure");
+    }
+
+    if (suffix_opt.size() && dir1_opt.size()+dir2_opt.size()==0)
+        PrintErrorAndQuit("-suffix is only valid if -dir1 or -dir2 is set");
+    if ((dir1_opt.size() || dir2_opt.size()) && (m_opt || o_opt))
+        PrintErrorAndQuit("-m or -o cannot be set with -dir1 or -dir2");
+    if (atom_opt.size()!=4)
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
+    if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
+    else if (mol_opt=="protein" && atom_opt=="auto")
+        atom_opt=" CA ";
+    else if (mol_opt=="RNA" && atom_opt=="auto")
+        atom_opt=" C3'";
+
+    if (d_opt && d0_scale<=0)
+        PrintErrorAndQuit("Wrong value for option -d!  It should be >0");
+    if (outfmt_opt>=2 && (a_opt || d_opt))
+        PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -d");
+    if (ter_opt!=0 && ter_opt!=1)
+        PrintErrorAndQuit("-ter should be 1 or 0");
+    if (split_opt!=1 && split_opt!=2)
+        PrintErrorAndQuit("-split should be 1 or 2");
+    else if (split_opt==1 && ter_opt!=0)
+        PrintErrorAndQuit("-split 1 should be used with -ter 0");
+
+    if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt
+        PrintErrorAndQuit("ERROR! Please provide a file name for option -m!");
+
+    /* parse file list */
+    if (dir1_opt.size()==0) chain1_list.push_back(xname);
+    else file2chainlist(chain1_list, xname, dir1_opt, suffix_opt);
+
+    if (dir2_opt.size()==0) chain2_list.push_back(yname);
+    else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt);
+
+    if (outfmt_opt==2)
+        cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t"
+            <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl;
+
+    /* declare previously global variables */
+    vector<vector<vector<double> > > xa_vec; // structure of complex1
+    vector<vector<vector<double> > > ya_vec; // structure of complex2
+    vector<vector<char> >seqx_vec; // sequence of complex1
+    vector<vector<char> >seqy_vec; // sequence of complex2
+    vector<vector<char> >secx_vec; // secondary structure of complex1
+    vector<vector<char> >secy_vec; // secondary structure of complex2
+    vector<int> mol_vec1;          // molecule type of complex1, RNA if >0
+    vector<int> mol_vec2;          // molecule type of complex2, RNA if >0
+    vector<string> chainID_list1;  // list of chainID1
+    vector<string> chainID_list2;  // list of chainID2
+    vector<int> xlen_vec;          // length of complex1
+    vector<int> ylen_vec;          // length of complex2
+    int    i,j;                    // chain index
+    int    xlen, ylen;             // chain length
+    double **xa, **ya;             // structure of single chain
+    char   *seqx, *seqy;           // for the protein sequence 
+    char   *secx, *secy;           // for the secondary structure 
+    int    xlen_aa,ylen_aa;        // total length of protein
+    int    xlen_na,ylen_na;        // total length of RNA/DNA
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+
+    /* parse complex */
+    parse_chain_list(chain1_list, xa_vec, seqx_vec, secx_vec, mol_vec1,
+        xlen_vec, chainID_list1, ter_opt, split_opt, mol_opt, infmt1_opt,
+        atom_opt, mirror_opt, het_opt, xlen_aa, xlen_na, o_opt, resi_vec1);
+    if (xa_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 1");
+    parse_chain_list(chain2_list, ya_vec, seqy_vec, secy_vec, mol_vec2,
+        ylen_vec, chainID_list2, ter_opt, split_opt, mol_opt, infmt2_opt,
+        atom_opt, 0, het_opt, ylen_aa, ylen_na, o_opt, resi_vec2);
+    if (ya_vec.size()==0) PrintErrorAndQuit("ERROR! 0 chain in complex 2");
+    int len_aa=getmin(xlen_aa,ylen_aa);
+    int len_na=getmin(xlen_na,ylen_na);
+    if (a_opt)
+    {
+        len_aa=(xlen_aa+ylen_aa)/2;
+        len_na=(xlen_na+ylen_na)/2;
+    }
+
+    /* perform monomer alignment if there is only one chain */
+    if (xa_vec.size()==1 && ya_vec.size()==1)
+    {
+        xlen = xlen_vec[0];
+        ylen = ylen_vec[0];
+        seqx = new char[xlen+1];
+        seqy = new char[ylen+1];
+        secx = new char[xlen+1];
+        secy = new char[ylen+1];
+        NewArray(&xa, xlen, 3);
+        NewArray(&ya, ylen, 3);
+        copy_chain_data(xa_vec[0],seqx_vec[0],secx_vec[0], xlen,xa,seqx,secx);
+        copy_chain_data(ya_vec[0],seqy_vec[0],secy_vec[0], ylen,ya,seqy,secy);
+        
+        /* declare variable specific to this pair of TMalign */
+        double t0[3], u0[3][3];
+        double TM1, TM2;
+        double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+        double d0_0, TM_0;
+        double d0A, d0B, d0u, d0a;
+        double d0_out=5.0;
+        string seqM, seqxA, seqyA;// for output alignment
+        double rmsd0 = 0.0;
+        int L_ali;                // Aligned length in standard_TMscore
+        double Liden=0;
+        double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+        int n_ali=0;
+        int n_ali8=0;
+
+        /* entry function for structure alignment */
+        TMalign_main(xa, ya, seqx, seqy, secx, secy,
+            t0, u0, TM1, TM2, TM3, TM4, TM5,
+            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+            seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, 0, d0_scale,
+            0, a_opt, false, d_opt, fast_opt,
+            mol_vec1[0]+mol_vec2[0],TMcut);
+
+        /* print result */
+        output_results(
+            xname.substr(dir1_opt.size()),
+            yname.substr(dir2_opt.size()),
+            chainID_list1[0], chainID_list2[0],
+            xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+            seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
+            n_ali8, L_ali, TM_ali, rmsd_ali, TM_0, d0_0, d0A, d0B,
+            0, d0_scale, d0a, d0u, (m_opt?fname_matrix:"").c_str(),
+            outfmt_opt, ter_opt, true, split_opt, o_opt, fname_super,
+            0, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2);
+
+        /* clean up */
+        seqM.clear();
+        seqxA.clear();
+        seqyA.clear();
+        delete[]seqx;
+        delete[]seqy;
+        delete[]secx;
+        delete[]secy;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&ya,ylen);
+        chain1_list.clear();
+        chain2_list.clear();
+        sequence.clear();
+
+        vector<vector<vector<double> > >().swap(xa_vec); // structure of complex1
+        vector<vector<vector<double> > >().swap(ya_vec); // structure of complex2
+        vector<vector<char> >().swap(seqx_vec); // sequence of complex1
+        vector<vector<char> >().swap(seqy_vec); // sequence of complex2
+        vector<vector<char> >().swap(secx_vec); // secondary structure of complex1
+        vector<vector<char> >().swap(secy_vec); // secondary structure of complex2
+        mol_vec1.clear();       // molecule type of complex1, RNA if >0
+        mol_vec2.clear();       // molecule type of complex2, RNA if >0
+        chainID_list1.clear();  // list of chainID1
+        chainID_list2.clear();  // list of chainID2
+        xlen_vec.clear();       // length of complex1
+        ylen_vec.clear();       // length of complex2
+
+        t2 = clock();
+        float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
+        printf("Total CPU time is %5.2f seconds\n", diff);
+        return 0;
+    }
+
+    /* declare TM-score tables */
+    int chain1_num=xa_vec.size();
+    int chain2_num=ya_vec.size();
+    double **TM1_mat;
+    double **TM2_mat;
+    double **TMave_mat;
+    double **ut_mat; // rotation matrices for all-against-all alignment
+    int ui,uj,ut_idx;
+    NewArray(&TM1_mat,chain1_num,chain2_num);
+    NewArray(&TM2_mat,chain1_num,chain2_num);
+    NewArray(&TMave_mat,chain1_num,chain2_num);
+    NewArray(&ut_mat,chain1_num*chain2_num,4*3);
+    vector<string> tmp_str_vec(chain2_num,"");
+    vector<vector<string> >seqxA_mat(chain1_num,tmp_str_vec);
+    vector<vector<string> > seqM_mat(chain1_num,tmp_str_vec);
+    vector<vector<string> >seqyA_mat(chain1_num,tmp_str_vec);
+    tmp_str_vec.clear();
+
+    /* get all-against-all alignment */
+    for (i=0;i<chain1_num;i++)
+    {
+        xlen=xlen_vec[i];
+        if (xlen<3)
+        {
+            for (j=0;j<chain2_num;j++)
+                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+            continue;
+        }
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+
+        for (j=0;j<chain2_num;j++)
+        {
+            ut_idx=i*chain2_num+j;
+            for (ui=0;ui<4;ui++)
+                for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=0;
+            ut_mat[ut_idx][0]=1;
+            ut_mat[ut_idx][4]=1;
+            ut_mat[ut_idx][8]=1;
+
+            if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
+            {
+                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+                continue;
+            }
+
+            ylen=ylen_vec[j];
+            if (ylen<3)
+            {
+                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+                continue;
+            }
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+                ylen,ya,seqy,secy);
+
+            /* declare variable specific to this pair of TMalign */
+            double t0[3], u0[3][3];
+            double TM1, TM2;
+            double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+            double d0_0, TM_0;
+            double d0A, d0B, d0u, d0a;
+            double d0_out=5.0;
+            string seqM, seqxA, seqyA;// for output alignment
+            double rmsd0 = 0.0;
+            int L_ali;                // Aligned length in standard_TMscore
+            double Liden=0;
+            double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+            int n_ali=0;
+            int n_ali8=0;
+
+            int Lnorm_tmp=len_aa;
+            if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_tmp=len_na;
+
+            /* entry function for structure alignment */
+            TMalign_main(xa, ya, seqx, seqy, secx, secy,
+                t0, u0, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_tmp, d0_scale,
+                0, false, true, false, true,
+                mol_vec1[i]+mol_vec2[j],TMcut);
+
+            /* store result */
+            for (ui=0;ui<3;ui++)
+                for (uj=0;uj<3;uj++) ut_mat[ut_idx][ui*3+uj]=u0[ui][uj];
+            for (uj=0;uj<3;uj++) ut_mat[ut_idx][9+uj]=t0[uj];
+            TM1_mat[i][j]=TM2; // normalized by chain1
+            TM2_mat[i][j]=TM1; // normalized by chain2
+            seqxA_mat[i][j]=seqxA;
+            seqyA_mat[i][j]=seqyA;
+            TMave_mat[i][j]=TM4*Lnorm_tmp;
+
+            /* clean up */
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+        }
+
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+    }
+
+    /* calculate initial chain-chain assignment */
+    int *assign1_list; // value is index of assigned chain2
+    int *assign2_list; // value is index of assigned chain1
+    assign1_list=new int[chain1_num];
+    assign2_list=new int[chain2_num];
+    double total_score=enhanced_greedy_search(TMave_mat, assign1_list,
+        assign2_list, chain1_num, chain2_num);
+    if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain");
+
+    /* refine alignment for large oligomers */
+    int aln_chain_num=0;
+    for (i=0;i<chain1_num;i++) aln_chain_num+=(assign1_list[i]>=0);
+    bool is_oligomer=(aln_chain_num>=3);
+    if (aln_chain_num==2) // dimer alignment
+    {
+        int na_chain_num1,na_chain_num2,aa_chain_num1,aa_chain_num2;
+        count_na_aa_chain_num(na_chain_num1,aa_chain_num1,mol_vec1);
+        count_na_aa_chain_num(na_chain_num2,aa_chain_num2,mol_vec2);
+
+        /* align protein-RNA hybrid dimer to another hybrid dimer */
+        if (na_chain_num1==1 && na_chain_num2==1 && 
+            aa_chain_num1==1 && aa_chain_num2==1) is_oligomer=false;
+        /* align pure protein dimer or pure RNA dimer */
+        else if ((getmin(na_chain_num1,na_chain_num2)==0 && 
+                    aa_chain_num1==2 && aa_chain_num2==2) ||
+                 (getmin(aa_chain_num1,aa_chain_num2)==0 && 
+                    na_chain_num1==2 && na_chain_num2==2))
+        {
+            adjust_dimer_assignment(xa_vec,ya_vec,xlen_vec,ylen_vec,mol_vec1,
+                mol_vec2,assign1_list,assign2_list,seqxA_mat,seqyA_mat);
+            is_oligomer=false; // cannot refiner further
+        }
+        else is_oligomer=true; /* align oligomers to dimer */
+    }
+
+    if (aln_chain_num>=3 || is_oligomer) // oligomer alignment
+    {
+        /* extract centroid coordinates */
+        double **xcentroids;
+        double **ycentroids;
+        NewArray(&xcentroids, chain1_num, 3);
+        NewArray(&ycentroids, chain2_num, 3);
+        double d0MM=getmin(
+            calculate_centroids(xa_vec, chain1_num, xcentroids),
+            calculate_centroids(ya_vec, chain2_num, ycentroids));
+
+        /* refine enhanced greedy search with centroid superposition */
+        //double het_deg=check_heterooligomer(TMave_mat, chain1_num, chain2_num);
+        homo_refined_greedy_search(TMave_mat, assign1_list,
+            assign2_list, chain1_num, chain2_num, xcentroids,
+            ycentroids, d0MM, len_aa+len_na, ut_mat);
+        hetero_refined_greedy_search(TMave_mat, assign1_list,
+            assign2_list, chain1_num, chain2_num, xcentroids,
+            ycentroids, d0MM, len_aa+len_na);
+        
+        /* clean up */
+        DeleteArray(&xcentroids, chain1_num);
+        DeleteArray(&ycentroids, chain2_num);
+    }
+    if (len_aa+len_na>1000) fast_opt=true;
+
+    /* perform iterative alignment */
+    for (int iter=0;iter<1;iter++)
+    {
+        total_score=MMalign_search(xa_vec, ya_vec, seqx_vec, seqy_vec,
+            secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+            xa, ya, seqx, seqy, secx, secy, len_aa, len_na,
+            chain1_num, chain2_num, TM1_mat, TM2_mat, TMave_mat,
+            seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence,
+            d0_scale, true);
+        total_score=enhanced_greedy_search(TMave_mat, assign1_list,
+            assign2_list, chain1_num, chain2_num);
+        if (total_score<=0) PrintErrorAndQuit("ERROR! No assignable chain");
+    }
+
+    /* final alignment */
+    if (outfmt_opt==0) print_version();
+    MMalign_final(xname.substr(dir1_opt.size()), yname.substr(dir2_opt.size()),
+        chainID_list1, chainID_list2,
+        fname_super, fname_lign, fname_matrix,
+        xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, len_aa, len_na,
+        chain1_num, chain2_num, TM1_mat, TM2_mat, TMave_mat,
+        seqxA_mat, seqM_mat, seqyA_mat, assign1_list, assign2_list, sequence,
+        d0_scale, m_opt, o_opt, outfmt_opt, ter_opt, split_opt,
+        a_opt, d_opt, fast_opt, full_opt, mirror_opt, resi_vec1, resi_vec2);
+
+    /* clean up everything */
+    delete [] assign1_list;
+    delete [] assign2_list;
+    DeleteArray(&TM1_mat,  chain1_num);
+    DeleteArray(&TM2_mat,  chain1_num);
+    DeleteArray(&TMave_mat,chain1_num);
+    DeleteArray(&ut_mat,   chain1_num*chain2_num);
+    vector<vector<string> >().swap(seqxA_mat);
+    vector<vector<string> >().swap(seqM_mat);
+    vector<vector<string> >().swap(seqyA_mat);
+
+    vector<vector<vector<double> > >().swap(xa_vec); // structure of complex1
+    vector<vector<vector<double> > >().swap(ya_vec); // structure of complex2
+    vector<vector<char> >().swap(seqx_vec); // sequence of complex1
+    vector<vector<char> >().swap(seqy_vec); // sequence of complex2
+    vector<vector<char> >().swap(secx_vec); // secondary structure of complex1
+    vector<vector<char> >().swap(secy_vec); // secondary structure of complex2
+    mol_vec1.clear();       // molecule type of complex1, RNA if >0
+    mol_vec2.clear();       // molecule type of complex2, RNA if >0
+    vector<string>().swap(chainID_list1);  // list of chainID1
+    vector<string>().swap(chainID_list2);  // list of chainID2
+    xlen_vec.clear();       // length of complex1
+    ylen_vec.clear();       // length of complex2
+    vector<string>().swap(chain1_list);
+    vector<string>().swap(chain2_list);
+    vector<string>().swap(sequence);
+
+    t2 = clock();
+    float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
+    printf("Total CPU time is %5.2f seconds\n", diff);
+    return 0;
+}
diff --git a/modules/bindings/src/tmalign/MMalign.h b/modules/bindings/src/tmalign/MMalign.h
new file mode 100644
index 0000000000000000000000000000000000000000..af9920a8cdc8087982310c94dfba08378f46b2d7
--- /dev/null
+++ b/modules/bindings/src/tmalign/MMalign.h
@@ -0,0 +1,1194 @@
+#include "se.h"
+
+/* Tally the chains of a complex by molecule type.
+ *
+ * mol_vec      - one entry per chain; an entry >0 is counted as a nucleic
+ *                acid chain, any other value as a protein chain
+ * na_chain_num - output: number of nucleic acid chains
+ * aa_chain_num - output: number of protein chains
+ *
+ * Returns the total number of chains (na_chain_num+aa_chain_num). */
+int count_na_aa_chain_num(int &na_chain_num,int &aa_chain_num,
+    const vector<int>&mol_vec)
+{
+    const size_t chain_total=mol_vec.size();
+    int na_count=0;
+    for (size_t c=0;c<chain_total;c++) na_count+=(mol_vec[c]>0);
+
+    /* every chain that is not a nucleic acid is counted as protein */
+    na_chain_num=na_count;
+    aa_chain_num=(int)chain_total-na_count;
+    return na_chain_num+aa_chain_num;
+}
+
+/* append the aligned residue pairs of one chain pair to the coordinate
+ * buffers that are superposed afterwards.
+ * seqxA and seqyA are the two rows of a pairwise alignment, where '-'
+ * marks a gap; x_chain and y_chain hold the per-residue coordinates of
+ * the two chains. For every alignment column without a gap, the
+ * coordinates of both residues are copied to xa[L_ali] and ya[L_ali],
+ * and L_ali is incremented. */
+static void append_aligned_dimer_coords(
+    const vector<vector<double> >&x_chain,
+    const vector<vector<double> >&y_chain,
+    const string &seqxA, const string &seqyA,
+    double **xa, double **ya, size_t &L_ali)
+{
+    int i=-1; // index of the current residue in x_chain
+    int j=-1; // index of the current residue in y_chain
+    for (size_t r=0;r<seqxA.size();r++)
+    {
+        i+=(seqxA[r]!='-');
+        j+=(seqyA[r]!='-');
+        if (seqxA[r]=='-' || seqyA[r]=='-') continue;
+        xa[L_ali][0]=x_chain[i][0];
+        xa[L_ali][1]=x_chain[i][1];
+        xa[L_ali][2]=x_chain[i][2];
+        ya[L_ali][0]=y_chain[j][0];
+        ya[L_ali][1]=y_chain[j][1];
+        ya[L_ali][2]=y_chain[j][2];
+        L_ali++;
+    }
+}
+
+/* superpose the first L_ali rows of xa onto ya (result stored in xt) and
+ * return sum(1/(1+dd/d0^2))/Lnorm over the superposed pairs.
+ * dd is the value returned by dist(); it is divided by d0*d0, the same
+ * way calMMscore divides dist() by d0MM*d0MM. */
+static double superposed_dimer_score(double **xa, double **ya, double **xt,
+    const size_t L_ali, const double d0, const double Lnorm)
+{
+    double RMSD = 0;
+    double t[3];
+    double u[3][3];
+    Kabsch(xa, ya, L_ali, 1, &RMSD, t, u);
+    do_rotation(xa, xt, L_ali, t, u);
+
+    double total_score=0;
+    for (size_t r=0;r<L_ali;r++)
+    {
+        const double dd=dist(xt[r],ya[r]);
+        /* parenthesised on purpose: dd/d0*d0 would evaluate to
+         * (dd/d0)*d0, i.e. dd, and drop the d0 scaling entirely */
+        total_score+=1/(1+dd/(d0*d0));
+    }
+    return total_score/Lnorm;
+}
+
+/* adjust chain assignment for dimer-dimer alignment.
+ *
+ * The two assigned chain pairs (i1,j1) and (i2,j2) are read from
+ * assign1_list. Using the pairwise alignments stored in seqxA_mat and
+ * seqyA_mat, the aligned residues of both pairs are superposed as one
+ * rigid body and scored, once for the current assignment and once for
+ * the swapped assignment (i1,j2),(i2,j1). If the swapped assignment
+ * scores strictly higher, assign1_list and assign2_list are updated.
+ *
+ * return true if assignment is adjusted. If fewer than two chain pairs
+ * are assigned, nothing is changed and false is returned. */
+bool adjust_dimer_assignment(        
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<int>&xlen_vec, const vector<int>&ylen_vec,
+    const vector<int>&mol_vec1, const vector<int>&mol_vec2,
+    int *assign1_list, int *assign2_list,
+    const vector<vector<string> >&seqxA_mat,
+    const vector<vector<string> >&seqyA_mat)
+{
+    /* check currently assigned chains: the first assigned chain of
+     * complex 1 becomes (i1,j1), the last one becomes (i2,j2) */
+    int i1,i2,j1,j2;
+    i1=i2=j1=j2=-1;    
+    int chain1_num=xa_vec.size();
+    for (int i=0;i<chain1_num;i++)
+    {
+        if (assign1_list[i]<0) continue;
+        if (i1<0)
+        {
+            i1=i;
+            j1=assign1_list[i1];
+        }
+        else
+        {
+            i2=i;
+            j2=assign1_list[i2];
+        }
+    }
+
+    /* without two assigned pairs there is nothing to swap, and the
+     * lookups below would index the vectors with -1 */
+    if (i1<0 || i2<0) return false;
+
+    /* normalize d0 by L */
+    int xlen=xlen_vec[i1]+xlen_vec[i2];
+    int ylen=ylen_vec[j1]+ylen_vec[j2];
+    int mol_type=mol_vec1[i1]+mol_vec1[i2]+
+                 mol_vec2[j1]+mol_vec2[j2];
+    double D0_MIN, d0, d0_search;
+    double Lnorm=getmin(xlen,ylen);
+    /* D0_MIN, Lnorm, d0 and d0_search are passed as outputs; only d0 and
+     * Lnorm are used afterwards */
+    parameter_set4final(getmin(xlen,ylen), D0_MIN, Lnorm, d0, 
+        d0_search, mol_type);
+
+    /* buffers sized for the combined length of both chains */
+    double **xa,**ya, **xt;
+    NewArray(&xa, xlen, 3);
+    NewArray(&ya, ylen, 3);
+    NewArray(&xt, xlen, 3);
+
+    size_t L_ali=0; // number of aligned residue pairs collected so far
+
+    /* total score using current assignment */
+    L_ali=0;
+    append_aligned_dimer_coords(xa_vec[i1], ya_vec[j1],
+        seqxA_mat[i1][j1], seqyA_mat[i1][j1], xa, ya, L_ali);
+    append_aligned_dimer_coords(xa_vec[i2], ya_vec[j2],
+        seqxA_mat[i2][j2], seqyA_mat[i2][j2], xa, ya, L_ali);
+    double total_score1=superposed_dimer_score(xa, ya, xt, L_ali, d0, Lnorm);
+
+    /* total score using reversed assignment */
+    L_ali=0;
+    append_aligned_dimer_coords(xa_vec[i1], ya_vec[j2],
+        seqxA_mat[i1][j2], seqyA_mat[i1][j2], xa, ya, L_ali);
+    append_aligned_dimer_coords(xa_vec[i2], ya_vec[j1],
+        seqxA_mat[i2][j1], seqyA_mat[i2][j1], xa, ya, L_ali);
+    double total_score2=superposed_dimer_score(xa, ya, xt, L_ali, d0, Lnorm);
+
+    /* swap chain assignment */
+    if (total_score1<total_score2)
+    {
+        assign1_list[i1]=j2;
+        assign1_list[i2]=j1;
+        assign2_list[j1]=i2;
+        assign2_list[j2]=i1;
+    }
+
+    /* clean up */
+    DeleteArray(&xa, xlen);
+    DeleteArray(&ya, ylen);
+    DeleteArray(&xt, xlen);
+    return total_score1<total_score2;
+}
+
+/* assign chain-chain correspondence.
+ *
+ * TMave_mat    - chain1_num x chain2_num score table; entries <=0 are
+ *                treated as not assignable
+ * assign1_list - output, length chain1_num: index of the chain of
+ *                complex 2 assigned to each chain of complex 1, or -1
+ * assign2_list - output, length chain2_num: the inverse mapping, or -1
+ *
+ * Returns the sum of TMave_mat over the assigned pairs; a return value
+ * <=0 means that no chain pair could be assigned. */
+double enhanced_greedy_search(double **TMave_mat,int *assign1_list,
+    int *assign2_list, const int chain1_num, const int chain2_num)
+{
+    double total_score=0;
+    double tmp_score=0;
+    int i,j;
+    int maxi=0;
+    int maxj=0;
+
+    /* initialize parameters */
+    for (i=0;i<chain1_num;i++) assign1_list[i]=-1;
+    for (j=0;j<chain2_num;j++) assign2_list[j]=-1;
+
+    /* greedy assignment: in each iteration, the highest chain pair is
+     * assigned, until no assignable chain is left */
+    while(1)
+    {
+        tmp_score=-1;
+        for (i=0;i<chain1_num;i++)
+        {
+            if (assign1_list[i]>=0) continue;
+            for (j=0;j<chain2_num;j++)
+            {
+                if (assign2_list[j]>=0 || TMave_mat[i][j]<=0) continue;
+                if (TMave_mat[i][j]>tmp_score) 
+                {
+                    maxi=i;
+                    maxj=j;
+                    tmp_score=TMave_mat[i][j];
+                }
+            }
+        }
+        if (tmp_score<=0) break; // error: no assignable chain
+        assign1_list[maxi]=maxj;
+        assign2_list[maxj]=maxi;
+        total_score+=tmp_score;
+    }
+    if (total_score<=0) return total_score; // error: no assignable chain
+
+    /* iterative refinement: repeatedly apply the first re-assignment
+     * that increases the total score. The cap of five passes per chain
+     * of the smaller complex is written out inline so that it does not
+     * depend on how getmin(...)*5 would be expanded. */
+    const int max_iter=5*(chain1_num<chain2_num?chain1_num:chain2_num);
+    for (int iter=0;iter<max_iter;iter++)
+    {
+        bool swapped=false;
+        for (i=0;i<chain1_num && !swapped;i++)
+        {
+            const int old_j=assign1_list[i]; // current partner of i, or -1
+            for (j=0;j<chain2_num;j++)
+            {
+                // attempt to replace (i,old_j) with (i,j)
+                if (j==old_j || TMave_mat[i][j]<=0) continue;
+                const int old_i=assign2_list[j]; // current partner of j, or -1
+
+                /* gain of pairing i with j, minus the two pairs that are
+                 * broken up, plus the pair (old_i,old_j) formed from the
+                 * two chains that are left over */
+                double delta_score=TMave_mat[i][j];
+                if (old_j>=0) delta_score-=TMave_mat[i][old_j];
+                if (old_i>=0) delta_score-=TMave_mat[old_i][j];
+                if (old_i>=0 && old_j>=0) delta_score+=TMave_mat[old_i][old_j];
+                if (delta_score<=0) continue;
+
+                // successful swap
+                assign1_list[i]=j;
+                if (old_i>=0) assign1_list[old_i]=old_j;
+                assign2_list[j]=i;
+                if (old_j>=0) assign2_list[old_j]=old_i;
+                total_score+=delta_score;
+                swapped=true;
+                break;
+            }
+        }
+        if (!swapped) break; // cannot swap any chain pair
+    }
+    return total_score;
+}
+
+/* compute the centroid of every chain and a distance scale for the
+ * complex.
+ *
+ * a_vec     - a_vec[c][r] holds the x,y,z coordinates of residue r of
+ *             chain c
+ * chain_num - number of chains to process
+ * centroids - output, chain_num x 3: mean coordinate of each chain
+ *
+ * Returns the average, over all chains, of sqrt(dist()) between a
+ * chain's centroid and the closest centroid of another chain. A chain
+ * without any other chain contributes -1 to that average. */
+double calculate_centroids(const vector<vector<vector<double> > >&a_vec,
+    const int chain_num, double ** centroids)
+{
+    int chain,res;   // index of chain and residue
+    int chain_len=0; // number of residues in the current chain
+    for (chain=0; chain<chain_num; chain++)
+    {
+        double *center=centroids[chain];
+        center[0]=center[1]=center[2]=0;
+        chain_len=a_vec[chain].size();
+        for (res=0; res<chain_len; res++)
+        {
+            const vector<double> &xyz=a_vec[chain][res];
+            center[0]+=xyz[0];
+            center[1]+=xyz[1];
+            center[2]+=xyz[2];
+        }
+        center[0]/=chain_len;
+        center[1]/=chain_len;
+        center[2]/=chain_len;
+    }
+
+    /* nearest[c] is the smallest centroid separation between chain c and
+     * any other chain; a value <=0 means "not set yet" */
+    vector<double> nearest(chain_num,-1);
+    int other=0;
+    double d0MM=0;
+    for (chain=0; chain<chain_num; chain++)
+    {
+        for (other=0; other<chain_num; other++)
+        {
+            if (other==chain) continue;
+            d0MM=sqrt(dist(centroids[chain],centroids[other]));
+            if (nearest[chain]<=0) nearest[chain]=d0MM;
+            else nearest[chain]=getmin(nearest[chain], d0MM);
+        }
+    }
+
+    /* average the per-chain separations */
+    d0MM=0;
+    for (chain=0; chain<chain_num; chain++) d0MM+=nearest[chain];
+    d0MM/=chain_num;
+    return d0MM;
+}
+
+/* calculate MMscore of aligned chains
+ * MMscore = sum(TMave_mat[i][j]) * sum(1/(1+dij/d0MM^2))
+ *         / (L* getmin(chain1_num,chain2_num))
+ * The first sum runs over the chain pairs (i,j=assign1_list[i]) with
+ * j>=0. dij is dist() between the centroid of chain i, after the
+ * centroids of complex 1 have been superposed onto those of complex 2,
+ * and the centroid of chain j. d0MM is the scaling factor.
+ *
+ * r1, r2 and xt are caller-provided work arrays; they receive the
+ * centroids of the assigned chains of complex 1, of complex 2, and the
+ * superposed centroids of complex 1. t and u receive the superposition
+ * computed by Kabsch() and are only written when at least three chain
+ * pairs are assigned.
+ * NOTE(review): with exactly two assigned pairs only
+ * dist(r1[0],r2[0]) is scored, without any superposition -- confirm
+ * that this is intended. */
+double calMMscore(double **TMave_mat,int *assign1_list,
+    const int chain1_num, const int chain2_num, double **xcentroids,
+    double **ycentroids, const double d0MM, double **r1, double **r2,
+    double **xt, double t[3], double u[3][3], const int L)
+{
+    int pair_num=0; // number of assigned chain pairs
+    double TMsum=0; // sum of TMave_mat over the assigned chain pairs
+    int c1,c2,k;
+    for (c1=0;c1<chain1_num;c1++)
+    {
+        c2=assign1_list[c1];
+        if (c2<0) continue;
+
+        for (k=0;k<3;k++)
+        {
+            r1[pair_num][k]=xcentroids[c1][k];
+            r2[pair_num][k]=ycentroids[c2][k];
+        }
+        TMsum+=TMave_mat[c1][c2];
+        pair_num++;
+    }
+    const double MMscore=TMsum/L;
+
+    /* pseudo-TMscore of the centroid arrangement */
+    double TMscore=0;
+    if (pair_num<2) TMscore=1; // at most one aligned chain
+    else if (pair_num==2)
+    {
+        const double dd=dist(r1[0],r2[0]);
+        TMscore=1/(1+dd/(d0MM*d0MM));
+    }
+    else
+    {
+        /* Kabsch superposition of the centroids */
+        double RMSD = 0;
+        Kabsch(r1, r2, pair_num, 1, &RMSD, t, u);
+        do_rotation(r1, xt, pair_num, t, u);
+
+        for (k=0;k<pair_num;k++)
+        {
+            const double dd=dist(xt[k], r2[k]);
+            TMscore+=1/(1+dd/(d0MM*d0MM));
+        }
+    }
+    TMscore/=getmin(chain1_num,chain2_num);
+    return MMscore*TMscore;
+}
+
+/* check if this is alignment of heterooligomer or homooligomer
+ * return het_deg=(max_TM-min_TM)/max_TM, which ranges from 0 to 1,
+ * where min_TM and max_TM are the smallest and largest entries of the
+ * chain1_num x chain2_num table TMave_mat.
+ * The larger the value, the more "hetero";
+ * the smaller the value, the more "homo".
+ *
+ * Negative entries (the driver stores -1 for chain pairs it does not
+ * align) are counted as a score of 0, so that they cannot be confused
+ * with an "unset" marker and cannot push the result above 1. If no
+ * entry is positive, 0 is returned instead of dividing by zero. */
+double check_heterooligomer(double **TMave_mat, const int chain1_num,
+    const int chain2_num)
+{
+    bool   has_entry=false; // whether min_TM and max_TM were initialized
+    double min_TM=0;
+    double max_TM=0;
+    int i,j;
+    for (i=0;i<chain1_num;i++)
+    {
+        for (j=0;j<chain2_num;j++)
+        {
+            double score=TMave_mat[i][j];
+            if (score<0) score=0; // chain pair without alignment
+            if (!has_entry)
+            {
+                min_TM=max_TM=score;
+                has_entry=true;
+                continue;
+            }
+            if (score<min_TM) min_TM=score;
+            if (score>max_TM) max_TM=score;
+        }
+    }
+    if (max_TM<=0) return 0; // empty table or no positive score
+    return (max_TM-min_TM)/max_TM;
+}
+
+/* Reassign chain-chain correspondence, specific for homooligomer.
+ * Each chain pair (c1,c2) with TMave_mat[c1][c2]>0 is tried as a seed:
+ * its transform ut_mat[c1*chain2_num+c2] (9 rotation entries followed
+ * by 3 translation entries) is applied to xcentroids, and the remaining
+ * chains are paired greedily in descending order of
+ * TMave_mat[i][j]/(1+dist(xt[i],ycentroids[j])/d0MM^2).
+ * The assignment with the highest MMscore is copied into assign1_list
+ * and assign2_list (-1 = unassigned); both are left untouched when no
+ * pair has a positive TMave_mat.
+ * Returns the highest MMscore found (0 if no seed was tried). */
+double homo_refined_greedy_search(double **TMave_mat,int *assign1_list,
+    int *assign2_list, const int chain1_num, const int chain2_num,
+    double **xcentroids, double **ycentroids, const double d0MM,
+    const int L, double **ut_mat)
+{
+    double MMscore_max=0;
+    double MMscore=0;
+    int i,j;
+    int c1,c2;
+    int max_i=-1; // the chain pair whose monomer u t yields highest MMscore
+    int max_j=-1;
+
+    int chain_num=getmin(chain1_num,chain2_num);
+    int *assign1_tmp=new int [chain1_num];
+    int *assign2_tmp=new int [chain2_num];
+    double **xt;
+    NewArray(&xt, chain1_num, 3);
+    double t[3];
+    double u[3][3];
+    int ui,uj,ut_idx;
+    double TMscore=0; // pseudo TM-score
+    double TMsum  =0;
+    double dd=0;
+
+    size_t  total_pair=chain1_num*chain2_num; // total pair
+    double *ut_tmc_mat=new double [total_pair]; // chain level TM-score
+    vector<pair<double,int> > ut_tm_vec(total_pair,make_pair(0.0,0)); // product of both
+
+    for (c1=0;c1<chain1_num;c1++)
+    {
+        for (c2=0;c2<chain2_num;c2++)
+        {
+            if (TMave_mat[c1][c2]<=0) continue;
+
+            /* load rotation u and translation t of the seed pair */
+            ut_idx=c1*chain2_num+c2;
+            for (ui=0;ui<3;ui++)
+                for (uj=0;uj<3;uj++) u[ui][uj]=ut_mat[ut_idx][ui*3+uj];
+            for (uj=0;uj<3;uj++) t[uj]=ut_mat[ut_idx][9+uj];
+
+            do_rotation(xcentroids, xt, chain1_num, t, u);
+
+            for (i=0;i<chain1_num;i++) assign1_tmp[i]=-1;
+            for (j=0;j<chain2_num;j++) assign2_tmp[j]=-1;
+
+            /* score every chain pair under this transform; pairs with
+             * TMave_mat<=0 keep first=-1 so that they sort to the front */
+            for (i=0;i<chain1_num;i++)
+            {
+                for (j=0;j<chain2_num;j++)
+                {
+                    ut_idx=i*chain2_num+j;
+                    ut_tmc_mat[ut_idx]=0;
+                    ut_tm_vec[ut_idx].first=-1;
+                    ut_tm_vec[ut_idx].second=ut_idx;
+                    if (TMave_mat[i][j]<=0) continue;
+                    dd=dist(xt[i],ycentroids[j]);
+                    ut_tmc_mat[ut_idx]=1/(1+dd/(d0MM*d0MM));
+                    ut_tm_vec[ut_idx].first=
+                        ut_tmc_mat[ut_idx]*TMave_mat[i][j];
+                }
+            }
+
+            /* initial assignment */
+            assign1_tmp[c1]=c2;
+            assign2_tmp[c2]=c1;
+            TMsum=TMave_mat[c1][c2];
+            TMscore=ut_tmc_mat[c1*chain2_num+c2];
+
+            /* further assignment */
+            sort(ut_tm_vec.begin(), ut_tm_vec.end()); // sort in ascending order
+            for (ut_idx=total_pair-1;ut_idx>=0;ut_idx--)
+            {
+                j=ut_tm_vec[ut_idx].second % chain2_num;
+                i=int(ut_tm_vec[ut_idx].second / chain2_num);
+                if (TMave_mat[i][j]<=0) break; // only excluded pairs remain
+                if (assign1_tmp[i]>=0 || assign2_tmp[j]>=0) continue;
+                assign1_tmp[i]=j;
+                assign2_tmp[j]=i;
+                TMsum+=TMave_mat[i][j];
+                TMscore+=ut_tmc_mat[i*chain2_num+j];
+            }
+
+            /* final MMscore */
+            MMscore=(TMsum/L)*(TMscore/chain_num);
+            if (max_i<0 || max_j<0 || MMscore>MMscore_max)
+            {
+                /* best seed so far: record it and its assignment */
+                max_i=c1;
+                max_j=c2;
+                MMscore_max=MMscore;
+                for (i=0;i<chain1_num;i++) assign1_list[i]=assign1_tmp[i];
+                for (j=0;j<chain2_num;j++) assign2_list[j]=assign2_tmp[j];
+            }
+        }
+    }
+
+    /* clean up */
+    delete[]assign1_tmp;
+    delete[]assign2_tmp;
+    delete[]ut_tmc_mat;
+    ut_tm_vec.clear();
+    DeleteArray(&xt, chain1_num);
+    /* report the score matching assign1_list/assign2_list, not merely
+     * the score of the last seed evaluated */
+    return MMscore_max;
+}
+
+/* reassign chain-chain correspondence by score-raising swaps, specific for heterooligomer; returns final MMscore */
+double hetero_refined_greedy_search(double **TMave_mat,int *assign1_list,
+    int *assign2_list, const int chain1_num, const int chain2_num,
+    double **xcentroids, double **ycentroids, const double d0MM, const int L)
+{
+    double MMscore_old=0;
+    double MMscore=0;
+    int i,j;
+
+    double **r1;
+    double **r2;
+    double **xt;
+    int chain_num=getmin(chain1_num,chain2_num);
+    NewArray(&r1, chain_num, 3);
+    NewArray(&r2, chain_num, 3);
+    NewArray(&xt, chain_num, 3);
+    double t[3];
+    double u[3][3];
+
+    /* calculate MMscore */
+    MMscore=MMscore_old=calMMscore(TMave_mat, assign1_list, chain1_num,
+        chain2_num, xcentroids, ycentroids, d0MM, r1, r2, xt, t, u, L);
+    // Outline of the refinement below:
+    //  * every (i,j) with TMave_mat[i][j]>0 and j!=assign1_list[i] is
+    //    tried as a new pair; the previous partners old_j (of i) and
+    //    old_i (of j) are reassigned to each other (-1 = no partner)
+    //  * the trial assignment is built in assign1_tmp/assign2_tmp and
+    //    scored with calMMscore()
+    //  * a trial that raises the score is committed to assign1_list and
+    //    assign2_list; otherwise the touched tmp entries are restored
+    //  * passes repeat until a full pass commits nothing, or until
+    //    chain1_num*chain2_num passes have run
+
+    /* iteratively refine chain assignment. in each iteration, attempt
+     * to swap (i,old_j=assign1_list[i]) with (i,j) */
+    double delta_score=-1;
+    int *assign1_tmp=new int [chain1_num];
+    int *assign2_tmp=new int [chain2_num];
+    for (i=0;i<chain1_num;i++) assign1_tmp[i]=assign1_list[i];
+    for (j=0;j<chain2_num;j++) assign2_tmp[j]=assign2_list[j];
+    int old_i=-1;
+    int old_j=-1;
+
+    // old_i / old_j hold the partners displaced by the swap under test:
+    //   old_j = chain of complex 2 currently assigned to i (or -1)
+    //   old_i = chain of complex 1 currently assigned to j (or -1)
+    // the tmp arrays start out as copies of the committed assignment
+
+    for (int iter=0;iter<chain1_num*chain2_num;iter++)
+    {
+        delta_score=-1;
+        for (i=0;i<chain1_num;i++)
+        {
+            old_j=assign1_list[i];
+            for (j=0;j<chain2_num;j++)
+            {
+                if (j==assign1_list[i] || TMave_mat[i][j]<=0) continue;
+                old_i=assign2_list[j];
+
+                assign1_tmp[i]=j;
+                if (old_i>=0) assign1_tmp[old_i]=old_j;
+                assign2_tmp[j]=i;
+                if (old_j>=0) assign2_tmp[old_j]=old_i;
+                
+                MMscore=calMMscore(TMave_mat, assign1_tmp, chain1_num,
+                    chain2_num, xcentroids, ycentroids, d0MM,
+                    r1, r2, xt, t, u, L);
+
+                // MMscore_old is the score of the committed assignment;
+                // only a strict improvement is accepted
+
+                if (MMscore>MMscore_old) // successful swap
+                {
+                    assign1_list[i]=j;
+                    if (old_i>=0) assign1_list[old_i]=old_j;
+                    assign2_list[j]=i;
+                    if (old_j>=0) assign2_list[old_j]=old_i;
+                    delta_score=(MMscore-MMscore_old);
+                    MMscore_old=MMscore;
+                    // leave the j loop; the pass continues with the next i
+                    break;
+                }
+                else
+                {
+                    assign1_tmp[i]=assign1_list[i];
+                    if (old_i>=0) assign1_tmp[old_i]=assign1_list[old_i];
+                    assign2_tmp[j]=assign2_list[j];
+                    if (old_j>=0) assign2_tmp[old_j]=assign2_list[old_j];
+                }
+            }
+        }
+        // delta_score is still -1 if this pass committed no swap, and
+        // holds the most recent (positive) score gain otherwise.
+        // The loop bound above limits the number of passes to
+        // chain1_num*chain2_num even if swaps keep succeeding, so the
+        // refinement always terminates.
+        if (delta_score<=0) break; // cannot swap any chain pair
+    }
+    MMscore=MMscore_old;
+    // (until the line above, MMscore may hold the score of a rejected trial)
+
+    /* clean up */
+    delete[]assign1_tmp;
+    delete[]assign2_tmp;
+    DeleteArray(&r1, chain_num);
+    DeleteArray(&r2, chain_num);
+    DeleteArray(&xt, chain_num);
+    return MMscore;
+}
+
+/* Copy the first len residues of one chain - coordinates, sequence and
+ * secondary structure - into a, seq and sec; seq and sec get a 0 at len. */
+void copy_chain_data(const vector<vector<double> >&a_vec_i,
+    const vector<char>&seq_vec_i,const vector<char>&sec_vec_i,
+    const int len,double **a,char *seq,char *sec)
+{
+    for (int res=0;res<len;res++)
+    {
+        const vector<double> &xyz=a_vec_i[res];
+        for (int k=0;k<3;k++) a[res][k]=xyz[k];
+        seq[res]=seq_vec_i[res];
+        sec[res]=sec_vec_i[res];
+    }
+    seq[len]=0;
+    sec[len]=0;
+}
+/* read all files in chain_list into per-chain vectors (coordinates, sequence, secondary structure, length) */
+void parse_chain_list(const vector<string>&chain_list,
+    vector<vector<vector<double> > >&a_vec, vector<vector<char> >&seq_vec,
+    vector<vector<char> >&sec_vec, vector<int>&mol_vec, vector<int>&len_vec,
+    vector<string>&chainID_list, const int ter_opt, const int split_opt,
+    const string mol_opt, const int infmt_opt, const string atom_opt,
+    const int mirror_opt, const int het_opt, int &len_aa, int &len_na,  
+    const int o_opt, vector<string>&resi_vec)
+{
+    size_t i;
+    int chain_i,r;
+    string name;
+    int chainnum;
+    double **xa;
+    int len;
+    char *seq,*sec;
+
+    vector<vector<string> >PDB_lines;
+    vector<double> tmp_atom_array(3,0);
+    vector<vector<double> > tmp_chain_array;
+    vector<char>tmp_seq_array; // shadowed by the per-chain vector declared in the loop
+    vector<char>tmp_sec_array; // shadowed by the per-chain vector declared in the loop
+    // resi_vec is supplied by the caller and handed on to read_PDB()
+    int read_resi=0;
+    if (o_opt) read_resi=2; // passed to read_PDB; meaning of 2 is defined there
+
+    for (i=0;i<chain_list.size();i++) // one pass per input file
+    {
+        name=chain_list[i];
+        chainnum=get_PDB_lines(name, PDB_lines, chainID_list,
+            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt, het_opt);
+        if (!chainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<name
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        for (chain_i=0;chain_i<chainnum;chain_i++)
+        {
+            len=PDB_lines[chain_i].size();
+            if (!len)
+            {
+                cerr<<"Warning! Cannot parse file: "<<name
+                    <<". Chain length 0."<<endl;
+                continue;
+            }
+            else if (len<3) // chains shorter than 3 are skipped, nothing is stored
+            {
+                cerr<<"Sequence is too short <3!: "<<name<<endl;
+                continue;
+            }
+            NewArray(&xa, len, 3);
+            seq = new char[len + 1];
+            sec = new char[len + 1];
+            len = read_PDB(PDB_lines[chain_i], xa, seq, resi_vec, read_resi); // len is overwritten with the return value
+            if (mirror_opt) for (r=0;r<len;r++) xa[r][2]=-xa[r][2]; // mirror: negate z
+            if (mol_vec[chain_i]>0 || mol_opt=="RNA")
+                make_sec(seq, xa, len, sec,atom_opt);
+            else make_sec(xa, len, sec); // secondary structure assignment
+            
+            /* store in vector */
+            tmp_chain_array.assign(len,tmp_atom_array);
+            vector<char>tmp_seq_array(len+1,0); // zero-filled, so entry len stays 0
+            vector<char>tmp_sec_array(len+1,0);
+            for (r=0;r<len;r++)
+            {
+                tmp_chain_array[r][0]=xa[r][0];
+                tmp_chain_array[r][1]=xa[r][1];
+                tmp_chain_array[r][2]=xa[r][2];
+                tmp_seq_array[r]=seq[r];
+                tmp_sec_array[r]=sec[r];
+            }
+            a_vec.push_back(tmp_chain_array);
+            seq_vec.push_back(tmp_seq_array);
+            sec_vec.push_back(tmp_sec_array);
+            len_vec.push_back(len);
+
+            /* clean up */
+            tmp_chain_array.clear();
+            tmp_seq_array.clear();
+            tmp_sec_array.clear();
+            PDB_lines[chain_i].clear();
+            DeleteArray(&xa, len); // NOTE(review): uses the len returned by read_PDB, not the allocated row count - confirm they match
+            delete [] seq;
+            delete [] sec;
+        } // chain_i
+        name.clear();
+        PDB_lines.clear();
+        mol_vec.clear(); // per-file entries are discarded; mol_vec is rebuilt below
+    } // i
+    tmp_atom_array.clear();
+
+    if (mol_opt=="RNA") mol_vec.assign(a_vec.size(),1);
+    else if (mol_opt=="protein") mol_vec.assign(a_vec.size(),-1);
+    else
+    {
+        mol_vec.assign(a_vec.size(),0);
+        for (i=0;i<a_vec.size();i++)
+        {
+            for (r=0;r<len_vec[i];r++)
+            {
+                if (seq_vec[i][r]>='a' && seq_vec[i][r]<='z') mol_vec[i]++; // lowercase letter: +1
+                else mol_vec[i]--; // anything else: -1
+            }
+        }
+    }
+
+    len_aa=0;
+    len_na=0;
+    for (i=0;i<a_vec.size();i++)
+    {
+        if (mol_vec[i]>0) len_na+=len_vec[i]; // chains with mol_vec>0 count toward len_na
+        else              len_aa+=len_vec[i]; // all other chains count toward len_aa
+    }
+}
+
+/* Gather all assigned chain pairs (i, j=assign1_list[i]>=0) into flat
+ * buffers: chains i go to xa/seqx/secx, chains j to ya/seqy/secy, each
+ * in order of i; seq/sec buffers are 0-terminated. sequence becomes the
+ * two concatenations of seqxA_mat[i][j] and seqyA_mat[i][j].
+ * Returns the sum of mol_vec1[i]+mol_vec2[j] over the assigned pairs. */
+int copy_chain_pair_data(
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int chain1_num, int chain2_num,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, vector<string>&sequence)
+{
+    sequence.assign(2,string()); // [0]: complex 1, [1]: complex 2
+    int mol_sum=0;
+    int nx=0; // residues written so far for complex 1
+    int ny=0; // residues written so far for complex 2
+    for (int c1=0;c1<chain1_num;c1++)
+    {
+        const int c2=assign1_list[c1];
+        if (c2<0) continue; // chain c1 has no partner
+
+        for (int r=0;r<xlen_vec[c1];r++,nx++)
+        {
+            seqx[nx]=seqx_vec[c1][r];
+            secx[nx]=secx_vec[c1][r];
+            for (int k=0;k<3;k++) xa[nx][k]=xa_vec[c1][r][k];
+        }
+        sequence[0]+=seqxA_mat[c1][c2];
+
+        for (int r=0;r<ylen_vec[c2];r++,ny++)
+        {
+            seqy[ny]=seqy_vec[c2][r];
+            secy[ny]=secy_vec[c2][r];
+            for (int k=0;k<3;k++) ya[ny][k]=ya_vec[c2][r][k];
+        }
+        sequence[1]+=seqyA_mat[c1][c2];
+
+        mol_sum+=mol_vec1[c1]+mol_vec2[c2];
+    }
+    /* terminate the four character buffers */
+    seqx[nx]=0;
+    secx[nx]=0;
+    seqy[ny]=0;
+    secy[ny]=0;
+    return mol_sum;
+}
+
+/* Align the currently assigned chains as one complex, then rescore all
+ * chain pairs under the resulting superposition.
+ * 1. Chains with assign1_list[i]>=0 are concatenated and passed to
+ *    TMalign_main() together with t0, u0.
+ * 2. Every chain i of complex 1 is moved by t0, u0 and compared with every
+ *    chain j of complex 2 by se_main(); TM1_mat, TM2_mat, TMave_mat,
+ *    seqxA_mat and seqyA_mat are refreshed. TM matrix entries become -1 for
+ *    chains shorter than 3 and for pairs with mol_vec1[i]*mol_vec2[j]<0.
+ * xa, ya, seqx, seqy, secx and secy are used as local work pointers only.
+ * Returns total_score, which is never modified here and is therefore 0. */
+double MMalign_search(
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int len_aa, int len_na, int chain1_num, int chain2_num,
+    double **TM1_mat, double **TM2_mat, double **TMave_mat,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqyA_mat,
+    int *assign1_list, int *assign2_list, vector<string>&sequence,
+    double d0_scale, bool fast_opt)
+{
+    double total_score=0;
+    int i,j;
+    int xlen=0;
+    int ylen=0;
+    for (i=0;i<chain1_num;i++)
+    {
+        if (assign1_list[i]<0) continue;
+        xlen+=xlen_vec[i];
+        ylen+=ylen_vec[assign1_list[i]];
+    }
+    if (xlen<=3 || ylen<=3) return total_score;
+
+    seqx = new char[xlen+1];
+    secx = new char[xlen+1];
+    NewArray(&xa, xlen, 3);
+    seqy = new char[ylen+1];
+    secy = new char[ylen+1];
+    NewArray(&ya, ylen, 3);
+
+    int mol_type=copy_chain_pair_data(xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, chain1_num, chain2_num,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence);
+
+    /* declare variable specific to this pair of TMalign */
+    double t0[3], u0[3][3];
+    double TM1, TM2, TM3, TM4, TM5; // TM3-TM5: for a_opt, u_opt, d_opt
+    double d0_0, TM_0, d0A, d0B, d0u, d0a;
+    double d0_out=5.0;
+    string seqM, seqxA, seqyA;// for output alignment
+    double rmsd0 = 0.0;
+    int L_ali;                // Aligned length in standard_TMscore
+    double Liden=0;
+    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+    int n_ali=0, n_ali8=0;
+    double Lnorm_ass=len_aa+len_na;
+
+    /* entry function for structure alignment */
+    TMalign_main(xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        3, false, true, false, fast_opt, mol_type, -1);
+
+    /* clean up */
+    delete [] seqx;
+    delete [] seqy;
+    delete [] secx;
+    delete [] secy;
+    DeleteArray(&xa,xlen);
+    DeleteArray(&ya,ylen);
+
+    /* re-compute chain level alignment */
+    for (i=0;i<chain1_num;i++)
+    {
+        xlen=xlen_vec[i];
+        if (xlen<3)
+        {
+            for (j=0;j<chain2_num;j++)
+                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+            continue;
+        }
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+
+        double **xt;
+        NewArray(&xt, xlen, 3);
+        do_rotation(xa, xt, xlen, t0, u0);
+
+        for (j=0;j<chain2_num;j++)
+        {
+            if (mol_vec1[i]*mol_vec2[j]<0) //no protein-RNA alignment
+            {
+                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+                continue;
+            }
+
+            ylen=ylen_vec[j];
+            if (ylen<3)
+            {
+                TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+                continue;
+            }
+            seqy = new char[ylen+1];
+            secy = new char[ylen+1];
+            NewArray(&ya, ylen, 3);
+            copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+                ylen,ya,seqy,secy);
+
+            /* declare variable specific to this pair of TMalign */
+            d0_out=5.0;
+            seqM.clear();
+            seqxA.clear();
+            seqyA.clear();
+            rmsd0 = 0.0;
+            Liden=0;
+            int *invmap = new int[ylen+1];
+            double Lnorm_ass=len_aa;
+            if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_ass=len_na;
+
+            /* entry function for structure alignment */
+            se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                0, false, true, false,
+                mol_vec1[i]+mol_vec2[j], 1, invmap);
+
+            /* print result */
+            TM1_mat[i][j]=TM2; // normalized by chain1
+            TM2_mat[i][j]=TM1; // normalized by chain2
+            seqxA_mat[i][j]=seqxA;
+            seqyA_mat[i][j]=seqyA;
+            TMave_mat[i][j]=TM4*Lnorm_ass;
+            /* clean up (seqM, seqxA, seqyA are cleared before their next use) */
+            delete[]seqy;
+            delete[]secy;
+            DeleteArray(&ya,ylen);
+            delete[]invmap; // previously leaked once per scored chain pair
+        }
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&xt,xlen);
+    }
+    return total_score;
+}
+
+/* Final alignment and output for a pair of complexes.
+ * The chains paired in assign1_list are concatenated and aligned with
+ * TMalign_main(); the result is printed with output_results(), with
+ * unassigned chains appended as blocks aligned against gaps and chains
+ * separated by '*'. seqM_mat[i][j] receives the per-pair slice of seqM.
+ * If full_opt is set, each assigned pair is additionally rescored with
+ * se_main() under the complex-level t0, u0 and printed on its own.
+ * xa, ya, seqx, seqy, secx and secy are used as local work pointers only. */
+void MMalign_final(
+    const string xname, const string yname,
+    const vector<string> chainID_list1, const vector<string> chainID_list2,
+    string fname_super, string fname_lign, string fname_matrix,
+    const vector<vector<vector<double> > >&xa_vec,
+    const vector<vector<vector<double> > >&ya_vec,
+    const vector<vector<char> >&seqx_vec, const vector<vector<char> >&seqy_vec,
+    const vector<vector<char> >&secx_vec, const vector<vector<char> >&secy_vec,
+    const vector<int> &mol_vec1, const vector<int> &mol_vec2,
+    const vector<int> &xlen_vec, const vector<int> &ylen_vec,
+    double **xa, double **ya, char *seqx, char *seqy, char *secx, char *secy,
+    int len_aa, int len_na, int chain1_num, int chain2_num,
+    double **TM1_mat, double **TM2_mat, double **TMave_mat,
+    vector<vector<string> >&seqxA_mat, vector<vector<string> >&seqM_mat,
+    vector<vector<string> >&seqyA_mat, int *assign1_list, int *assign2_list,
+    vector<string>&sequence, const double d0_scale, const bool m_opt,
+    const int o_opt, const int outfmt_opt, const int ter_opt,
+    const int split_opt, const bool a_opt, const bool d_opt,
+    const bool fast_opt, const bool full_opt, const int mirror_opt,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
+{
+    int i,j;
+    int xlen=0;
+    int ylen=0;
+    for (i=0;i<chain1_num;i++) xlen+=xlen_vec[i];
+    for (j=0;j<chain2_num;j++) ylen+=ylen_vec[j];
+    if (xlen<=3 || ylen<=3) return;
+
+    seqx = new char[xlen+1];
+    secx = new char[xlen+1];
+    NewArray(&xa, xlen, 3);
+    seqy = new char[ylen+1];
+    secy = new char[ylen+1];
+    NewArray(&ya, ylen, 3);
+
+    int mol_type=copy_chain_pair_data(xa_vec, ya_vec, seqx_vec, seqy_vec,
+        secx_vec, secy_vec, mol_vec1, mol_vec2, xlen_vec, ylen_vec,
+        xa, ya, seqx, seqy, secx, secy, chain1_num, chain2_num,
+        seqxA_mat, seqyA_mat, assign1_list, assign2_list, sequence);
+
+    /* declare variable specific to this pair of TMalign */
+    double t0[3], u0[3][3];
+    double TM1, TM2, TM3, TM4, TM5; // TM3-TM5: for a_opt, u_opt, d_opt
+    double d0_0, TM_0, d0A, d0B, d0u, d0a;
+    double d0_out=5.0;
+    string seqM, seqxA, seqyA;// for output alignment
+    double rmsd0 = 0.0;
+    int L_ali;                // Aligned length in standard_TMscore
+    double Liden=0;
+    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+    int n_ali=0, n_ali8=0;
+
+    double Lnorm_ass=len_aa+len_na;
+
+    /* entry function for structure alignment */
+    TMalign_main(xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        3, a_opt, false, d_opt, fast_opt, mol_type, -1);
+
+    /* prepare full complex alignment */
+    string chainID1="";
+    string chainID2="";
+    sequence.clear();
+    sequence.push_back(""); // seqxA
+    sequence.push_back(""); // seqyA
+    sequence.push_back(""); // seqM
+    int aln_start=0;
+    int aln_end=0;
+    for (i=0;i<chain1_num;i++)
+    {
+        j=assign1_list[i];
+        if (j<0) continue;
+        chainID1+=chainID_list1[i];
+        chainID2+=chainID_list2[j];
+        sequence[0]+=seqxA_mat[i][j]+'*';
+        sequence[1]+=seqyA_mat[i][j]+'*';
+
+        aln_end+=seqxA_mat[i][j].size();
+        seqM_mat[i][j]=seqM.substr(aln_start,aln_end-aln_start);
+        sequence[2]+=seqM_mat[i][j]+'*';
+        aln_start=aln_end;
+    }
+
+    /* prepare unaligned region */
+    for (i=0;i<chain1_num;i++)
+    {
+        if (assign1_list[i]>=0) continue;
+        chainID1+=chainID_list1[i];
+        chainID2+=':';
+        string s(seqx_vec[i].begin(),seqx_vec[i].end());
+        sequence[0]+=s.substr(0,xlen_vec[i])+'*';
+        sequence[1]+=string(xlen_vec[i],'-')+'*';
+        s.clear();
+        sequence[2]+=string(xlen_vec[i],' ')+'*';
+    }
+    for (j=0;j<chain2_num;j++)
+    {
+        if (assign2_list[j]>=0) continue;
+        chainID1+=':';
+        chainID2+=chainID_list2[j];
+        string s(seqy_vec[j].begin(),seqy_vec[j].end());
+        sequence[0]+=string(ylen_vec[j],'-')+'*';
+        sequence[1]+=s.substr(0,ylen_vec[j])+'*';
+        s.clear();
+        sequence[2]+=string(ylen_vec[j],' ')+'*';
+    }
+
+    /* print alignment */
+    output_results(xname, yname, chainID1.c_str(), chainID2.c_str(),
+        xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+        sequence[2].c_str(), sequence[0].c_str(), sequence[1].c_str(),
+        Liden, n_ali8, L_ali, TM_ali, rmsd_ali,
+        TM_0, d0_0, d0A, d0B, 0, d0_scale, d0a, d0u, 
+        (m_opt?fname_matrix:"").c_str(), outfmt_opt, ter_opt, true,
+        split_opt, o_opt, fname_super,
+        false, a_opt, false, d_opt, mirror_opt, resi_vec1, resi_vec2);
+
+    /* clean up */
+    seqM.clear();
+    seqxA.clear();
+    seqyA.clear();
+    delete [] seqx;
+    delete [] seqy;
+    delete [] secx;
+    delete [] secy;
+    DeleteArray(&xa,xlen);
+    DeleteArray(&ya,ylen);
+    sequence[0].clear();
+    sequence[1].clear();
+    sequence[2].clear();
+
+    if (!full_opt) return;
+
+    cout<<"# End of alignment for full complex. The following blocks list alignments for individual chains."<<endl;
+
+    /* re-compute chain level alignment */
+    for (i=0;i<chain1_num;i++)
+    {
+        j=assign1_list[i];
+        if (j<0) continue;
+        ylen=ylen_vec[j]; // checked before any per-chain allocation, so skipping leaks nothing
+        if (ylen<3)
+        {
+            TM1_mat[i][j]=TM2_mat[i][j]=TMave_mat[i][j]=-1;
+            continue;
+        }
+        xlen=xlen_vec[i];
+        seqx = new char[xlen+1];
+        secx = new char[xlen+1];
+        NewArray(&xa, xlen, 3);
+        copy_chain_data(xa_vec[i],seqx_vec[i],secx_vec[i],
+            xlen,xa,seqx,secx);
+
+        double **xt;
+        NewArray(&xt, xlen, 3);
+        do_rotation(xa, xt, xlen, t0, u0);
+
+        seqy = new char[ylen+1];
+        secy = new char[ylen+1];
+        NewArray(&ya, ylen, 3);
+        copy_chain_data(ya_vec[j],seqy_vec[j],secy_vec[j],
+            ylen,ya,seqy,secy);
+
+        /* declare variable specific to this pair of TMalign */
+        d0_out=5.0;
+        rmsd0 = 0.0;
+        Liden=0;
+        int *invmap = new int[ylen+1];
+        seqM="";
+        seqxA="";
+        seqyA="";
+        double Lnorm_ass=len_aa;
+        if (mol_vec1[i]+mol_vec2[j]>0) Lnorm_ass=len_na;
+        sequence[0]=seqxA_mat[i][j];
+        sequence[1]=seqyA_mat[i][j];
+
+        /* entry function for structure alignment */
+        se_main(xt, ya, seqx, seqy, TM1, TM2, TM3, TM4, TM5,
+            d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+            rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+            xlen, ylen, sequence, Lnorm_ass, d0_scale,
+            1, a_opt, true, d_opt, mol_vec1[i]+mol_vec2[j], 1, invmap);
+
+        /* print result */
+        output_results(xname, yname,
+            chainID_list1[i].c_str(), chainID_list2[j].c_str(),
+            xlen, ylen, t0, u0, TM1, TM2, TM3, TM4, TM5, rmsd0, d0_out,
+            seqM_mat[i][j].c_str(), seqxA_mat[i][j].c_str(),
+            seqyA_mat[i][j].c_str(), Liden, n_ali8, L_ali, TM_ali, rmsd_ali,
+            TM_0, d0_0, d0A, d0B, Lnorm_ass, d0_scale, d0a, d0u, 
+            "", outfmt_opt, ter_opt, false, split_opt, 0,
+            "", false, a_opt, false, d_opt, 0, resi_vec1, resi_vec2);
+
+        /* clean up */
+        seqxA.clear();
+        seqM.clear();
+        seqyA.clear();
+        sequence[0].clear();
+        sequence[1].clear();
+        delete[]seqy;
+        delete[]secy;
+        DeleteArray(&ya,ylen);
+        delete[]seqx;
+        delete[]secx;
+        DeleteArray(&xa,xlen);
+        DeleteArray(&xt,xlen);
+        delete[]invmap; // previously leaked once per printed chain pair
+    }
+    sequence.clear();
+}
diff --git a/modules/bindings/src/tmalign/NW.h b/modules/bindings/src/tmalign/NW.h
index a9dd6a51927cee4af67ce2341f6f4b93dde40359..4c9984853687c47fdf093f246d27ffe1c711416e 100644
--- a/modules/bindings/src/tmalign/NW.h
+++ b/modules/bindings/src/tmalign/NW.h
@@ -1,10 +1,10 @@
-/* Partial implementation of Needleman-Wunsch (NW) dymanamic programming for
+/* Partial implementation of Needleman-Wunsch (NW) dynamic programming for
  * global alignment. The three NWDP_TM functions below are not complete
  * implementation of NW algorithm because gap jumping in the standard Gotoh
  * algorithm is not considered. Since the gap opening and gap extension is
  * the same, this is not a problem. This code was exploited in TM-align
  * because it is about 1.5 times faster than a complete NW implementation.
- * Nevertheless, if gap openning != gap extension shall be implemented in
+ * Nevertheless, if gap opening != gap extension shall be implemented in
  * the future, the Gotoh algorithm must be implemented. In rare scenarios,
  * it is also possible to have asymmetric alignment (i.e. 
  * TMalign A.pdb B.pdb and TMalign B.pdb A.pdb have different TM_A and TM_B
@@ -24,15 +24,15 @@ void NWDP_TM(double **score, bool **path, double **val,
     //initialization
     for(i=0; i<=len1; i++)
     {
-        //val[i][0]=0;
-        val[i][0]=i*gap_open;
+        val[i][0]=0;
+        //val[i][0]=i*gap_open;
         path[i][0]=false; //not from diagonal
     }
 
     for(j=0; j<=len2; j++)
     {
-        //val[0][j]=0;
-        val[0][j]=j*gap_open;
+        val[0][j]=0;
+        //val[0][j]=j*gap_open;
         path[0][j]=false; //not from diagonal
         j2i[j]=-1;    //all are not aligned, only use j2i[1:len2]
     }      
@@ -179,11 +179,91 @@ void NWDP_TM(bool **path, double **val, double **x, double **y,
     }
 }
 
+/* This is the same as the previous NWDP_TM, except for the lack of rotation
+ * Input: vectors x, y, scale factor d02, and gap_open
+ * Output: j2i[0:len2-1] \in {0:len1-1} U {-1}, where -1 means unaligned
+ * path[0:len1, 0:len2] is true for cells reached from the diagonal */
+void NWDP_SE(bool **path, double **val, double **x, double **y,
+    int len1, int len2, double d02, double gap_open, int j2i[])
+{
+    int i, j;
+    double h, v, d;
+
+    for(i=0; i<=len1; i++)
+    {
+        val[i][0]=0;
+        path[i][0]=false; //not from diagonal
+    }
+
+    for(j=0; j<=len2; j++)
+    {
+        val[0][j]=0;
+        path[0][j]=false; //not from diagonal
+        j2i[j]=-1;    //all are not aligned; this loop writes j2i[0:len2]
+    }      
+    double dij;
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        for(j=1; j<=len2; j++)
+        {
+            dij=dist(&x[i-1][0], &y[j-1][0]);    
+            d=val[i-1][j-1] +  1.0/(1+dij/d02); //diagonal: previous score plus score of pairing (i,j)
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+
+            if(d>=h && d>=v)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else 
+            {
+                path[i][j]=false; //from horizontal or vertical
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for j
+    } //for i
+
+    //trace back to extract the alignment
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1; //0-based indices of the aligned pair
+            i--;
+            j--;
+        }
+        else 
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--; //same tie-break as in the fill step above
+            else i--;
+        }
+    }
+}
+
 /* +ss
  * Input: secondary structure secx, secy, and gap_open
  * Output: j2i[1:len2] \in {1:len1} U {-1}
  * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
-void NWDP_TM(bool **path, double **val, const int *secx, const int *secy,
+void NWDP_TM(bool **path, double **val, const char *secx, const char *secy,
     const int len1, const int len2, const double gap_open, int j2i[])
 {
 
@@ -193,15 +273,15 @@ void NWDP_TM(bool **path, double **val, const int *secx, const int *secy,
     //initialization
     for(i=0; i<=len1; i++)
     {
-        //val[i][0]=0;
-        val[i][0]=i*gap_open;
+        val[i][0]=0;
+        //val[i][0]=i*gap_open;
         path[i][0]=false; //not from diagonal
     }
 
     for(j=0; j<=len2; j++)
     {
-        //val[0][j]=0;
-        val[0][j]=j*gap_open;
+        val[0][j]=0;
+        //val[0][j]=j*gap_open;
         path[0][j]=false; //not from diagonal
         j2i[j]=-1;    //all are not aligned, only use j2i[1:len2]
     }      
diff --git a/modules/bindings/src/tmalign/NWalign.cpp b/modules/bindings/src/tmalign/NWalign.cpp
index 269e26315a5b84321488cdf4baaabb620841cf04..6b7b86c2db202c7338b3d7b576d49337e0e4ec4b 100644
--- a/modules/bindings/src/tmalign/NWalign.cpp
+++ b/modules/bindings/src/tmalign/NWalign.cpp
@@ -47,6 +47,10 @@ void print_extra_help()
 "             one read all sequence; -split >=1 means each sequence is an\n"
 "             individual entry."
 "\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
 "    -outfmt  Output format\n"
 "             0: (default) full output\n"
 "             1: fasta format compact output\n"
@@ -103,6 +107,7 @@ int main(int argc, char *argv[])
     int    ter_opt   =3;     // TER, END, or different chainID
     int    split_opt =0;     // do not split chain
     int    outfmt_opt=0;     // set -outfmt to full output
+    int    het_opt=0;        // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
     string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
     string suffix_opt="";    // set -suffix to empty
@@ -167,6 +172,10 @@ int main(int argc, char *argv[])
         {
             glocal=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
         else if (xname.size() == 0) xname=argv[i];
         else if (yname.size() == 0) yname=argv[i];
         else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
@@ -189,9 +198,9 @@ int main(int argc, char *argv[])
     if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size()))
         PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
     if (atom_opt.size()!=4)
-        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
     if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
-        PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein.");
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
     else if (mol_opt=="protein" && atom_opt=="auto")
         atom_opt=" CA ";
     else if (mol_opt=="RNA" && atom_opt=="auto")
@@ -224,12 +233,12 @@ int main(int argc, char *argv[])
     vector<int> mol_vec2;              // molecule type of chain2, RNA if >0
     vector<string> chainID_list1;      // list of chainID1
     vector<string> chainID_list2;      // list of chainID2
-    int    i,j;                // file index
-    int    chain_i,chain_j;    // chain index
-    int    xlen, ylen;         // chain length
-    int    xchainnum,ychainnum;// number of chains in a PDB file
-    char   *seqx, *seqy;       // for the protein sequence 
-    int    l; // residue index
+    int  i,j;                // file index
+    int  chain_i,chain_j;    // chain index
+    int  xlen, ylen;         // chain length
+    int  xchainnum,ychainnum;// number of chains in a PDB file
+    char *seqx, *seqy;       // for the protein sequence 
+    int  l;                  // residue index
 
     /* loop over file names */
     for (i=0;i<chain1_list.size();i++)
@@ -239,7 +248,7 @@ int main(int argc, char *argv[])
         if (infmt1_opt>=4) xchainnum=get_FASTA_lines(xname, PDB_lines1, 
                 chainID_list1, mol_vec1, ter_opt, split_opt);
         else xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
-                mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt);
+                mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
         if (!xchainnum)
         {
             cerr<<"Warning! Cannot parse file: "<<xname
@@ -274,8 +283,8 @@ int main(int argc, char *argv[])
                          ychainnum=get_FASTA_lines(yname, PDB_lines2,
                             chainID_list2, mol_vec2, ter_opt, split_opt);
                     else ychainnum=get_PDB_lines(yname, PDB_lines2,
-                            chainID_list2, mol_vec2, ter_opt,
-                            infmt2_opt, atom_opt, split_opt);
+                            chainID_list2, mol_vec2, ter_opt, infmt2_opt,
+                            atom_opt, split_opt, het_opt);
                     if (!ychainnum)
                     {
                         cerr<<"Warning! Cannot parse file: "<<yname
@@ -305,15 +314,19 @@ int main(int argc, char *argv[])
                     int L_ali;                // Aligned length
                     double Liden=0;
                     string seqM, seqxA, seqyA;// for output alignment
+                    int *invmap = new int[ylen+1];
                     
-                    int aln_score=NWalign(seqx, seqy, xlen, ylen, seqxA, seqyA, 
-                        mol_vec1[chain_i]+mol_vec2[chain_j], glocal);
+                    int aln_score=NWalign_main(seqx, seqy, xlen, ylen,
+                        seqxA, seqyA, mol_vec1[chain_i]+mol_vec2[chain_j],
+                        invmap, (outfmt_opt>=2)?1:0, glocal);
                     
-                    get_seqID(seqxA, seqyA, seqM, Liden, L_ali);
+                    if (outfmt_opt>=2) get_seqID(invmap, seqx, seqy, 
+                        ylen, Liden, L_ali);
+                    else get_seqID(seqxA, seqyA, seqM, Liden, L_ali);
 
                     output_NWalign_results(
-                        xname.substr(dir1_opt.size()),
-                        yname.substr(dir2_opt.size()),
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
                         chainID_list1[chain_i].c_str(),
                         chainID_list2[chain_j].c_str(),
                         xlen, ylen, seqM.c_str(), seqxA.c_str(),
@@ -324,6 +337,7 @@ int main(int argc, char *argv[])
                     seqxA.clear();
                     seqyA.clear();
                     delete [] seqy;
+                    delete [] invmap;
                 } // chain_j
                 if (chain2_list.size()>1)
                 {
diff --git a/modules/bindings/src/tmalign/NWalign.h b/modules/bindings/src/tmalign/NWalign.h
index e0125bb61113925a2eaa92a138d67991b9afb126..2c7e36a1112f8c3a4a7cbb9d813e40e39698ae6e 100644
--- a/modules/bindings/src/tmalign/NWalign.h
+++ b/modules/bindings/src/tmalign/NWalign.h
@@ -12,8 +12,8 @@ using namespace std;
 const int gapopen_blosum62=-11;
 const int gapext_blosum62=-1;
 
-const int gapopen_blastn=-5;
-const int gapext_blastn=-2;
+const int gapopen_blastn=-15; //-5;
+const int gapext_blastn =-4;  //-2;
 
 /* initialize matrix in gotoh algorithm */
 void init_gotoh_mat(int **S, int **JumpH, int **JumpV, int **P,
@@ -191,14 +191,18 @@ int calculate_score_gotoh(const int xlen,const int ylen, int **S,
 
 /* trace back dynamic programming path to diciper pairwise alignment */
 void trace_back_gotoh(const char *seqx, const char *seqy,
-    int ** JumpH, int ** JumpV, int ** P,
-    string& seqxA, string& seqyA, const int xlen, const int ylen)
+    int ** JumpH, int ** JumpV, int ** P, string& seqxA, string& seqyA,
+    const int xlen, const int ylen, int *invmap, const int invmap_only=1)
 {
-    int i=xlen;
-    int j=ylen;
+    int i,j;
     int gaplen,p;
-    char *buf=new char [MAX(xlen,ylen)+1];
+    char *buf=NULL;
+
+    if (invmap_only) for (j = 0; j < ylen; j++) invmap[j] = -1;
+    if (invmap_only!=1) buf=new char [MAX(xlen,ylen)+1];
 
+    i=xlen;
+    j=ylen;
     while(i+j)
     {
         gaplen=0;
@@ -206,6 +210,7 @@ void trace_back_gotoh(const char *seqx, const char *seqy,
         {
             gaplen=JumpH[i][j];
             j-=gaplen;
+            if (invmap_only==1) continue;
             strncpy(buf,seqy+j,gaplen);
             buf[gaplen]=0;
             seqyA=buf+seqyA;
@@ -217,6 +222,7 @@ void trace_back_gotoh(const char *seqx, const char *seqy,
         {
             gaplen=JumpV[i][j];
             i-=gaplen;
+            if (invmap_only==1) continue;
             strncpy(buf,seqx+i,gaplen);
             buf[gaplen]=0;
             seqxA=buf+seqxA;
@@ -246,10 +252,14 @@ void trace_back_gotoh(const char *seqx, const char *seqy,
             }
             i--;
             j--;
-            seqxA=seqx[i]+seqxA;
-            seqyA=seqy[j]+seqyA;
+            if (invmap_only) invmap[j]=i;
+            if (invmap_only!=1)
+            {
+                seqxA=seqx[i]+seqxA;
+                seqyA=seqy[j]+seqyA;
+            }
         }
-    }   
+    }
     delete [] buf;
 }
 
@@ -257,16 +267,20 @@ void trace_back_gotoh(const char *seqx, const char *seqy,
 /* trace back Smith-Waterman dynamic programming path to diciper 
  * pairwise local alignment */
 void trace_back_sw(const char *seqx, const char *seqy,
-    int **JumpH, int **JumpV, int **P,
-    string& seqxA, string& seqyA, const int xlen, const int ylen)
+    int **JumpH, int **JumpV, int **P, string& seqxA, string& seqyA,
+    const int xlen, const int ylen, int *invmap, const int invmap_only=1)
 {
-    int i=xlen;
-    int j=ylen;
+    int i;
+    int j;
     int gaplen,p;
-    char *buf=new char [xlen+ylen+1];
+    bool found_start_cell=false; // find the first non-zero cell in P
+    char *buf=NULL;
+
+    if (invmap_only) for (j = 0; j < ylen; j++) invmap[j] = -1;
+    if (invmap_only!=1) buf=new char [xlen+ylen+1]; // N-terminal copy writes up to buf[i+j]
 
-    // find the first non-zero cell in P
-    bool found_start_cell=false;
+    i=xlen;
+    j=ylen;
     for (i=xlen;i>=0;i--)
     {
         for (j=ylen;j>=0;j--)
@@ -281,19 +295,22 @@ void trace_back_sw(const char *seqx, const char *seqy,
     }
 
     /* copy C terminal sequence */
-    for (p=0;p<ylen-j;p++) buf[p]='-';
-    buf[ylen-j]=0;
-    seqxA=buf;
-    strncpy(buf,seqx+i,xlen-i);
-    buf[xlen-i]=0;
-    seqxA+=buf;
-
-    strncpy(buf,seqy+j,ylen-j);
-    buf[ylen-j]=0;
-    seqyA+=buf;
-    for (p=0;p<xlen-i;p++) buf[p]='-';
-    buf[xlen-i]=0;
-    seqyA+=buf;
+    if (invmap_only!=1)
+    {
+        for (p=0;p<ylen-j;p++) buf[p]='-';
+        buf[ylen-j]=0;
+        seqxA=buf;
+        strncpy(buf,seqx+i,xlen-i);
+        buf[xlen-i]=0;
+        seqxA+=buf;
+
+        strncpy(buf,seqy+j,ylen-j);
+        buf[ylen-j]=0;
+        seqyA+=buf;
+        for (p=0;p<xlen-i;p++) buf[p]='-';
+        buf[xlen-i]=0;
+        seqyA+=buf;
+    }
 
     if (i<0||j<0)
     {
@@ -309,6 +326,7 @@ void trace_back_sw(const char *seqx, const char *seqy,
         {
             gaplen=JumpH[i][j];
             j-=gaplen;
+            if (invmap_only==1) continue;
             strncpy(buf,seqy+j,gaplen);
             buf[gaplen]=0;
             seqyA=buf+seqyA;
@@ -320,6 +338,7 @@ void trace_back_sw(const char *seqx, const char *seqy,
         {
             gaplen=JumpV[i][j];
             i-=gaplen;
+            if (invmap_only==1) continue;
             strncpy(buf,seqx+i,gaplen);
             buf[gaplen]=0;
             seqxA=buf+seqxA;
@@ -331,26 +350,38 @@ void trace_back_sw(const char *seqx, const char *seqy,
         {
             i--;
             j--;
-            seqxA=seqx[i]+seqxA;
-            seqyA=seqy[j]+seqyA;
+            if (invmap_only) invmap[j]=i;
+            if (invmap_only!=1)
+            {
+                seqxA=seqx[i]+seqxA;
+                seqyA=seqy[j]+seqyA;
+            }
         }
     }
     /* copy N terminal sequence */
-    for (p=0;p<j;p++) buf[p]='-';
-    strncpy(buf+j,seqx,i);
-    buf[i+j]=0;
-    seqxA=buf+seqxA;
-
-    strncpy(buf,seqy,j);
-    for (p=j;p<j+i;p++) buf[p]='-';
-    buf[i+j]=0;
-    seqyA=buf+seqyA;
+    if (invmap_only!=1)
+    {
+        for (p=0;p<j;p++) buf[p]='-';
+        strncpy(buf+j,seqx,i);
+        buf[i+j]=0;
+        seqxA=buf+seqxA;
+
+        strncpy(buf,seqy,j);
+        for (p=j;p<j+i;p++) buf[p]='-';
+        buf[i+j]=0;
+        seqyA=buf+seqyA;
+    }
     delete [] buf;
 }
 
-/* entry function for NWalign */
-int NWalign(const char *seqx, const char *seqy, const int xlen, const int ylen,
-    string & seqxA,string & seqyA, const int mol_type, const int glocal=0)
+/* entry function for NWalign
+ * invmap_only - whether to return seqxA and seqyA or to return invmap
+ *               0: only return seqxA and seqyA
+ *               1: only return invmap
+ *               2: return seqxA, seqyA and invmap */
+int NWalign_main(const char *seqx, const char *seqy, const int xlen,
+    const int ylen, string & seqxA, string & seqyA, const int mol_type,
+    int *invmap, const int invmap_only=0, const int glocal=0)
 {
     int **JumpH;
     int **JumpV;
@@ -369,6 +400,11 @@ int NWalign(const char *seqx, const char *seqy, const int xlen, const int ylen,
     {
         gapopen=gapopen_blastn;
         gapext =gapext_blastn;
+        if (glocal==3)
+        {
+            gapopen=-5;
+            gapext =-2;
+        }
     }
 
     for (i=0;i<xlen+1;i++)
@@ -383,9 +419,14 @@ int NWalign(const char *seqx, const char *seqy, const int xlen, const int ylen,
     aln_score=calculate_score_gotoh(xlen, ylen, S, JumpH, JumpV, P,
         gapopen, gapext, glocal);
 
-    if (glocal<3) trace_back_gotoh(seqx,seqy,JumpH,JumpV,P,seqxA,seqyA,xlen,ylen);
-    else trace_back_sw(seqx,seqy,JumpH,JumpV,P,seqxA,seqyA,xlen,ylen);
-    
+    seqxA.clear();
+    seqyA.clear();
+
+    if (glocal<3) trace_back_gotoh(seqx, seqy, JumpH, JumpV, P,
+            seqxA, seqyA, xlen, ylen, invmap, invmap_only);
+    else trace_back_sw(seqx, seqy, JumpH, JumpV, P, seqxA, seqyA,
+            xlen, ylen, invmap, invmap_only);
+
     DeleteArray(&JumpH, xlen+1);
     DeleteArray(&JumpV, xlen+1);
     DeleteArray(&P, xlen+1);
@@ -393,7 +434,23 @@ int NWalign(const char *seqx, const char *seqy, const int xlen, const int ylen,
     return aln_score; // aligment score
 }
 
-double get_seqID(const string& seqxA, const string& seqyA,
+void get_seqID(int *invmap, const char *seqx, const char *seqy, 
+    const int ylen, double &Liden,int &L_ali)
+{
+    Liden=0;
+    L_ali=0;
+    int i,j;
+    for (j=0;j<ylen;j++)
+    {
+        i=invmap[j];
+        if (i<0) continue;
+        L_ali+=1;
+        Liden+=(seqx[i]==seqy[j]);
+    }
+    //return L_ali?1.*Liden/L_ali:0;
+}
+
+void get_seqID(const string& seqxA, const string& seqyA,
     string &seqM,double &Liden,int &L_ali)
 {
     Liden=0;
@@ -408,10 +465,9 @@ double get_seqID(const string& seqxA, const string& seqyA,
         else seqM+=' ';
         L_ali+=(seqxA[i]!='-' && seqyA[i]!='-');
     }
-    return 1.*Liden/L_ali;
+    //return L_ali?1.*Liden/L_ali:0;
 }
 
-
 void output_NWalign_results(
     const string xname, const string yname,
     const char *chainID1, const char *chainID2,
diff --git a/modules/bindings/src/tmalign/OST_INFO b/modules/bindings/src/tmalign/OST_INFO
index f0a86222755c880d5a188a88124bdfdf7d3de488..16ce115699e3266a2a8d6dd7eecfafc01758aa6e 100644
--- a/modules/bindings/src/tmalign/OST_INFO
+++ b/modules/bindings/src/tmalign/OST_INFO
@@ -1,7 +1,7 @@
-Source code has been cloned February 17 2019 from:
+Source code has been cloned August 2 2022 from:
 
 https://github.com/kad-ecoli/TMalign
 
 last commit:
-2ea5b61c6b0c8ded05ff0aea09546d45902b3741
+f0824499d8ab4fa84b2e75d253de80ab2c894c56
 
diff --git a/modules/bindings/src/tmalign/TMalign.cpp b/modules/bindings/src/tmalign/TMalign.cpp
index f62fc45848fe54f518bfc373d7a39738c53371a0..7ea33e1a72155157b385a45a1710352813e52a1e 100644
--- a/modules/bindings/src/tmalign/TMalign.cpp
+++ b/modules/bindings/src/tmalign/TMalign.cpp
@@ -8,12 +8,12 @@ void print_version()
 {
     cout << 
 "\n"
-" *********************************************************************\n"
-" * TM-align (Version 20190209): protein and RNA structure alignment  *\n"
-" * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n"
-" *             S Gong, C Zhang, Y Zhang. Bioinformatics (2019)       *\n"
-" * Please email comments and suggestions to yangzhanglab@umich.edu   *\n"
-" *********************************************************************"
+" **********************************************************************\n"
+" * TM-align (Version 20210520): protein and RNA structure alignment   *\n"
+" * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005)  *\n"
+" *             S Gong, C Zhang, Y Zhang. Bioinformatics, bz282 (2019) *\n"
+" * Please email comments and suggestions to yangzhanglab@umich.edu    *\n"
+" **********************************************************************"
     << endl;
 }
 
@@ -36,6 +36,9 @@ void print_extra_help()
 "             under 'chain2_folder'\n"
 "             $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n"
 "\n"
+"    -pair    (Only when -dir1 and -dir2 are set, default is no) whether to\n"
+"             perform pair alignment rather than all-against-all alignment\n"
+"\n"
 "    -suffix  (Only when -dir1 and/or -dir2 are set, default is empty)\n"
 "             add file name suffix to files listed by chain1_list or chain2_list\n"
 "\n"
@@ -82,6 +85,16 @@ void print_extra_help()
 "                  0: (default, same as F) normalized by second structure\n"
 "                  1: same as T, normalized by average structure length\n"
 "\n"
+"    -cp      Alignment with circular permutation\n"
+"\n"
+"    -mirror  Whether to align the mirror image of input structure\n"
+"             0: (default) do not align mirrored structure\n"
+"             1: align mirror of chain1 to origin chain2\n"
+"\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
 "    -infmt1  Input format for chain1\n"
 "    -infmt2  Input format for chain2\n"
 "            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
@@ -109,7 +122,7 @@ void print_help(bool h_opt=false)
 "\n"
 "    -i    Start with an alignment specified in fasta file 'align.txt'\n"
 "\n"
-"    -I    Stick to the alignment 'align.txt'\n"
+"    -I    Stick to the alignment specified in 'align.txt'\n"
 "\n"
 "    -m    Output TM-align rotation matrix\n"
 "\n"
@@ -161,8 +174,7 @@ int main(int argc, char *argv[])
     bool h_opt = false; // print full help message
     bool v_opt = false; // print version
     bool m_opt = false; // flag for -m, output rotation matrix
-    bool i_opt = false; // flag for -i, with user given initial alignment
-    bool I_opt = false; // flag for -I, stick to user given alignment
+    int  i_opt = 0;     // 1 for -i, 3 for -I
     bool o_opt = false; // flag for -o, output superposed structure
     int  a_opt = 0;     // flag for -a, do not normalized by average length
     bool u_opt = false; // flag for -u, normalized by user specified length
@@ -175,12 +187,16 @@ int main(int argc, char *argv[])
     int    split_opt =0;     // do not split chain
     int    outfmt_opt=0;     // set -outfmt to full output
     bool   fast_opt  =false; // flags for -fast, fTM-align algorithm
+    int    cp_opt    =0;     // do not check circular permutation
+    int    mirror_opt=0;     // do not align mirror
+    int    het_opt   =0;     // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
     string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
     string suffix_opt="";    // set -suffix to empty
     string dir_opt   ="";    // set -dir to empty
     string dir1_opt  ="";    // set -dir1 to empty
     string dir2_opt  ="";    // set -dir2 to empty
+    bool   pair_opt=false;   // pair alignment
     int    byresi_opt=0;     // set -byresi to 0
     vector<string> chain1_list; // only when -dir1 is set
     vector<string> chain2_list; // only when -dir2 is set
@@ -222,16 +238,20 @@ int main(int argc, char *argv[])
         }
         else if ( !strcmp(argv[i],"-i") && i < (argc-1) )
         {
-            fname_lign = argv[i + 1];      i_opt = true; i++;
+            if (i_opt==3)
+                PrintErrorAndQuit("ERROR! -i and -I cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 1; i++;
+        }
+        else if (!strcmp(argv[i], "-I") && i < (argc-1) )
+        {
+            if (i_opt==1)
+                PrintErrorAndQuit("ERROR! -I and -i cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 3; i++;
         }
         else if (!strcmp(argv[i], "-m") && i < (argc-1) )
         {
             fname_matrix = argv[i + 1];    m_opt = true; i++;
         }// get filename for rotation matrix
-        else if (!strcmp(argv[i], "-I") && i < (argc-1) )
-        {
-            fname_lign = argv[i + 1];      I_opt = true; i++;
-        }
         else if (!strcmp(argv[i], "-fast"))
         {
             fast_opt = true;
@@ -272,6 +292,10 @@ int main(int argc, char *argv[])
         {
             dir2_opt=argv[i + 1]; i++;
         }
+        else if ( !strcmp(argv[i],"-pair") )
+        {
+            pair_opt=true;
+        }
         else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) )
         {
             suffix_opt=argv[i + 1]; i++;
@@ -288,6 +312,18 @@ int main(int argc, char *argv[])
         {
             byresi_opt=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-cp") )
+        {
+            cp_opt=1;
+        }
+        else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) )
+        {
+            mirror_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
         else if (xname.size() == 0) xname=argv[i];
         else if (yname.size() == 0) yname=argv[i];
         else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
@@ -320,16 +356,14 @@ int main(int argc, char *argv[])
             PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
     }
     if (atom_opt.size()!=4)
-        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
     if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
-        PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein.");
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
     else if (mol_opt=="protein" && atom_opt=="auto")
         atom_opt=" CA ";
     else if (mol_opt=="RNA" && atom_opt=="auto")
         atom_opt=" C3'";
 
-    if (i_opt && I_opt)
-        PrintErrorAndQuit("ERROR! -I and -i cannot be used together");
     if (u_opt && Lnorm_ass<=0)
         PrintErrorAndQuit("Wrong value for option -u!  It should be >0");
     if (d_opt && d0_scale<=0)
@@ -338,7 +372,7 @@ int main(int argc, char *argv[])
         PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d");
     if (byresi_opt!=0)
     {
-        if (i_opt || I_opt)
+        if (i_opt)
             PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I");
         if (byresi_opt<0 || byresi_opt>3)
             PrintErrorAndQuit("-byresi can only be 0, 1, 2 or 3");
@@ -351,11 +385,15 @@ int main(int argc, char *argv[])
         PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1");
     if (split_opt<0 || split_opt>2)
         PrintErrorAndQuit("-split can only be 0, 1 or 2");
+    if (cp_opt!=0 && cp_opt!=1)
+        PrintErrorAndQuit("-cp can only be 0 or 1");
+    if (cp_opt && i_opt)
+        PrintErrorAndQuit("-cp cannot be used with -i or -I");
 
     /* read initial alignment file from 'align.txt' */
-    if (i_opt || I_opt) read_user_alignment(sequence, fname_lign, I_opt);
+    if (i_opt) read_user_alignment(sequence, fname_lign, i_opt);
 
-    if (byresi_opt) I_opt=true;
+    if (byresi_opt) i_opt=3;
 
     if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt
         PrintErrorAndQuit("ERROR! Please provide a file name for option -m!");
@@ -383,16 +421,19 @@ int main(int argc, char *argv[])
     vector<string> chainID_list2;      // list of chainID2
     int    i,j;                // file index
     int    chain_i,chain_j;    // chain index
+    int    r;                  // residue index
     int    xlen, ylen;         // chain length
     int    xchainnum,ychainnum;// number of chains in a PDB file
     char   *seqx, *seqy;       // for the protein sequence 
-    int    *secx, *secy;       // for the secondary structure 
+    char   *secx, *secy;       // for the secondary structure 
     double **xa, **ya;         // for input vectors xa[0...xlen-1][0..2] and
                                // ya[0...ylen-1][0..2], in general,
                                // ya is regarded as native structure 
                                // --> superpose xa onto ya
     vector<string> resi_vec1;  // residue index for chain1
     vector<string> resi_vec2;  // residue index for chain2
+    int read_resi=byresi_opt;  // whether to read residue index
+    if (byresi_opt==0 && o_opt) read_resi=2;
 
     /* loop over file names */
     for (i=0;i<chain1_list.size();i++)
@@ -400,7 +441,7 @@ int main(int argc, char *argv[])
         /* parse chain 1 */
         xname=chain1_list[i];
         xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
-            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt);
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
         if (!xchainnum)
         {
             cerr<<"Warning! Cannot parse file: "<<xname
@@ -418,27 +459,30 @@ int main(int argc, char *argv[])
                     <<". Chain length 0."<<endl;
                 continue;
             }
-            else if (xlen<=5)
+            else if (xlen<3)
             {
-                cerr<<"Sequence is too short <=5!: "<<xname<<endl;
+                cerr<<"Sequence is too short <3!: "<<xname<<endl;
                 continue;
             }
             NewArray(&xa, xlen, 3);
             seqx = new char[xlen + 1];
-            secx = new int[xlen];
+            secx = new char[xlen + 1];
             xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, 
-                resi_vec1, byresi_opt);
+                resi_vec1, read_resi);
+            if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2];
             if (mol_vec1[chain_i]>0) make_sec(seqx,xa, xlen, secx,atom_opt);
             else make_sec(xa, xlen, secx); // secondary structure assignment
 
             for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++)
             {
+                if (pair_opt && j!=i) continue;
                 /* parse chain 2 */
                 if (PDB_lines2.size()==0)
                 {
                     yname=chain2_list[j];
                     ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
-                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt);
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
                     if (!ychainnum)
                     {
                         cerr<<"Warning! Cannot parse file: "<<yname
@@ -457,16 +501,16 @@ int main(int argc, char *argv[])
                             <<". Chain length 0."<<endl;
                         continue;
                     }
-                    else if (ylen<=5)
+                    else if (ylen<3)
                     {
-                        cerr<<"Sequence is too short <=5!: "<<yname<<endl;
+                        cerr<<"Sequence is too short <3!: "<<yname<<endl;
                         continue;
                     }
                     NewArray(&ya, ylen, 3);
                     seqy = new char[ylen + 1];
-                    secy = new int[ylen];
+                    secy = new char[ylen + 1];
                     ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
-                        resi_vec2, byresi_opt);
+                        resi_vec2, read_resi);
                     if (mol_vec2[chain_j]>0)
                          make_sec(seqy, ya, ylen, secy, atom_opt);
                     else make_sec(ya, ylen, secy);
@@ -490,33 +534,43 @@ int main(int argc, char *argv[])
                     int n_ali8=0;
 
                     /* entry function for structure alignment */
-                    TMalign_main(
+                    if (cp_opt) CPalign_main(
+                        xa, ya, seqx, seqy, secx, secy,
+                        t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        i_opt, a_opt, u_opt, d_opt, fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
+                    else TMalign_main(
                         xa, ya, seqx, seqy, secx, secy,
                         t0, u0, TM1, TM2, TM3, TM4, TM5,
                         d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
                         seqM, seqxA, seqyA,
                         rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
                         xlen, ylen, sequence, Lnorm_ass, d0_scale,
-                        i_opt, I_opt, a_opt, u_opt, d_opt, fast_opt,
+                        i_opt, a_opt, u_opt, d_opt, fast_opt,
                         mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
 
                     /* print result */
                     if (outfmt_opt==0) print_version();
                     output_results(
-                        xname.substr(dir1_opt.size()),
-                        yname.substr(dir2_opt.size()),
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
                         chainID_list1[chain_i].c_str(),
                         chainID_list2[chain_j].c_str(),
                         xlen, ylen, t0, u0, TM1, TM2, 
                         TM3, TM4, TM5, rmsd0, d0_out,
                         seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
-                        n_ali8, n_ali, L_ali, TM_ali, rmsd_ali,
+                        n_ali8, L_ali, TM_ali, rmsd_ali,
                         TM_0, d0_0, d0A, d0B,
                         Lnorm_ass, d0_scale, d0a, d0u, 
                         (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(),
-                        outfmt_opt, ter_opt, 
+                        outfmt_opt, ter_opt, 0, split_opt, o_opt,
                         (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(),
-                        i_opt, I_opt, a_opt, u_opt, d_opt);
+                        i_opt, a_opt, u_opt, d_opt,mirror_opt,
+                        resi_vec1, resi_vec2 );
 
                     /* Done! Free memory */
                     seqM.clear();
diff --git a/modules/bindings/src/tmalign/TMalign.h b/modules/bindings/src/tmalign/TMalign.h
index 08caeec3a2b97f6cdb49a1a7d22b660b2c60b2eb..9187ad3cbd170541a4c72baf44895c86185183d2 100644
--- a/modules/bindings/src/tmalign/TMalign.h
+++ b/modules/bindings/src/tmalign/TMalign.h
@@ -36,7 +36,7 @@ int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
             }
             else score_sum += 1/(1+di/d02);
         }
-        //there are not enough feasible pairs, reliefe the threshold         
+        //there are not enough feasible pairs, relieve the threshold         
         if(n_cut<3 && n_ali>3)
         {
             inc++;
@@ -81,7 +81,7 @@ int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
                 score_sum += 1 / (1 + di / d02);
             }
         }
-        //there are not enough feasible pairs, reliefe the threshold         
+        //there are not enough feasible pairs, relieve the threshold         
         if (n_cut<3 && n_ali>3)
         {
             inc++;
@@ -137,7 +137,7 @@ double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm,
     //find the maximum score starting from local structures superposition
     int i_ali[kmax], n_cut;
     int L_frag; //fragment length
-    int iL_max; //maximum starting postion for the fragment
+    int iL_max; //maximum starting position for the fragment
     
     for(i_init=0; i_init<n_init; i_init++)
     {
@@ -291,7 +291,7 @@ double TMscore8_search_standard( double **r1, double **r2,
     //find the maximum score starting from local structures superposition
     int i_ali[kmax], n_cut;
     int L_frag; //fragment length
-    int iL_max; //maximum starting postion for the fragment
+    int iL_max; //maximum starting position for the fragment
 
     for (i_init = 0; i_init<n_init; i_init++)
     {
@@ -635,7 +635,7 @@ double get_initial(double **r1, double **r2, double **xtm, double **ytm,
     double t[3], double u[3][3])
 {
     int min_len=getmin(xlen, ylen);
-    if(min_len<=5) PrintErrorAndQuit("Sequence is too short <=5!\n");
+    if(min_len<3) PrintErrorAndQuit("Sequence is too short <3!\n");
     
     int min_ali= min_len/2;              //minimum size of considered fragment 
     if(min_ali<=5)  min_ali=5;    
@@ -724,17 +724,17 @@ void smooth(int *sec, int len)
 
 }
 
-int sec_str(double dis13, double dis14, double dis15,
+char sec_str(double dis13, double dis14, double dis15,
             double dis24, double dis25, double dis35)
 {
-    int s=1;
+    char s='C';
     
     double delta=2.1;
     if (fabs(dis15-6.37)<delta && fabs(dis14-5.18)<delta && 
         fabs(dis25-5.18)<delta && fabs(dis13-5.45)<delta &&
         fabs(dis24-5.45)<delta && fabs(dis35-5.45)<delta)
     {
-        s=2; //helix                        
+        s='H'; //helix                        
         return s;
     }
 
@@ -743,24 +743,24 @@ int sec_str(double dis13, double dis14, double dis15,
         fabs(dis25-10.4)<delta && fabs(dis13-6.1 )<delta &&
         fabs(dis24-6.1 )<delta && fabs(dis35-6.1 )<delta)
     {
-        s=4; //strand
+        s='E'; //strand
         return s;
     }
 
-    if (dis15 < 8) s=3; //turn
+    if (dis15 < 8) s='T'; //turn
     return s;
 }
 
 
-/* secondary stucture assignment for protein:
+/* secondary structure assignment for protein ('C' coil, 'H' helix, 'T' turn, 'E' strand); former int codes:
  * 1->coil, 2->helix, 3->turn, 4->strand */
-void make_sec(double **x, int len, int *sec)
+void make_sec(double **x, int len, char *sec)
 {
     int j1, j2, j3, j4, j5;
     double d13, d14, d15, d24, d25, d35;
     for(int i=0; i<len; i++)
     {     
-        sec[i]=1;
+        sec[i]='C';
         j1=i-2;
         j2=i-1;
         j3=i;
@@ -778,6 +778,7 @@ void make_sec(double **x, int len, int *sec)
             sec[i]=sec_str(d13, d14, d15, d24, d25, d35);            
         }    
     } 
+    sec[len]=0;
 }
 
 /* a c d b: a paired to b, c paired to d */
@@ -809,10 +810,10 @@ void sec_str(int len,char *seq, const vector<vector<bool> >&bp,
 
 /* secondary structure assignment for RNA:
  * 1->unpair, 2->paired with upstream, 3->paired with downstream */
-void make_sec(char *seq, double **x, int len, int *sec,const string atom_opt)
+void make_sec(char *seq, double **x, int len, char *sec,const string atom_opt)
 {
-    int ii,jj,j;
-    unsigned int i;
+    int ii,jj,i,j;
+
     float lb=12.5; // lower bound for " C3'"
     float ub=15.0; // upper bound for " C3'"
     if     (atom_opt==" C4'") {lb=14.0;ub=16.0;}
@@ -825,9 +826,9 @@ void make_sec(char *seq, double **x, int len, int *sec,const string atom_opt)
     vector<bool> bp_tmp(len,false);
     vector<vector<bool> > bp(len,bp_tmp);
     bp_tmp.clear();
-    for (i=0;(int) i<len; i++)
+    for (i=0; i<len; i++)
     {
-        sec[i]=1;
+        sec[i]='.';
         for (j=i+1; j<len; j++)
         {
             if (((seq[i]=='u'||seq[i]=='t')&&(seq[j]=='a'             ))||
@@ -843,7 +844,7 @@ void make_sec(char *seq, double **x, int len, int *sec,const string atom_opt)
     
     // From 5' to 3': A0 C0 D0 B0: A0 paired to B0, C0 paired to D0
     vector<int> A0,B0,C0,D0;
-    for (i=0;(int) i<len-2; i++)
+    for (i=0; i<len-2; i++)
     {
         for (j=i+3; j<len; j++)
         {
@@ -859,7 +860,7 @@ void make_sec(char *seq, double **x, int len, int *sec,const string atom_opt)
     }
     
     //int sign;
-    for (i=0; i<A0.size();i++)
+    for (i=0;i<A0.size();i++)
     {
         /*
         sign=0;
@@ -889,10 +890,11 @@ void make_sec(char *seq, double **x, int len, int *sec,const string atom_opt)
         for (j=0;;j++)
         {
             if(A0[i]+j>C0[i]) break;
-            sec[A0[i]+j]=2;
-            sec[D0[i]+j]=3;
+            sec[A0[i]+j]='<';
+            sec[D0[i]+j]='>';
         }
     }
+    sec[len]=0;
 
     /* clean up */
     A0.clear();
@@ -909,7 +911,7 @@ void make_sec(char *seq, double **x, int len, int *sec,const string atom_opt)
 //the jth element in y is aligned to the ith element in x if i>=0 
 //the jth element in y is aligned to a gap in x if i==-1
 void get_initial_ss(bool **path, double **val,
-    const int *secx, const int *secy, int xlen, int ylen, int *y2x)
+    const char *secx, const char *secy, int xlen, int ylen, int *y2x)
 {
     double gap_open=-1.0;
     NWDP_TM(path, val, secx, secy, xlen, ylen, gap_open, y2x);
@@ -1022,10 +1024,9 @@ bool get_initial5( double **r1, double **r2, double **xtm, double **ytm,
     return flag;
 }
 
-void score_matrix_rmsd_sec( double **r1, double **r2,
-    double **score, const int *secx, const int *secy,
-    double **x, double **y, int xlen, int ylen,
-    int *y2x, const double D0_MIN, double d0)
+void score_matrix_rmsd_sec( double **r1, double **r2, double **score,
+    const char *secx, const char *secy, double **x, double **y,
+    int xlen, int ylen, int *y2x, const double D0_MIN, double d0)
 {
     double t[3], u[3][3];
     double rmsd, dij;
@@ -1076,7 +1077,7 @@ void score_matrix_rmsd_sec( double **r1, double **r2,
 //the jth element in y is aligned to the ith element in x if i>=0 
 //the jth element in y is aligned to a gap in x if i==-1
 void get_initial_ssplus(double **r1, double **r2, double **score, bool **path,
-    double **val, const int *secx, const int *secy, double **x, double **y,
+    double **val, const char *secx, const char *secy, double **x, double **y,
     int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0)
 {
     //create score matrix for DP
@@ -1457,35 +1458,41 @@ double DP_iter(double **r1, double **r2, double **xtm, double **ytm,
 }
 
 
-void output_superpose(const string filename, const char *fname_super,
-    double t[3], double u[3][3], const int ter_opt=3)
+void output_pymol(const string xname, const string yname,
+    const string fname_super, double t[3], double u[3][3], const int ter_opt, 
+    const int mm_opt, const int split_opt, const int mirror_opt,
+    const char *seqM, const char *seqxA, const char *seqyA,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2,
+    const string chainID1, const string chainID2)
 {
     int compress_type=0; // uncompressed file
     ifstream fin;
     redi::ipstream fin_gz; // if file is compressed
-    if (filename.size()>=3 && 
-        filename.substr(filename.size()-3,3)==".gz")
+    if (xname.size()>=3 && 
+        xname.substr(xname.size()-3,3)==".gz")
     {
-        fin_gz.open("zcat "+filename);
+        fin_gz.open("zcat "+xname);
         compress_type=1;
     }
-    else if (filename.size()>=4 && 
-        filename.substr(filename.size()-4,4)==".bz2")
+    else if (xname.size()>=4 && 
+        xname.substr(xname.size()-4,4)==".bz2")
     {
-        fin_gz.open("bzcat "+filename);
+        fin_gz.open("bzcat "+xname);
         compress_type=2;
     }
-    else fin.open(filename.c_str());
+    else fin.open(xname.c_str());
 
     stringstream buf;
+    stringstream buf_pymol;
     string line;
     double x[3];  // before transform
     double x1[3]; // after transform
 
     /* for PDBx/mmCIF only */
-    map<string,unsigned int> _atom_site;
-    unsigned int atom_site_pos;
+    map<string,int> _atom_site;
+    size_t atom_site_pos;
     vector<string> line_vec;
+    int infmt=-1; // 0 - PDB, 3 - PDBx/mmCIF
 
     while (compress_type?fin_gz.good():fin.good())
     {
@@ -1494,9 +1501,11 @@ void output_superpose(const string filename, const char *fname_super,
         if (line.compare(0, 6, "ATOM  ")==0 || 
             line.compare(0, 6, "HETATM")==0) // PDB format
         {
+            infmt=0;
             x[0]=atof(line.substr(30,8).c_str());
             x[1]=atof(line.substr(38,8).c_str());
             x[2]=atof(line.substr(46,8).c_str());
+            if (mirror_opt) x[2]=-x[2];
             transform(t, u, x, x1);
             buf<<line.substr(0,30)<<setiosflags(ios::fixed)
                 <<setprecision(3)
@@ -1505,9 +1514,22 @@ void output_superpose(const string filename, const char *fname_super,
         }
         else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
         {
+            infmt=3;
             buf<<line<<'\n';
-            if (compress_type) getline(fin_gz, line);
-            else               getline(fin, line);
+            while(1)
+            {
+                if (compress_type) 
+                {
+                    if (fin_gz.good()) getline(fin_gz, line);
+                    else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                }
+                else
+                {
+                    if (fin.good()) getline(fin, line);
+                    else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                }
+                if (line.size()) break;
+            }
             buf<<line<<'\n';
             if (line.compare(0,11,"_atom_site.")) continue;
             _atom_site.clear();
@@ -1515,8 +1537,20 @@ void output_superpose(const string filename, const char *fname_super,
             _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
             while(1)
             {
-                if (compress_type) getline(fin_gz, line);
-                else               getline(fin, line);
+                while(1)
+                {
+                    if (compress_type) 
+                    {
+                        if (fin_gz.good()) getline(fin_gz, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                    }
+                    else
+                    {
+                        if (fin.good()) getline(fin, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                    }
+                    if (line.size()) break;
+                }
                 if (line.compare(0,11,"_atom_site.")) break;
                 _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
                 buf<<line<<'\n';
@@ -1542,10 +1576,11 @@ void output_superpose(const string filename, const char *fname_super,
                 x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str());
                 x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str());
                 x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str());
+                if (mirror_opt) x[2]=-x[2];
                 transform(t, u, x, x1);
 
                 for (atom_site_pos=0; atom_site_pos<_atom_site.size(); atom_site_pos++)
-                { 
+                {
                     if (atom_site_pos==_atom_site["Cartn_x"])
                         buf<<setiosflags(ios::fixed)<<setprecision(3)
                            <<setw(8)<<x1[0]<<' ';
@@ -1574,10 +1609,829 @@ void output_superpose(const string filename, const char *fname_super,
     if (compress_type) fin_gz.close();
     else               fin.close();
 
-    ofstream fp(fname_super);
+    string fname_super_full=fname_super;
+    if (infmt==0)      fname_super_full+=".pdb";
+    else if (infmt==3) fname_super_full+=".cif";
+    ofstream fp;
+    fp.open(fname_super_full.c_str());
     fp<<buf.str();
     fp.close();
     buf.str(string()); // clear stream
+
+    string chain1_sele;
+    string chain2_sele;
+    int i;
+    if (!mm_opt)
+    {
+        if (split_opt==2 && ter_opt>=1) // align one chain from model 1
+        {
+            chain1_sele=" and c. "+chainID1.substr(1);
+            chain2_sele=" and c. "+chainID2.substr(1);
+        }
+        else if (split_opt==2 && ter_opt==0) // align one chain from each model
+        {
+            for (i=1;i<chainID1.size();i++) if (chainID1[i]==',') break;
+            chain1_sele=" and c. "+chainID1.substr(i+1);
+            for (i=1;i<chainID2.size();i++) if (chainID2[i]==',') break;
+            chain2_sele=" and c. "+chainID2.substr(i+1);
+        }
+    }
+
+    /* extract aligned region */
+    int i1=-1;
+    int i2=-1;
+    string resi1_sele;
+    string resi2_sele;
+    string resi1_bond;
+    string resi2_bond;
+    string prev_resi1;
+    string prev_resi2;
+    string curr_resi1;
+    string curr_resi2;
+    if (mm_opt)
+    {
+        ;
+    }
+    else
+    {
+        for (i=0;i<strlen(seqM);i++)
+        {
+            i1+=(seqxA[i]!='-' && seqxA[i]!='*');
+            i2+=(seqyA[i]!='-');
+            if (seqM[i]==' ' || seqxA[i]=='*') continue;
+            curr_resi1=resi_vec1[i1].substr(0,4);
+            curr_resi2=resi_vec2[i2].substr(0,4);
+            if (resi1_sele.size()==0)
+                resi1_sele =    "i. "+curr_resi1;
+            else
+            {
+                resi1_sele+=" or i. "+curr_resi1;
+                resi1_bond+="bond structure1 and i. "+prev_resi1+
+                                              ", i. "+curr_resi1+"\n";
+            }
+            if (resi2_sele.size()==0)
+                resi2_sele =    "i. "+curr_resi2;
+            else
+            {
+                resi2_sele+=" or i. "+curr_resi2;
+                resi2_bond+="bond structure2 and i. "+prev_resi2+
+                                              ", i. "+curr_resi2+"\n";
+            }
+            prev_resi1=curr_resi1;
+            prev_resi2=curr_resi2;
+            //if (seqM[i]!=':') continue;
+        }
+        if (resi1_sele.size()) resi1_sele=" and ( "+resi1_sele+")";
+        if (resi2_sele.size()) resi2_sele=" and ( "+resi2_sele+")";
+    }
+
+    /* write pymol script */
+    vector<string> pml_list;
+    pml_list.push_back(fname_super+"");
+    pml_list.push_back(fname_super+"_atm");
+    pml_list.push_back(fname_super+"_all");
+    pml_list.push_back(fname_super+"_all_atm");
+    pml_list.push_back(fname_super+"_all_atm_lig");
+
+    for (int p=0;p<pml_list.size();p++)
+    {
+        if (mm_opt && p<=1) continue;
+        buf_pymol
+            <<"#!/usr/bin/env pymol\n"
+            <<"cmd.load(\""<<fname_super_full<<"\", \"structure1\")\n"
+            <<"cmd.load(\""<<yname<<"\", \"structure2\")\n"
+            <<"hide all\n"
+            <<"set all_states, "<<((ter_opt==0)?"on":"off")<<'\n';
+        if (p==0) // .pml
+        {
+            if (chain1_sele.size()) buf_pymol
+                <<"remove structure1 and not "<<chain1_sele.substr(4)<<"\n";
+            if (chain2_sele.size()) buf_pymol
+                <<"remove structure2 and not "<<chain2_sele.substr(4)<<"\n";
+            buf_pymol
+                <<"remove not n. CA and not n. C3'\n"
+                <<resi1_bond
+                <<resi2_bond
+                <<"show stick, structure1"<<chain1_sele<<resi1_sele<<"\n"
+                <<"show stick, structure2"<<chain2_sele<<resi2_sele<<"\n";
+        }
+        else if (p==1) // _atm.pml
+        {
+            buf_pymol
+                <<"show cartoon, structure1"<<chain1_sele<<resi1_sele<<"\n"
+                <<"show cartoon, structure2"<<chain2_sele<<resi2_sele<<"\n";
+        }
+        else if (p==2) // _all.pml
+        {
+            buf_pymol
+                <<"show ribbon, structure1"<<chain1_sele<<"\n"
+                <<"show ribbon, structure2"<<chain2_sele<<"\n";
+        }
+        else if (p==3) // _all_atm.pml
+        {
+            buf_pymol
+                <<"show cartoon, structure1"<<chain1_sele<<"\n"
+                <<"show cartoon, structure2"<<chain2_sele<<"\n";
+        }
+        else if (p==4) // _all_atm_lig.pml
+        {
+            buf_pymol
+                <<"show cartoon, structure1\n"
+                <<"show cartoon, structure2\n"
+                <<"show stick, not polymer\n"
+                <<"show sphere, not polymer\n";
+        }
+        buf_pymol
+            <<"color blue, structure1\n"
+            <<"color red, structure2\n"
+            <<"set ribbon_width, 6\n"
+            <<"set stick_radius, 0.3\n"
+            <<"set sphere_scale, 0.25\n"
+            <<"set ray_shadow, 0\n"
+            <<"bg_color white\n"
+            <<"set transparency=0.2\n"
+            <<"zoom polymer and ((structure1"<<chain1_sele
+            <<") or (structure2"<<chain2_sele<<"))\n"
+            <<endl;
+
+        fp.open((pml_list[p]+".pml").c_str());
+        fp<<buf_pymol.str();
+        fp.close();
+        buf_pymol.str(string());
+    }
+
+    /* clean up */
+    pml_list.clear();
+    
+    resi1_sele.clear();
+    resi2_sele.clear();
+    
+    resi1_bond.clear();
+    resi2_bond.clear();
+    
+    prev_resi1.clear();
+    prev_resi2.clear();
+
+    curr_resi1.clear();
+    curr_resi2.clear();
+
+    chain1_sele.clear();
+    chain2_sele.clear();
+}
+
+void output_rasmol(const string xname, const string yname,
+    const string fname_super, double t[3], double u[3][3], const int ter_opt,
+    const int mm_opt, const int split_opt, const int mirror_opt,
+    const char *seqM, const char *seqxA, const char *seqyA,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2,
+    const string chainID1, const string chainID2,
+    const int xlen, const int ylen, const double d0A, const int n_ali8,
+    const double rmsd, const double TM1, const double Liden)
+{
+    stringstream buf;
+    stringstream buf_all;
+    stringstream buf_atm;
+    stringstream buf_all_atm;
+    stringstream buf_all_atm_lig;
+    //stringstream buf_pdb;
+    stringstream buf_tm;
+    string line;
+    double x[3];  // before transform
+    double x1[3]; // after transform
+    bool after_ter; // true if passed the "TER" line in PDB
+    string asym_id; // chain ID
+
+    buf_tm<<"REMARK US-align"
+        <<"\nREMARK Structure 1:"<<setw(11)<<left<<xname+chainID1<<" Size= "<<xlen
+        <<"\nREMARK Structure 2:"<<setw(11)<<yname+chainID2<<right<<" Size= "<<ylen
+        <<" (TM-score is normalized by "<<setw(4)<<ylen<<", d0="
+        <<setiosflags(ios::fixed)<<setprecision(2)<<setw(6)<<d0A<<")"
+        <<"\nREMARK Aligned length="<<setw(4)<<n_ali8<<", RMSD="
+        <<setw(6)<<setiosflags(ios::fixed)<<setprecision(2)<<rmsd
+        <<", TM-score="<<setw(7)<<setiosflags(ios::fixed)<<setprecision(5)<<TM1
+        <<", ID="<<setw(5)<<setiosflags(ios::fixed)<<setprecision(3)
+        <<((n_ali8>0)?Liden/n_ali8:0)<<endl;
+    string rasmol_CA_header="load inline\nselect *A\nwireframe .45\nselect *B\nwireframe .20\nselect all\ncolor white\n";
+    string rasmol_cartoon_header="load inline\nselect all\ncartoon\nselect *A\ncolor blue\nselect *B\ncolor red\nselect ligand\nwireframe 0.25\nselect solvent\nspacefill 0.25\nselect all\nexit\n"+buf_tm.str();
+    if (!mm_opt) buf<<rasmol_CA_header;
+    buf_all<<rasmol_CA_header;
+    if (!mm_opt) buf_atm<<rasmol_cartoon_header;
+    buf_all_atm<<rasmol_cartoon_header;
+    buf_all_atm_lig<<rasmol_cartoon_header;
+
+    /* selecting chains for -mol */
+    string chain1_sele;
+    string chain2_sele;
+    int i;
+    if (!mm_opt)
+    {
+        if (split_opt==2 && ter_opt>=1) // align one chain from model 1
+        {
+            chain1_sele=chainID1.substr(1);
+            chain2_sele=chainID2.substr(1);
+        }
+        else if (split_opt==2 && ter_opt==0) // align one chain from each model
+        {
+            for (i=1;i<chainID1.size();i++) if (chainID1[i]==',') break;
+            chain1_sele=chainID1.substr(i+1);
+            for (i=1;i<chainID2.size();i++) if (chainID2[i]==',') break;
+            chain2_sele=chainID2.substr(i+1);
+        }
+    }
+
+
+    /* for PDBx/mmCIF only */
+    map<string,int> _atom_site;
+    int atom_site_pos;
+    vector<string> line_vec;
+    string atom; // 4-character atom name
+    string AA;   // 3-character residue name
+    string resi; // 4-character residue sequence number
+    string inscode; // 1-character insertion code
+    string model_index; // model index
+    bool is_mmcif=false;
+
+    /* used for CONECT record of chain1 */
+    int ca_idx1=0; // all CA atoms
+    int lig_idx1=0; // all atoms
+    vector <int> idx_vec;
+
+    /* used for CONECT record of chain2 */
+    int ca_idx2=0; // all CA atoms
+    int lig_idx2=0; // all atoms
+
+    /* extract aligned region */
+    vector<string> resi_aln1;
+    vector<string> resi_aln2;
+    int i1=-1;
+    int i2=-1;
+    if (!mm_opt)
+    {
+        for (i=0;i<strlen(seqM);i++)
+        {
+            i1+=(seqxA[i]!='-');
+            i2+=(seqyA[i]!='-');
+            if (seqM[i]==' ') continue;
+            resi_aln1.push_back(resi_vec1[i1].substr(0,4));
+            resi_aln2.push_back(resi_vec2[i2].substr(0,4));
+            if (seqM[i]!=':') continue;
+            buf    <<"select "<<resi_aln1.back()<<":A,"
+                   <<resi_aln2.back()<<":B\ncolor red\n";
+            buf_all<<"select "<<resi_aln1.back()<<":A,"
+                   <<resi_aln2.back()<<":B\ncolor red\n";
+        }
+        buf<<"select all\nexit\n"<<buf_tm.str();
+    }
+    buf_all<<"select all\nexit\n"<<buf_tm.str();
+
+    ifstream fin;
+    /* read first file */
+    after_ter=false;
+    asym_id="";
+    fin.open(xname.c_str());
+    while (fin.good())
+    {
+        getline(fin, line);
+        if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
+        if (is_mmcif==false && line.size()>=54 &&
+           (line.compare(0, 6, "ATOM  ")==0 ||
+            line.compare(0, 6, "HETATM")==0)) // PDB format
+        {
+            if (line[16]!='A' && line[16]!=' ') continue;
+            x[0]=atof(line.substr(30,8).c_str());
+            x[1]=atof(line.substr(38,8).c_str());
+            x[2]=atof(line.substr(46,8).c_str());
+            if (mirror_opt) x[2]=-x[2];
+            transform(t, u, x, x1);
+            //buf_pdb<<line.substr(0,30)<<setiosflags(ios::fixed)
+                //<<setprecision(3)
+                //<<setw(8)<<x1[0] <<setw(8)<<x1[1] <<setw(8)<<x1[2]
+                //<<line.substr(54)<<'\n';
+
+            if (after_ter && line.compare(0,6,"ATOM  ")==0) continue;
+            lig_idx1++;
+            buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1
+                <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            if (chain1_sele.size() && line[21]!=chain1_sele[0]) continue;
+            if (after_ter || line.compare(0,6,"ATOM  ")) continue;
+            if (ter_opt>=2)
+            {
+                if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) 
+                {
+                    after_ter=true;
+                    continue;
+                }
+                asym_id=line[21];
+            }
+            buf_all_atm<<"ATOM  "<<setw(5)<<lig_idx1
+                <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            if (!mm_opt && find(resi_aln1.begin(),resi_aln1.end(),
+                line.substr(22,4))!=resi_aln1.end())
+            {
+                buf_atm<<"ATOM  "<<setw(5)<<lig_idx1
+                    <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                    <<setiosflags(ios::fixed)<<setprecision(3)
+                    <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            }
+            if (line.substr(12,4)!=" CA " && line.substr(12,4)!=" C3'") continue;
+            ca_idx1++;
+            buf_all<<"ATOM  "<<setw(5)<<ca_idx1<<' '
+                <<line.substr(12,4)<<' '<<line.substr(17,3)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
+            if (find(resi_aln1.begin(),resi_aln1.end(),
+                line.substr(22,4))==resi_aln1.end()) continue;
+            if (!mm_opt) buf<<"ATOM  "<<setw(5)<<ca_idx1<<' '
+                <<line.substr(12,4)<<' '<<line.substr(17,3)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
+            idx_vec.push_back(ca_idx1);
+        }
+        else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+        {
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                if (line.size()) break;
+            }
+            if (line.compare(0,11,"_atom_site.")) continue;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                if (line.size()==0) continue;
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+            }
+
+            if (is_mmcif==false)
+            {
+                //buf_pdb.str(string());
+                is_mmcif=true;
+            }
+
+            while(1)
+            {
+                line_vec.clear();
+                split(line,line_vec);
+                if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                    line_vec[_atom_site["group_PDB"]]!="HETATM") break;
+                if (_atom_site.count("pdbx_PDB_model_num"))
+                {
+                    if (model_index.size() && model_index!=
+                        line_vec[_atom_site["pdbx_PDB_model_num"]])
+                        break;
+                    model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                }
+
+                x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str());
+                x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str());
+                x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str());
+                if (mirror_opt) x[2]=-x[2];
+                transform(t, u, x, x1);
+
+                if (_atom_site.count("label_alt_id")==0 || 
+                    line_vec[_atom_site["label_alt_id"]]=="." ||
+                    line_vec[_atom_site["label_alt_id"]]=="A")
+                {
+                    atom=line_vec[_atom_site["label_atom_id"]];
+                    if (atom[0]=='"') atom=atom.substr(1);
+                    if (atom.size() && atom[atom.size()-1]=='"')
+                        atom=atom.substr(0,atom.size()-1);
+                    if      (atom.size()==0) atom="    ";
+                    else if (atom.size()==1) atom=" "+atom+"  ";
+                    else if (atom.size()==2) atom=" "+atom+" ";
+                    else if (atom.size()==3) atom=" "+atom;
+                    else if (atom.size()>=5) atom=atom.substr(0,4);
+            
+                    AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+                    if      (AA.size()==1) AA="  "+AA;
+                    else if (AA.size()==2) AA=" " +AA;
+                    else if (AA.size()>=4) AA=AA.substr(0,3);
+                
+                    if (_atom_site.count("auth_seq_id"))
+                        resi=line_vec[_atom_site["auth_seq_id"]];
+                    else resi=line_vec[_atom_site["label_seq_id"]];
+                    while (resi.size()<4) resi=' '+resi;
+                    if (resi.size()>4) resi=resi.substr(0,4);
+                
+                    inscode=' ';
+                    if (_atom_site.count("pdbx_PDB_ins_code") && 
+                        line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                        inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+
+                    if (_atom_site.count("auth_asym_id"))
+                    {
+                        if (chain1_sele.size()) after_ter
+                            =line_vec[_atom_site["auth_asym_id"]]!=chain1_sele;
+                        else if (ter_opt>=2 && ca_idx1 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["auth_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["auth_asym_id"]];
+                    }
+                    else if (_atom_site.count("label_asym_id"))
+                    {
+                        if (chain1_sele.size()) after_ter
+                            =line_vec[_atom_site["label_asym_id"]]!=chain1_sele;
+                        if (ter_opt>=2 && ca_idx1 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["label_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["label_asym_id"]];
+                    }
+                    //buf_pdb<<left<<setw(6)
+                        //<<line_vec[_atom_site["group_PDB"]]<<right
+                        //<<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                        //<<AA<<" "<<asym_id[asym_id.size()-1]
+                        //<<resi<<inscode<<"   "
+                        //<<setiosflags(ios::fixed)<<setprecision(3)
+                        //<<setw(8)<<x1[0]
+                        //<<setw(8)<<x1[1]
+                        //<<setw(8)<<x1[2]<<'\n';
+
+                    if (after_ter==false || // HETATM rows pass even when after_ter is set
+                        line_vec[_atom_site["group_PDB"]]=="HETATM")
+                    {
+                        lig_idx1++; // serial number shared by the three all-atom buffers
+                        buf_all_atm_lig<<left<<setw(6)
+                            <<line_vec[_atom_site["group_PDB"]]<<right
+                            <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                            <<AA<<" A"<<resi<<inscode<<"   "
+                            <<setiosflags(ios::fixed)<<setprecision(3)
+                            <<setw(8)<<x1[0]
+                            <<setw(8)<<x1[1]
+                            <<setw(8)<<x1[2]<<'\n';
+                        if (after_ter==false && // ATOM rows only, while after_ter is unset
+                            line_vec[_atom_site["group_PDB"]]=="ATOM")
+                        {
+                            buf_all_atm<<"ATOM  "<<setw(6)
+                                <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                                <<AA<<" A"<<resi<<inscode<<"   "
+                                <<setiosflags(ios::fixed)<<setprecision(3)
+                                <<setw(8)<<x1[0]
+                                <<setw(8)<<x1[1]
+                                <<setw(8)<<x1[2]<<'\n';
+                            if (!mm_opt && find(resi_aln1.begin(),
+                                resi_aln1.end(),resi)!=resi_aln1.end())
+                            {
+                                buf_atm<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<lig_idx1%100000<<' '
+                                    <<atom<<' '<<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                            }
+                            if (atom==" CA " || atom==" C3'") // CA / C3' atoms use ca_idx1
+                            {
+                                ca_idx1++;
+                                // buf_all gets every CA/C3' atom; buf only those in resi_aln1 (when !mm_opt)
+                                buf_all<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<ca_idx1%100000<<' '<<atom<<' '
+                                    <<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                                if (!mm_opt && find(resi_aln1.begin(),
+                                    resi_aln1.end(),resi)!=resi_aln1.end())
+                                {
+                                    buf<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<ca_idx1%100000<<' '<<atom<<' '
+                                    <<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                                    idx_vec.push_back(ca_idx1); // remembered for the CONECT records
+                                }
+                            }
+                        }
+                    }
+                }
+
+                while(1)
+                {
+                    if (fin.good()) getline(fin, line);
+                    else break;
+                    if (line.size()) break;
+                }
+            }
+        }
+        else if (line.size() && is_mmcif==false)
+        {
+            //buf_pdb<<line<<'\n';
+            if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+        }
+    }
+    fin.close();
+    if (!mm_opt) buf<<"TER\n";
+    buf_all<<"TER\n";
+    if (!mm_opt) buf_atm<<"TER\n";
+    buf_all_atm<<"TER\n";
+    buf_all_atm_lig<<"TER\n";
+    for (i=1;i<ca_idx1;i++) buf_all<<"CONECT"
+        <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
+    if (!mm_opt) for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
+        <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
+    idx_vec.clear();
+
+    /* read second file */
+    after_ter=false;
+    asym_id="";
+    fin.open(yname.c_str());
+    while (fin.good())
+    {
+        getline(fin, line);
+        if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
+        if (line.size()>=54 && (line.compare(0, 6, "ATOM  ")==0 ||
+            line.compare(0, 6, "HETATM")==0)) // PDB format
+        {
+            if (line[16]!='A' && line[16]!=' ') continue; // column 17 must be blank or 'A'
+            if (after_ter && line.compare(0,6,"ATOM  ")==0) continue;
+            lig_idx2++; // counted before the chain filter below
+            buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1+lig_idx2
+                <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            if (chain2_sele.size() && line[21]!=chain2_sele[0]) continue; // chain filter for structure 2
+            if (after_ter || line.compare(0,6,"ATOM  ")) continue; // ATOM rows only from here on
+            if (ter_opt>=2)
+            {
+                if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1))
+                {
+                    after_ter=true; // chain ID changed after at least one CA/C3' atom
+                    continue;
+                }
+                asym_id=line[21];
+            }
+            buf_all_atm<<"ATOM  "<<setw(5)<<lig_idx1+lig_idx2
+                <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            if (!mm_opt && find(resi_aln2.begin(),resi_aln2.end(),
+                line.substr(22,4))!=resi_aln2.end())
+            {
+                buf_atm<<"ATOM  "<<setw(5)<<lig_idx1+lig_idx2
+                    <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            }
+            if (line.substr(12,4)!=" CA " && line.substr(12,4)!=" C3'") continue;
+            ca_idx2++; // numbering continues after structure 1 (ca_idx1+ca_idx2)
+            buf_all<<"ATOM  "<<setw(5)<<ca_idx1+ca_idx2<<' '<<line.substr(12,4)
+                <<' '<<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
+            if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
+                )==resi_aln2.end()) continue;
+            if (!mm_opt) buf<<"ATOM  "<<setw(5)<<ca_idx1+ca_idx2<<' '
+                <<line.substr(12,4)<<' '<<line.substr(17,3)<<" B"
+                <<line.substr(22,32)<<'\n';
+            idx_vec.push_back(ca_idx1+ca_idx2); // remembered for the CONECT records
+        }
+        else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+        {
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
+                if (line.size()) break;
+            }
+            if (line.compare(0,11,"_atom_site.")) continue;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
+                if (line.size()==0) continue;
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+            }
+
+            while(1)
+            {
+                line_vec.clear();
+                split(line,line_vec);
+                if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                    line_vec[_atom_site["group_PDB"]]!="HETATM") break;
+                if (_atom_site.count("pdbx_PDB_model_num"))
+                {
+                    if (model_index.size() && model_index!=
+                        line_vec[_atom_site["pdbx_PDB_model_num"]])
+                        break;
+                    model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                }
+
+                if (_atom_site.count("label_alt_id")==0 || 
+                    line_vec[_atom_site["label_alt_id"]]=="." ||
+                    line_vec[_atom_site["label_alt_id"]]=="A")
+                {
+                    atom=line_vec[_atom_site["label_atom_id"]];
+                    if (atom[0]=='"') atom=atom.substr(1);
+                    if (atom.size() && atom[atom.size()-1]=='"')
+                        atom=atom.substr(0,atom.size()-1);
+                    if      (atom.size()==0) atom="    ";
+                    else if (atom.size()==1) atom=" "+atom+"  ";
+                    else if (atom.size()==2) atom=" "+atom+" ";
+                    else if (atom.size()==3) atom=" "+atom;
+                    else if (atom.size()>=5) atom=atom.substr(0,4);
+            
+                    AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+                    if      (AA.size()==1) AA="  "+AA;
+                    else if (AA.size()==2) AA=" " +AA;
+                    else if (AA.size()>=4) AA=AA.substr(0,3);
+                
+                    if (_atom_site.count("auth_seq_id"))
+                        resi=line_vec[_atom_site["auth_seq_id"]];
+                    else resi=line_vec[_atom_site["label_seq_id"]];
+                    while (resi.size()<4) resi=' '+resi;
+                    if (resi.size()>4) resi=resi.substr(0,4);
+                
+                    inscode=' ';
+                    if (_atom_site.count("pdbx_PDB_ins_code") && 
+                        line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                        inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+                    
+                    if (_atom_site.count("auth_asym_id"))
+                    {
+                        if (chain2_sele.size()) after_ter
+                            =line_vec[_atom_site["auth_asym_id"]]!=chain2_sele;
+                        if (ter_opt>=2 && ca_idx2 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["auth_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["auth_asym_id"]];
+                    }
+                    else if (_atom_site.count("label_asym_id"))
+                    {
+                        if (chain2_sele.size()) after_ter
+                            =line_vec[_atom_site["label_asym_id"]]!=chain2_sele;
+                        if (ter_opt>=2 && ca_idx2 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["label_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["label_asym_id"]];
+                    }
+                    if (after_ter==false || 
+                        line_vec[_atom_site["group_PDB"]]=="HETATM")
+                    {
+                        lig_idx2++;
+                        buf_all_atm_lig<<left<<setw(6)
+                            <<line_vec[_atom_site["group_PDB"]]<<right
+                            <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                            <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                            <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                            <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                            <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                            <<'\n';
+                        if (after_ter==false &&
+                            line_vec[_atom_site["group_PDB"]]=="ATOM")
+                        {
+                            buf_all_atm<<"ATOM  "<<setw(6)
+                                <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                                <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                <<'\n';
+                            if (!mm_opt && find(resi_aln2.begin(),
+                                resi_aln2.end(),resi)!=resi_aln2.end())
+                            {
+                                buf_atm<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                                    <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                            }
+                            if (atom==" CA " || atom==" C3'")
+                            {
+                                ca_idx2++;
+                                buf_all<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(ca_idx1+ca_idx2)%100000
+                                    <<' '<<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                                if (!mm_opt && find(resi_aln2.begin(),
+                                    resi_aln2.end(),resi)!=resi_aln2.end())
+                                {
+                                    buf<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(ca_idx1+ca_idx2)%100000
+                                    <<' '<<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                                    idx_vec.push_back(ca_idx1+ca_idx2);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                if (fin.good()) getline(fin, line);
+                else break;
+            }
+        }
+        else if (line.size())
+        {
+            if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+        }
+    }
+    fin.close();
+    if (!mm_opt) buf<<"TER\n";
+    buf_all<<"TER\n";
+    if (!mm_opt) buf_atm<<"TER\n";
+    buf_all_atm<<"TER\n";
+    buf_all_atm_lig<<"TER\n";
+    for (i=ca_idx1+1;i<ca_idx1+ca_idx2;i++) buf_all<<"CONECT"
+        <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
+    for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
+        <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
+    idx_vec.clear();
+
+    /* write pymol script */
+    ofstream fp;
+    /*
+    stringstream buf_pymol;
+    vector<string> pml_list;
+    pml_list.push_back(fname_super+"");
+    pml_list.push_back(fname_super+"_atm");
+    pml_list.push_back(fname_super+"_all");
+    pml_list.push_back(fname_super+"_all_atm");
+    pml_list.push_back(fname_super+"_all_atm_lig");
+    for (i=0;i<pml_list.size();i++)
+    {
+        buf_pymol<<"#!/usr/bin/env pymol\n"
+            <<"load "<<pml_list[i]<<"\n"
+            <<"hide all\n"
+            <<((i==0 || i==2)?("show stick\n"):("show cartoon\n"))
+            <<"color blue, chain A\n"
+            <<"color red, chain B\n"
+            <<"set ray_shadow, 0\n"
+            <<"set stick_radius, 0.3\n"
+            <<"set sphere_scale, 0.25\n"
+            <<"show stick, not polymer\n"
+            <<"show sphere, not polymer\n"
+            <<"bg_color white\n"
+            <<"set transparency=0.2\n"
+            <<"zoom polymer\n"
+            <<endl;
+        fp.open((pml_list[i]+".pml").c_str());
+        fp<<buf_pymol.str();
+        fp.close();
+        buf_pymol.str(string());
+        pml_list[i].clear();
+    }
+    pml_list.clear();
+    */
+    
+    /* write rasmol script */
+    if (!mm_opt)
+    {
+        fp.open((fname_super).c_str());
+        fp<<buf.str();
+        fp.close();
+    }
+    fp.open((fname_super+"_all").c_str());
+    fp<<buf_all.str();
+    fp.close();
+    if (!mm_opt)
+    {
+        fp.open((fname_super+"_atm").c_str());
+        fp<<buf_atm.str();
+        fp.close();
+    }
+    fp.open((fname_super+"_all_atm").c_str());
+    fp<<buf_all_atm.str();
+    fp.close();
+    fp.open((fname_super+"_all_atm_lig").c_str());
+    fp<<buf_all_atm_lig.str();
+    fp.close();
+    //fp.open((fname_super+".pdb").c_str());
+    //fp<<buf_pdb.str();
+    //fp.close();
+
+    /* clear stream */
+    buf.str(string());
+    buf_all.str(string());
+    buf_atm.str(string());
+    buf_all_atm.str(string());
+    buf_all_atm_lig.str(string());
+    //buf_pdb.str(string());
+    buf_tm.str(string());
+    resi_aln1.clear();
+    resi_aln2.clear();
+    asym_id.clear();
+    line_vec.clear();
+    atom.clear();
+    AA.clear();
+    resi.clear();
+    inscode.clear();
+    model_index.clear();
 }
 
 /* extract rotation matrix based on TMscore8 */
@@ -1588,7 +2442,7 @@ void output_rotation_matrix(const char* fname_matrix,
     fout.open(fname_matrix, ios::out | ios::trunc);
     if (fout)// succeed
     {
-        fout << "------ The rotation matrix to rotate Chain_1 to Chain_2 ------\n";
+        fout << "------ The rotation matrix to rotate Structure_1 to Structure_2 ------\n";
         char dest[1000];
         sprintf(dest, "m %18s %14s %14s %14s\n", "t[m]", "u[m][0]", "u[m][1]", "u[m][2]");
         fout << string(dest);
@@ -1597,12 +2451,12 @@ void output_rotation_matrix(const char* fname_matrix,
             sprintf(dest, "%d %18.10f %14.10f %14.10f %14.10f\n", k, t[k], u[k][0], u[k][1], u[k][2]);
             fout << string(dest);
         }
-        fout << "\nCode for rotating Structure A from (x,y,z) to (X,Y,Z):\n"
+        fout << "\nCode for rotating Structure 1 from (x,y,z) to (X,Y,Z):\n"
                 "for(i=0; i<L; i++)\n"
                 "{\n"
-                "   X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i]\n"
-                "   Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i]\n"
-                "   Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i]\n"
+                "   X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
+                "   Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
+                "   Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
                 "}\n";
         fout.close();
     }
@@ -1611,48 +2465,48 @@ void output_rotation_matrix(const char* fname_matrix,
 }
 
 //output the final results
-void output_results(
-    const string xname, const string yname,
-    const char *chainID1, const char *chainID2,
+void output_results(const string xname, const string yname,
+    const string chainID1, const string chainID2,
     const int xlen, const int ylen, double t[3], double u[3][3],
     const double TM1, const double TM2,
     const double TM3, const double TM4, const double TM5,
-    const double rmsd, const double d0_out,
-    const char *seqM, const char *seqxA, const char *seqyA, const double Liden,
-    const int n_ali8, const int n_ali, const int L_ali,
-    const double TM_ali, const double rmsd_ali, const double TM_0,
-    const double d0_0, const double d0A, const double d0B,
-    const double Lnorm_ass, const double d0_scale, 
-    const double d0a, const double d0u, const char* fname_matrix,
-    const int outfmt_opt, const int ter_opt, const char *fname_super,
-    const bool i_opt, const bool I_opt, const int a_opt,
-    const bool u_opt, const bool d_opt)
+    const double rmsd, const double d0_out, const char *seqM,
+    const char *seqxA, const char *seqyA, const double Liden,
+    const int n_ali8, const int L_ali, const double TM_ali,
+    const double rmsd_ali, const double TM_0, const double d0_0,
+    const double d0A, const double d0B, const double Lnorm_ass,
+    const double d0_scale, const double d0a, const double d0u,
+    const char* fname_matrix, const int outfmt_opt, const int ter_opt,
+    const int mm_opt, const int split_opt, const int o_opt,
+    const string fname_super, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const int mirror_opt,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
 {
     if (outfmt_opt<=0)
     {
-        printf("\nName of Chain_1: %s%s (to be superimposed onto Chain_2)\n",
-            xname.c_str(), chainID1);
-        printf("Name of Chain_2: %s%s\n", yname.c_str(), chainID2);
-        printf("Length of Chain_1: %d residues\n", xlen);
-        printf("Length of Chain_2: %d residues\n\n", ylen);
+        printf("\nName of Structure_1: %s%s (to be superimposed onto Structure_2)\n",
+            xname.c_str(), chainID1.c_str());
+        printf("Name of Structure_2: %s%s\n", yname.c_str(), chainID2.c_str());
+        printf("Length of Structure_1: %d residues\n", xlen);
+        printf("Length of Structure_2: %d residues\n\n", ylen);
 
-        if (i_opt || I_opt)
+        if (i_opt)
             printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali);
 
-        printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, Liden/(n_ali8+0.00000001));
-        printf("TM-score= %6.5f (if normalized by length of Chain_1, i.e., LN=%d, d0=%.2f)\n", TM2, xlen, d0B);
-        printf("TM-score= %6.5f (if normalized by length of Chain_2, i.e., LN=%d, d0=%.2f)\n", TM1, ylen, d0A);
+        printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
+        printf("TM-score= %6.5f (normalized by length of Structure_1: L=%d, d0=%.2f)\n", TM2, xlen, d0B);
+        printf("TM-score= %6.5f (normalized by length of Structure_2: L=%d, d0=%.2f)\n", TM1, ylen, d0A);
 
         if (a_opt==1)
-            printf("TM-score= %6.5f (if normalized by average length of two structures, i.e., LN= %.1f, d0= %.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
+            printf("TM-score= %6.5f (if normalized by average length of two structures: L=%.1f, d0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
         if (u_opt)
-            printf("TM-score= %6.5f (if normalized by user-specified LN=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u);
+            printf("TM-score= %6.5f (normalized by user-specified L=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u);
         if (d_opt)
-            printf("TM-score= %6.5f (if scaled by user-specified d0= %.2f, and LN= %d)\n", TM5, d0_scale, ylen);
+            printf("TM-score= %6.5f (scaled by user-specified d0=%.2f, and L=%d)\n", TM5, d0_scale, ylen);
         printf("(You should use TM-score normalized by length of the reference structure)\n");
     
         //output alignment
-        printf("\n(\":\" denotes residue pairs of d < %4.1f Angstrom, ", d0_out);
+        printf("\n(\":\" denotes residue pairs of d <%4.1f Angstrom, ", d0_out);
         printf("\".\" denotes other aligned residues)\n");
         printf("%s\n", seqxA);
         printf("%s\n", seqM);
@@ -1661,16 +2515,16 @@ void output_results(
     else if (outfmt_opt==1)
     {
         printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
-            xname.c_str(), chainID1, xlen, d0B, Liden/xlen, TM2);
+            xname.c_str(), chainID1.c_str(), xlen, d0B, Liden/xlen, TM2);
         printf("%s\n", seqxA);
         printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
-            yname.c_str(), chainID2, ylen, d0A, Liden/ylen, TM1);
+            yname.c_str(), chainID2.c_str(), ylen, d0A, Liden/ylen, TM1);
         printf("%s\n", seqyA);
 
         printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n",
-            n_ali8, rmsd, Liden/(n_ali8+0.00000001));
+            n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
 
-        if (i_opt || I_opt)
+        if (i_opt)
             printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali);
 
         if(a_opt)
@@ -1687,16 +2541,23 @@ void output_results(
     else if (outfmt_opt==2)
     {
         printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d",
-            xname.c_str(), chainID1, yname.c_str(), chainID2, TM2, TM1, rmsd,
-            Liden/xlen, Liden/ylen, Liden/(n_ali8+0.00000001),
+            xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(),
+            TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0,
             xlen, ylen, n_ali8);
     }
     cout << endl;
 
-    if (strlen(fname_matrix)) 
-        output_rotation_matrix(fname_matrix, t, u);
-    if (strlen(fname_super))
-        output_superpose(xname, fname_super, t, u, ter_opt);
+    if (strlen(fname_matrix)) output_rotation_matrix(fname_matrix, t, u);
+
+    if (o_opt==1)
+        output_pymol(xname, yname, fname_super, t, u, ter_opt,
+            mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA,
+            resi_vec1, resi_vec2, chainID1, chainID2);
+    else if (o_opt==2)
+        output_rasmol(xname, yname, fname_super, t, u, ter_opt,
+            mm_opt, split_opt, mirror_opt, seqM, seqxA, seqyA,
+            resi_vec1, resi_vec2, chainID1, chainID2,
+            xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden);
 }
 
 double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm,
@@ -1836,7 +2697,7 @@ void clean_up_after_approx_TM(int *invmap0, int *invmap,
  * 1   - terminated due to exception
  * 2-7 - pre-terminated due to low TM-score */
 int TMalign_main(double **xa, double **ya,
-    const char *seqx, const char *seqy, const int *secx, const int *secy,
+    const char *seqx, const char *seqy, const char *secx, const char *secy,
     double t0[3], double u0[3][3],
     double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
     double &d0_0, double &TM_0,
@@ -1846,8 +2707,7 @@ int TMalign_main(double **xa, double **ya,
     double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
     const int xlen, const int ylen,
     const vector<string> sequence, const double Lnorm_ass,
-    const double d0_scale,
-    const bool i_opt, const bool I_opt, const int a_opt,
+    const double d0_scale, const int i_opt, const int a_opt,
     const bool u_opt, const bool d_opt, const bool fast_opt,
     const int mol_type, const double TMcut=-1)
 {
@@ -1880,7 +2740,7 @@ int TMalign_main(double **xa, double **ya,
     /***********************/
     parameter_set4search(xlen, ylen, D0_MIN, Lnorm, 
         score_d8, d0, d0_search, dcu0);
-    int simplify_step    = 40; //for similified search engine
+    int simplify_step    = 40; //for simplified search engine
     int score_sum_method = 8;  //for scoring method, whether only sum over pairs with dis<score_d8
 
     int i;
@@ -1898,7 +2758,7 @@ int TMalign_main(double **xa, double **ya,
     //    Stick to the initial alignment              //
     //************************************************//
     bool bAlignStick = false;
-    if (I_opt)// if input has set parameter for "-I"
+    if (i_opt==3)// if input has set parameter for "-I"
     {
         // In the original code, this loop starts from 1, which is
         // incorrect. Fortran starts from 1 but C++ should starts from 0.
@@ -2151,7 +3011,7 @@ int TMalign_main(double **xa, double **ya,
         //************************************************//
         //    get initial alignment from user's input:    //
         //************************************************//
-        if (i_opt)// if input has set parameter for "-i"
+        if (i_opt==1)// if input has set parameter for "-i"
         {
             for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
                 invmap[j] = -1;
@@ -2209,7 +3069,7 @@ int TMalign_main(double **xa, double **ya,
     //*******************************************************************//
     //    The alignment will not be changed any more in the following    //
     //*******************************************************************//
-    //check if the initial alignment is generated approriately
+    //check if the initial alignment is generated appropriately
     bool flag=false;
     for(i=0; i<ylen; i++)
     {
@@ -2221,8 +3081,9 @@ int TMalign_main(double **xa, double **ya,
     }
     if(!flag)
     {
-        cout << "There is no alignment between the two proteins!" << endl;
-        cout << "Program stop with no result!" << endl;
+        cout << "There is no alignment between the two proteins! "
+             << "Program stop with no result!" << endl;
+        TM1=TM2=TM3=TM4=TM5=0;
         return 1;
     }
 
@@ -2245,7 +3106,7 @@ int TMalign_main(double **xa, double **ya,
     //    Detailed TMscore search engine --> prepare for final TMscore    //
     //********************************************************************//
     //run detailed TMscore search engine for the best alignment, and
-    //extract the best rotation matrix (t, u) for the best alginment
+    //extract the best rotation matrix (t, u) for the best alignment
     simplify_step=1;
     if (fast_opt) simplify_step=40;
     score_sum_method=8;
@@ -2268,7 +3129,7 @@ int TMalign_main(double **xa, double **ya,
         {
             n_ali++;
             d=sqrt(dist(&xt[i][0], &ya[j][0]));
-            if (d <= score_d8 || (I_opt == true))
+            if (d <= score_d8 || (i_opt == 3))
             {
                 m1[k]=i;
                 m2[k]=j;
@@ -2324,6 +3185,7 @@ int TMalign_main(double **xa, double **ya,
     TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step,
         score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
 
+    double Lnorm_d0;
     if (a_opt>0)
     {
         //normalized by average length of structures A, B
@@ -2359,6 +3221,7 @@ int TMalign_main(double **xa, double **ya,
         d0_out=d0_scale;
         d0_0=d0_scale;
         //Lnorm_0=ylen;
+        Lnorm_d0=Lnorm_0;
         local_d0_search = d0_search;
         TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
             simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
@@ -2372,7 +3235,8 @@ int TMalign_main(double **xa, double **ya,
     seqM.assign( ali_len,' ');
     seqyA.assign(ali_len,'-');
     
-    do_rotation(xa, xt, xlen, t, u);
+    //do_rotation(xa, xt, xlen, t, u);
+    do_rotation(xa, xt, xlen, t0, u0);
 
     int kk=0, i_old=0, j_old=0;
     d=0;
@@ -2435,3 +3299,163 @@ int TMalign_main(double **xa, double **ya,
     delete [] m2;
     return 0; // zero for no exception
 }
+
+/* entry function for TM-align with circular permutation; returns cp_point
+ * (0 = no CP). i_opt, a_opt, u_opt, d_opt, TMcut are not implemented yet */
+int CPalign_main(double **xa, double **ya,
+    const char *seqx, const char *seqy, const char *secx, const char *secy,
+    double t0[3], double u0[3][3],
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    const vector<string> sequence, const double Lnorm_ass,
+    const double d0_scale, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const bool fast_opt,
+    const int mol_type, const double TMcut=-1)
+{
+    char   *seqx_cp; // sequence of chain 1, two copies back to back
+    char   *secx_cp; // secondary structure of chain 1, two copies
+    double **xa_cp;   // coordinates of chain 1, two copies
+    string seqxA_cp,seqyA_cp;  // alignment involving the duplicated chain 1
+    int    i,r;
+    int    cp_point=0;    // position of circular permutation
+    int    cp_aln_best=0; // largest aligned-residue count over all windows
+    int    cp_aln_current;// aligned-residue count in the current window
+
+    /* duplicate structure: entries r and r+xlen both hold residue r */
+    NewArray(&xa_cp, xlen*2, 3);
+    seqx_cp = new char[xlen*2 + 1];
+    secx_cp = new char[xlen*2 + 1];
+    for (r=0;r<xlen;r++)
+    {
+        xa_cp[r+xlen][0]=xa_cp[r][0]=xa[r][0];
+        xa_cp[r+xlen][1]=xa_cp[r][1]=xa[r][1];
+        xa_cp[r+xlen][2]=xa_cp[r][2]=xa[r][2];
+        seqx_cp[r+xlen]=seqx_cp[r]=seqx[r];
+        secx_cp[r+xlen]=secx_cp[r]=secx[r];
+    }
+    seqx_cp[2*xlen]=0;
+    secx_cp[2*xlen]=0;
+
+    /* fTM-align alignment of the duplicated chain 1 against chain 2 */
+    double TM1_cp,TM2_cp;
+    TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
+        t0, u0, TM1_cp, TM2_cp, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen*2, ylen, sequence, Lnorm_ass, d0_scale,
+        0, false, false, false, true, mol_type, -1);
+
+    /* delete gap in seqxA_cp, so that position r is residue r of chain 1 */
+    r=0;
+    seqxA=seqxA_cp;
+    seqyA=seqyA_cp;
+    for (i=0;i<seqxA_cp.size();i++)
+    {
+        if (seqxA_cp[i]!='-')
+        {
+            seqxA[r]=seqxA_cp[i];
+            seqyA[r]=seqyA_cp[i];
+            r++;
+        }
+    }
+    seqxA=seqxA.substr(0,r);
+    seqyA=seqyA.substr(0,r);
+
+    /* count the number of aligned residues in each window; the window is
+     * kept inside seqyA so a short or empty alignment is never overrun
+     * r - residue index in the original sequence; i - alignment position */
+    for (r=0;r<xlen-1 && r+xlen<=static_cast<int>(seqyA.size());r++)
+    {
+        cp_aln_current=0;
+        for (i=r;i<r+xlen;i++) cp_aln_current+=(seqyA[i]!='-');
+
+        if (cp_aln_current>cp_aln_best)
+        {
+            cp_aln_best=cp_aln_current;
+            cp_point=r;
+        }
+    }
+    seqM.clear();
+    seqxA.clear();
+    seqyA.clear();
+    seqxA_cp.clear();
+    seqyA_cp.clear();
+    rmsd0=Liden=n_ali=n_ali8=0;
+
+    /* fTM-align alignment of the original, sequence-order dependent chains */
+    TMalign_main(xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        0, false, false, false, true, mol_type, -1);
+
+    /* do not use circular permutation if its number of aligned residues is
+     * not larger than that of the sequence-order dependent alignment */
+    if (n_ali8>=cp_aln_best) cp_point=0;
+
+    /* prepare structure for final alignment: move window cp_point to front */
+    seqM.clear();
+    seqxA.clear();
+    seqyA.clear();
+    rmsd0=Liden=n_ali=n_ali8=0;
+    if (cp_point!=0)
+    {
+        for (r=0;r<xlen;r++)
+        {
+            xa_cp[r][0]=xa_cp[r+cp_point][0];
+            xa_cp[r][1]=xa_cp[r+cp_point][1];
+            xa_cp[r][2]=xa_cp[r+cp_point][2];
+            seqx_cp[r]=seqx_cp[r+cp_point];
+            secx_cp[r]=secx_cp[r+cp_point];
+        }
+    }
+    seqx_cp[xlen]=0;
+    secx_cp[xlen]=0;
+
+    /* full TM-align on the first xlen (possibly permuted) residues */
+    TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type, TMcut);
+
+    /* correct alignment: insert a '*' column after the last residue that
+     * precedes the original N-terminus (xlen-cp_point residues consumed)
+     * r - residues of chain 1 seen so far; i - position in the alignment */
+    if (cp_point>0)
+    {
+        r=0;
+        for (i=0;i<seqxA_cp.size();i++)
+        {
+            r+=(seqxA_cp[i]!='-');
+            if (r>=(xlen-cp_point)) 
+            {
+                i++;
+                break;
+            }
+        }
+        seqxA=seqxA_cp.substr(0,i)+'*'+seqxA_cp.substr(i);
+        seqM =seqM.substr(0,i)    +' '+seqM.substr(i);
+        seqyA=seqyA_cp.substr(0,i)+'-'+seqyA_cp.substr(i);
+    }
+    else
+    {
+        seqxA=seqxA_cp;
+        seqyA=seqyA_cp;
+    }
+
+    /* clean up */
+    delete[]seqx_cp;
+    delete[]secx_cp;
+    DeleteArray(&xa_cp,xlen*2);
+    seqxA_cp.clear();
+    seqyA_cp.clear();
+    return cp_point;
+}
diff --git a/modules/bindings/src/tmalign/TMscore.cpp b/modules/bindings/src/tmalign/TMscore.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c2ca9958a1ceabb2caf0b914aafa13bb87490635
--- /dev/null
+++ b/modules/bindings/src/tmalign/TMscore.cpp
@@ -0,0 +1,525 @@
+#include "TMscore.h"
+
+using namespace std;
+
+/* Print the TM-score banner (score ranges, reference, contact e-mail). */
+void print_version()
+{
+    static const char banner[] = "\n"
+" *************************************************************************\n"
+" *                                 TM-SCORE                              *\n"
+" * A scoring function to assess the similarity of protein structures     *\n"
+" * Based on statistics:                                                  *\n"
+" *       0.0 < TM-score < 0.17, random structural similarity             *\n"
+" *       0.5 < TM-score < 1.00, in about the same fold                   *\n"
+" * Reference: Yang Zhang and Jeffrey Skolnick, Proteins 2004 57: 702-710 *\n"
+" * For comments, please email to: yangzhanglab@umich.edu                 *\n"
+" *************************************************************************";
+    cout << banner << endl;
+}
+
+/* Print the extended option list of TMscore (shown by print_help on -h). */
+void print_extra_help()
+{
+    cout << "Additional options:\n"
+"    -a       TM-score normalized by the average length of two structures\n"
+"             T or F, (default F)\n"
+"\n"
+"    -m       Output TM-score rotation matrix\n"
+"\n"
+"    -d       TM-score scaled by an assigned d0, e.g. 5 Angstroms\n"
+"\n"
+"    -fast    Fast but slightly inaccurate alignment\n"
+"\n"
+"    -dir     Perform all-against-all alignment among the list of PDB\n"
+"             chains listed by 'chain_list' under 'chain_folder'. Note\n"
+"             that the slash is necessary.\n"
+"             $ TMscore -dir chain_folder/ chain_list\n"
+"\n"
+"    -dir1    Use chain2 to search a list of PDB chains listed by 'chain1_list'\n"
+"             under 'chain1_folder'. Note that the slash is necessary.\n"
+"             $ TMscore -dir1 chain1_folder/ chain1_list chain2\n"
+"\n"
+"    -dir2    Use chain1 to search a list of PDB chains listed by 'chain2_list'\n"
+"             under 'chain2_folder'\n"
+"             $ TMscore chain1 -dir2 chain2_folder/ chain2_list\n"
+"\n"
+"    -suffix  (Only when -dir1 and/or -dir2 are set, default is empty)\n"
+"             add file name suffix to files listed by chain1_list or chain2_list\n"
+"\n"
+"    -atom    4-character atom name used to represent a residue.\n"
+"             Default is \" C3'\" for RNA/DNA and \" CA \" for proteins\n"
+"             (note the spaces before and after CA).\n"
+"\n"
+"    -mol     Molecule type: RNA or protein\n"
+"             Default is detect molecule type automatically\n"
+"\n"
+"    -ter     Strings to mark the end of a chain\n"
+"             3: (default) TER, ENDMDL, END or different chain ID\n"
+"             2: ENDMDL, END, or different chain ID\n"
+"             1: ENDMDL or END\n"
+"             0: (default in the first C++ TMalign) end of file\n"
+"\n"
+"    -split   Whether to split PDB file into multiple chains\n"
+"             0: (default) treat the whole structure as one single chain\n"
+"             1: treat each MODEL as a separate chain (-ter should be 0)\n"
+"             2: treat each chain as a separate chain (-ter should be <=1)\n"
+"\n"
+"    -outfmt  Output format\n"
+"             0: (default) full output\n"
+"             1: fasta format compact output\n"
+"             2: tabular format very compact output\n"
+"            -1: full output, but without version or citation information\n"
+"\n"
+"    -mirror  Whether to align the mirror image of input structure\n"
+"             0: (default) do not align mirrored structure\n"
+"             1: align mirror of chain1 to origin chain2\n"
+"\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
+"    -infmt1  Input format for chain1\n"
+"    -infmt2  Input format for chain2\n"
+"            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
+"             0: PDB format\n"
+"             1: SPICKER format\n"
+"             2: xyz format\n"
+"             3: PDBx/mmCIF format\n"
+    <<endl;
+}
+
+/* Print brief usage (plus the extended option list if h_opt), then exit. */
+void print_help(bool h_opt=false)
+{
+    // the version banner is deliberately not printed here: print_version();
+    cout << "\n"
+" Brief instruction for running TM-score program:\n"
+" (For detail: Zhang & Skolnick, Proteins, 2004 57:702-10)\n"
+"\n"
+" 1. Run TM-score to compare 'model' and 'native':\n"
+"     $ TMscore model.pdb native.pdb\n"
+"\n"
+" 2. Run TM-score to compare two complex structures with multiple chains\n"
+"     $ TMscore -c model.pdb native.pdb\n"
+"\n"
+" 3. TM-score normalized with an assigned scale d0 e.g. 5 A:\n"
+"     $ TMscore model.pdb native.pdb -d 5\n"
+"\n"
+" 4. TM-score normalized by a specific length, e.g. 120 AA:\n"
+"     $ TMscore model.pdb native.pdb -l 120\n"
+"\n"
+" 5. TM-score with superposition output, e.g. 'TM_sup.pdb':\n"
+"     $ TMscore model.pdb native.pdb -o TM_sup.pdb\n"
+"    To view superimposed atomic model by PyMOL:\n"
+"     $ pymol TM_sup.pdb native.pdb\n"
+    <<endl;
+
+    if (h_opt) print_extra_help();
+
+    exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc < 2) print_help();
+
+    /* TMscore driver: parse the options, build the chain list(s), then  */
+    /* score every chain1/chain2 pair with TMscore_main and print it.    */
+    /* ---- get argument ----                                            */
+    string xname       = "";
+    string yname       = "";
+    string fname_super = ""; // file name for superposed structure
+    string fname_lign  = ""; // file name for user alignment
+    string fname_matrix= ""; // file name for output matrix
+    vector<string> sequence; // get value from alignment file
+    double Lnorm_ass=0, d0_scale=0; // only set by -u/-l/-L and -d
+
+    bool h_opt = false; // print full help message
+    bool v_opt = false; // print version
+    bool m_opt = false; // flag for -m, output rotation matrix
+    bool o_opt = false; // flag for -o, output superposed structure
+    int  a_opt = 0;     // flag for -a; 0: do not normalize by average length
+    bool u_opt = false; // flag for -u, normalized by user specified length
+    bool d_opt = false; // flag for -d, user specified d0
+
+    double TMcut     =-1;
+    int    infmt1_opt=-1;    // PDB or PDBx/mmCIF format for chain_1
+    int    infmt2_opt=-1;    // PDB or PDBx/mmCIF format for chain_2
+    int    ter_opt   =3;     // TER, END, or different chainID
+    int    split_opt =0;     // do not split chain
+    int    outfmt_opt=0;     // set -outfmt to full output
+    bool   fast_opt  =false; // flags for -fast, fTM-align algorithm
+    int    mirror_opt=0;     // do not align mirror
+    int    het_opt=0;        // do not read HETATM residues
+    string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
+    string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
+    string suffix_opt="";    // set -suffix to empty
+    string dir_opt   ="";    // set -dir to empty
+    string dir1_opt  ="";    // set -dir1 to empty
+    string dir2_opt  ="";    // set -dir2 to empty
+    int    byresi_opt=1;     // TM-score without -c
+    vector<string> chain1_list; // only when -dir1 is set
+    vector<string> chain2_list; // only when -dir2 is set
+
+    for(int i = 1; i < argc; i++)
+    {
+        if ( !strcmp(argv[i],"-o") && i < (argc-1) )
+        {
+            fname_super = argv[i + 1];     o_opt = true; i++;
+        }
+        else if ( (!strcmp(argv[i],"-u") || !strcmp(argv[i],"-l") ||
+                   !strcmp(argv[i],"-L")) && i < (argc-1) )
+        {
+            Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-a") && i < (argc-1) )
+        {
+            if (!strcmp(argv[i + 1], "T"))      a_opt=true;
+            else if (!strcmp(argv[i + 1], "F")) a_opt=false;
+            else 
+            {
+                a_opt=atoi(argv[i + 1]);
+                if (a_opt!=-2 && a_opt!=-1 && a_opt!=1)
+                    PrintErrorAndQuit("-a must be -2, -1, 1, T or F");
+            }
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-d") && i < (argc-1) )
+        {
+            d0_scale = atof(argv[i + 1]); d_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-v") )
+        {
+            v_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-h") )
+        {
+            h_opt = true;
+        }
+        else if (!strcmp(argv[i], "-m") && i < (argc-1) )
+        {
+            fname_matrix = argv[i + 1];    m_opt = true; i++;
+        }// get filename for rotation matrix
+        else if (!strcmp(argv[i], "-fast"))
+        {
+            fast_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) )
+        {
+            infmt1_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) )
+        {
+            infmt2_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-ter") && i < (argc-1) )
+        {
+            ter_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-split") && i < (argc-1) )
+        {
+            split_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-atom") && i < (argc-1) )
+        {
+            atom_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-mol") && i < (argc-1) )
+        {
+            mol_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir") && i < (argc-1) )
+        {
+            dir_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) )
+        {
+            dir1_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) )
+        {
+            dir2_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) )
+        {
+            suffix_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) )
+        {
+            outfmt_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-c") )
+        {
+            byresi_opt=2;
+        }
+        else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) )
+        {
+            mirror_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
+        else if (xname.size() == 0) xname=argv[i];
+        else if (yname.size() == 0) yname=argv[i];
+        else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
+    }
+
+    if(xname.size()==0 || (yname.size()==0 && dir_opt.size()==0) || 
+                          (yname.size()    && dir_opt.size()))
+    {
+        if (h_opt) print_help(h_opt);
+        if (v_opt)
+        {
+            print_version();
+            exit(EXIT_FAILURE);
+        }
+        if (xname.size()==0)
+            PrintErrorAndQuit("Please provide input structures");
+        else if (yname.size()==0 && dir_opt.size()==0)
+            PrintErrorAndQuit("Please provide structure B");
+        else if (yname.size() && dir_opt.size())
+            PrintErrorAndQuit("Please provide only one file name if -dir is set");
+    }
+
+    if (suffix_opt.size() && dir_opt.size()+dir1_opt.size()+dir2_opt.size()==0)
+        PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set");
+    if ((dir_opt.size() || dir1_opt.size() || dir2_opt.size()))
+    {
+        if (m_opt || o_opt)
+            PrintErrorAndQuit("-m or -o cannot be set with -dir, -dir1 or -dir2");
+        else if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size()))
+            PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
+    }
+    if (atom_opt.size()!=4)
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
+    if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
+    else if (mol_opt=="protein" && atom_opt=="auto")
+        atom_opt=" CA ";
+    else if (mol_opt=="RNA" && atom_opt=="auto")
+        atom_opt=" C3'";
+
+    if (u_opt && Lnorm_ass<=0)
+        PrintErrorAndQuit("Wrong value for option -u!  It should be >0");
+    if (d_opt && d0_scale<=0)
+        PrintErrorAndQuit("Wrong value for option -d!  It should be >0");
+    if (outfmt_opt>=2 && (a_opt || u_opt || d_opt))
+        PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d");
+    if (byresi_opt>=2 && ter_opt>=2)
+        PrintErrorAndQuit("-byresi >=2 should be used with -ter <=1");
+    if (split_opt==1 && ter_opt!=0)
+        PrintErrorAndQuit("-split 1 should be used with -ter 0");
+    else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
+        PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1");
+    if (split_opt<0 || split_opt>2)
+        PrintErrorAndQuit("-split can only be 0, 1 or 2");
+
+    if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt
+        PrintErrorAndQuit("ERROR! Please provide a file name for option -m!");
+
+    /* parse file list */
+    if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname);
+    else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt);
+
+    if (dir_opt.size())
+        for (int i=0;i<chain1_list.size();i++)
+            chain2_list.push_back(chain1_list[i]);
+    else if (dir2_opt.size()==0) chain2_list.push_back(yname);
+    else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt);
+
+    if (outfmt_opt==2)
+        cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t"
+            <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl;
+
+    /* declare previously global variables */
+    vector<vector<string> >PDB_lines1; // text of chain1
+    vector<vector<string> >PDB_lines2; // text of chain2
+    vector<int> mol_vec1;              // molecule type of chain1, RNA if >0
+    vector<int> mol_vec2;              // molecule type of chain2, RNA if >0
+    vector<string> chainID_list1;      // list of chainID1
+    vector<string> chainID_list2;      // list of chainID2
+    int    i,j;                // file index
+    int    chain_i,chain_j;    // chain index
+    int    r;                  // residue index
+    int    xlen, ylen;         // chain length
+    int    xchainnum=0,ychainnum=0;// number of chains in a PDB file; 0 until parsed
+    char   *seqx, *seqy;       // for the protein sequence 
+    double **xa, **ya;         // for input vectors xa[0...xlen-1][0..2] and
+                               // ya[0...ylen-1][0..2], in general,
+                               // ya is regarded as native structure 
+                               // --> superpose xa onto ya
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+
+    /* loop over file names */
+    for (i=0;i<chain1_list.size();i++)
+    {
+        /* parse chain 1 */
+        xname=chain1_list[i];
+        xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
+        if (!xchainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<xname
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        for (chain_i=0;chain_i<xchainnum;chain_i++)
+        {
+            xlen=PDB_lines1[chain_i].size();
+            if (mol_opt=="RNA") mol_vec1[chain_i]=1;
+            else if (mol_opt=="protein") mol_vec1[chain_i]=-1;
+            if (!xlen)
+            {
+                cerr<<"Warning! Cannot parse file: "<<xname
+                    <<". Chain length 0."<<endl;
+                continue;
+            }
+            else if (xlen<3)
+            {
+                cerr<<"Sequence is too short <3!: "<<xname<<endl;
+                continue;
+            }
+            NewArray(&xa, xlen, 3);
+            seqx = new char[xlen + 1];
+            xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, 
+                resi_vec1, byresi_opt);
+            if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2];
+
+            /* with -dir, start at i+1 so each unordered pair is scored once */
+            for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++)
+            {
+                /* parse chain 2, unless it is still cached in PDB_lines2 */
+                if (PDB_lines2.size()==0)
+                {
+                    yname=chain2_list[j];
+                    ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
+                    if (!ychainnum)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain number 0."<<endl;
+                        continue;
+                    }
+                }
+                for (chain_j=0;chain_j<ychainnum;chain_j++)
+                {
+                    ylen=PDB_lines2[chain_j].size();
+                    if (mol_opt=="RNA") mol_vec2[chain_j]=1;
+                    else if (mol_opt=="protein") mol_vec2[chain_j]=-1;
+                    if (!ylen)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain length 0."<<endl;
+                        continue;
+                    }
+                    else if (ylen<3)
+                    {
+                        cerr<<"Sequence is too short <3!: "<<yname<<endl;
+                        continue;
+                    }
+                    NewArray(&ya, ylen, 3);
+                    seqy = new char[ylen + 1];
+                    ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
+                        resi_vec2, byresi_opt);
+
+                    if (byresi_opt) extract_aln_from_resi(sequence,
+                        seqx,seqy,resi_vec1,resi_vec2,byresi_opt);
+
+                    /* declare variable specific to this pair of TMalign */
+                    double t0[3], u0[3][3];
+                    double TM1, TM2;
+                    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+                    double d0_0, TM_0;
+                    double d0A, d0B, d0u, d0a;
+                    double d0_out=5.0;
+                    string seqM, seqxA, seqyA;// for output alignment
+                    double rmsd0 = 0.0;
+                    int L_ali;                // Aligned length in standard_TMscore
+                    double Liden=0;
+                    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+                    int n_ali=0;
+                    int n_ali8=0;
+
+                    double rmsd_d0_out=0;
+                    int L_lt_d=0;
+                    double GDT_list[5]={0,0,0,0,0}; // 0.5, 1, 2, 4, 8
+                    double maxsub=0;
+
+                    /* entry function for structure alignment */
+                    TMscore_main(
+                        xa, ya, seqx, seqy,
+                        t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        a_opt, u_opt, d_opt, fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],
+                        GDT_list,maxsub,TMcut);
+
+                    /* print result */
+                    if (outfmt_opt==0) print_version();
+                    output_TMscore_results(
+                        xname.substr(dir1_opt.size()+dir_opt.size()),
+                        yname.substr(dir2_opt.size()+dir_opt.size()),
+                        chainID_list1[chain_i],
+                        chainID_list2[chain_j],
+                        xlen, ylen, t0, u0, TM1, TM2, 
+                        TM3, TM4, TM5, rmsd0, d0_out,
+                        seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
+                        n_ali8, L_ali, TM_ali, rmsd_ali,
+                        TM_0, d0_0, d0A, d0B,
+                        Lnorm_ass, d0_scale, d0a, d0u, 
+                        (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(),
+                        outfmt_opt, ter_opt, 
+                        (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(),
+                        a_opt, u_opt, d_opt, mirror_opt,
+                        L_lt_d, rmsd_d0_out, GDT_list, maxsub,
+                        split_opt, resi_vec1, resi_vec2);
+
+                    /* Done! Free memory */
+                    seqM.clear();
+                    seqxA.clear();
+                    seqyA.clear();
+                    DeleteArray(&ya, ylen);
+                    delete [] seqy;
+                    resi_vec2.clear();
+                } // chain_j
+                if (chain2_list.size()>1)
+                {
+                    yname.clear();
+                    for (chain_j=0;chain_j<ychainnum;chain_j++)
+                        PDB_lines2[chain_j].clear();
+                    PDB_lines2.clear();
+                    chainID_list2.clear();
+                    mol_vec2.clear();
+                }
+            } // j
+            PDB_lines1[chain_i].clear();
+            DeleteArray(&xa, xlen);
+            delete [] seqx;
+            resi_vec1.clear();
+        } // chain_i
+        xname.clear();
+        PDB_lines1.clear();
+        chainID_list1.clear();
+        mol_vec1.clear();
+    } // i
+    if (chain2_list.size()==1)
+    {
+        yname.clear();
+        for (chain_j=0;chain_j<ychainnum;chain_j++)
+            PDB_lines2[chain_j].clear();
+        PDB_lines2.clear();
+        resi_vec2.clear();
+        chainID_list2.clear();
+        mol_vec2.clear();
+    }
+    chain1_list.clear();
+    chain2_list.clear();
+    sequence.clear();
+    return 0;
+}
diff --git a/modules/bindings/src/tmalign/TMscore.h b/modules/bindings/src/tmalign/TMscore.h
new file mode 100644
index 0000000000000000000000000000000000000000..445335c79e9f08561d0adef17addfeb2cff79830
--- /dev/null
+++ b/modules/bindings/src/tmalign/TMscore.h
@@ -0,0 +1,958 @@
+#include "TMalign.h"
+
+/* Score one superposition: select pairs within a distance cutoff, accumulate
+ * the TM-score sum, and tally the MaxSub term and the five GDT bins.
+ * dist() results are compared against squared thresholds throughout.
+ *
+ * xa, ya           coordinate arrays; pair p is (xa[p], ya[p])
+ * n_ali            number of pairs
+ * d                starting cutoff for selecting pairs into i_ali
+ * i_ali            output: indices of the selected pairs
+ * score1           output: accumulated score divided by Lnorm
+ * score_sum_method 8: only pairs with dist() <= score_d8^2 add to the score;
+ *                  any other value: every pair adds to the score
+ * d0               scale of the per-pair term 1/(1+dist/d0^2)
+ * GDT_list_tmp     output: pair counts for dist() < 0.25, 1, 4, 16 and 64
+ * maxsub_tmp       output: sum of 1/(1+dist/12.25) over pairs with dist()<12.25
+ *
+ * Returns how many indices were written to i_ali. */
+int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
+    double *score1, int score_sum_method, const double Lnorm, 
+    const double score_d8, const double d0,
+    double GDT_list_tmp[5], double &maxsub_tmp)
+{
+    const double d0_sq = d0*d0;
+    const double d8_sq = score_d8*score_d8;
+    double cutoff_sq   = d*d;
+    double total       = 0;
+    int    n_sel       = 0;
+    int    relax       = 0;
+
+    for (;;)
+    {
+        // every scan starts from clean accumulators
+        for (int b=0; b<5; b++) GDT_list_tmp[b]=0;
+        maxsub_tmp=0;
+        n_sel=0;
+        total=0;
+
+        for (int p=0; p<n_ali; p++)
+        {
+            const double dd = dist(xa[p], ya[p]);
+            if (dd<cutoff_sq)
+            {
+                i_ali[n_sel]=p;
+                n_sel++;
+            }
+
+            if (score_sum_method!=8 || dd<=d8_sq)
+                total += 1/(1+dd/d0_sq);
+
+            // The thresholds are nested, so testing each one independently
+            // yields the same tallies as a cascade of nested ifs.
+            if (dd<64)    GDT_list_tmp[4]+=1;         // 8*8
+            if (dd<16)    GDT_list_tmp[3]+=1;         // 4*4
+            if (dd<12.25) maxsub_tmp+=1/(1+dd/12.25); // 3.5^2
+            if (dd<4)     GDT_list_tmp[2]+=1;         // 2*2
+            if (dd<1)     GDT_list_tmp[1]+=1;         // 1*1
+            if (dd<0.25)  GDT_list_tmp[0]+=1;         // 0.5*0.5
+        }
+
+        // stop once at least 3 pairs are selected, or when there are too
+        // few pairs for relaxing the cutoff to be attempted
+        if (n_sel>=3 || n_ali<=3) break;
+
+        // otherwise widen the cutoff by a further 0.5 and scan again
+        relax++;
+        const double widened = d+relax*0.5;
+        cutoff_sq = widened*widened;
+    }
+
+    *score1=total/Lnorm;
+    return n_sel;
+}
+
+/* Variant of the superposition scorer that normalises the score by the
+ * number of pairs (n_ali) rather than by an externally supplied length.
+ *
+ * xa, ya           coordinate arrays; pair k is (xa[k], ya[k])
+ * n_ali            number of pairs, also the divisor of the score
+ * d                starting cutoff for selecting pairs into i_ali
+ * i_ali            output: indices of pairs with dist() below the cutoff squared
+ * score1           output: accumulated score divided by n_ali
+ * score_sum_method 8: a pair adds to the score only if dist() <= score_d8^2
+ * d0               scale of the per-pair term 1/(1+dist/d0^2)
+ * GDT_list_tmp     output: pair counts for dist() < 0.25, 1, 4, 16 and 64
+ * maxsub_tmp       output: sum of 1/(1+dist/12.25) over pairs with dist()<12.25
+ * Returns the number of entries stored in i_ali. */
+int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
+    int i_ali[], double *score1, int score_sum_method,
+    double score_d8, double d0, double GDT_list_tmp[5], double &maxsub_tmp)
+{
+    const double d0_sq       = d0*d0;
+    const double sum_limit   = score_d8*score_d8;
+    const bool   limited_sum = (score_sum_method == 8);
+    double select_limit = d*d;
+    double acc = 0;
+    int picked = 0;
+    int widenings = 0;
+    bool again;
+
+    do
+    {
+        // reset everything a previous scan may have filled in
+        picked = 0;
+        acc = 0;
+        maxsub_tmp = 0;
+        for (int bin = 0; bin < 5; bin++) GDT_list_tmp[bin] = 0;
+
+        for (int k = 0; k < n_ali; k++)
+        {
+            const double dsq = dist(xa[k], ya[k]);
+            if (dsq < select_limit) i_ali[picked++] = k;
+
+            if (!limited_sum || dsq <= sum_limit)
+                acc += 1 / (1 + dsq / d0_sq);
+
+            // Nested thresholds: a pair that misses one cannot pass a
+            // tighter one, hence the early exits.
+            if (!(dsq < 64)) continue;        // 8*8
+            GDT_list_tmp[4] += 1;
+            if (!(dsq < 16)) continue;        // 4*4
+            GDT_list_tmp[3] += 1;
+            if (!(dsq < 12.25)) continue;     // 3.5^2
+            maxsub_tmp += 1 / (1 + dsq / 12.25);
+            if (!(dsq < 4)) continue;         // 2*2
+            GDT_list_tmp[2] += 1;
+            if (!(dsq < 1)) continue;         // 1*1
+            GDT_list_tmp[1] += 1;
+            if (dsq < 0.25)                   // 0.5*0.5
+                GDT_list_tmp[0] += 1;
+        }
+
+        // fewer than 3 selected although more than 3 pairs exist: widen by 0.5
+        again = (picked < 3 && n_ali > 3);
+        if (again)
+        {
+            widenings++;
+            const double wider = d + widenings*0.5;
+            select_limit = wider * wider;
+        }
+    } while (again);
+
+    *score1 = acc / n_ali;
+    return picked;
+}
+
+/* Search for the superposition with the highest TM-score over Lali aligned
+ * pairs, recording the largest MaxSub sum and GDT bin counts met on the way.
+ * Seeds are fragments of decreasing length slid along the alignment; each
+ * seed is refined by re-fitting on the pairs that fall within the cutoff.
+ * r1, r2           scratch coordinate arrays handed to Kabsch()
+ * xtm, ytm         aligned pairs; pair k is (xtm[k], ytm[k])
+ * xt               scratch: xtm after the trial transform
+ * t0, u0           output: translation and rotation of the best-scoring fit
+ * simplify_step    shift between successive seed fragments
+ * Rcomm            set to 0 whenever simplify_step != 1
+ * local_d0_search  base cutoff: minus 1 for the seed pass, plus 1 afterwards
+ * GDT_list, maxsub in/out: only ever raised here, never reset
+ * The remaining arguments are forwarded unchanged to score_fun8().
+ * Returns the best score found, or -1 if no pass scored above -1. */
+double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step,
+    int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm,
+    double score_d8, double d0, double GDT_list[5], double &maxsub)
+{
+    double GDT_list_tmp[5]={0,0,0,0,0};
+    double maxsub_tmp=0;
+    int i, m;
+    double score_max, score, rmsd;
+    const int kmax=Lali;
+    int k_ali[kmax], ka, k; //pairs used for the most recent fit
+    double t[3];
+    double u[3][3];
+    double d;
+
+    //iterative parameters
+    int n_it=20;            //maximum number of iterations
+    int n_init_max=6; //maximum number of different fragment length
+    int L_ini[n_init_max];  //fragment lengths, Lali, Lali/2, Lali/4 ... 4
+    int L_ini_min=4;
+    if(Lali<L_ini_min) L_ini_min=Lali;
+
+    int n_init=0, i_init;
+    for(i=0; i<n_init_max-1; i++)
+    {
+        n_init++;
+        L_ini[i]=(int) (Lali/pow(2.0, (double) i));
+        if(L_ini[i]<=L_ini_min)
+        {
+            L_ini[i]=L_ini_min;
+            break;
+        }
+    }
+    if(i==n_init_max-1) //halving never reached the minimum: append it
+    {
+        n_init++;
+        L_ini[i]=L_ini_min;
+    }
+
+    score_max=-1;
+    //find the maximum score starting from local structures superposition
+    int i_ali[kmax], n_cut; //pairs within the cutoff after the latest scoring
+    int L_frag; //fragment length
+    int iL_max; //maximum starting position for the fragment
+
+    for(i_init=0; i_init<n_init; i_init++)
+    {
+        L_frag=L_ini[i_init];
+        iL_max=Lali-L_frag;
+        i=0;
+        while(1)
+        {
+            //extract the fragment starting from position i
+            ka=0;
+            for(k=0; k<L_frag; k++)
+            {
+                int kk=k+i;
+                r1[k][0]=xtm[kk][0];
+                r1[k][1]=xtm[kk][1];
+                r1[k][2]=xtm[kk][2];
+                r2[k][0]=ytm[kk][0];
+                r2[k][1]=ytm[kk][1];
+                r2[k][2]=ytm[kk][2];
+                k_ali[ka]=kk;
+                ka++;
+            }
+            //extract rotation matrix based on the fragment
+            Kabsch(r1, r2, L_frag, 1, &rmsd, t, u);
+            if (simplify_step != 1)
+                *Rcomm = 0;
+            do_rotation(xtm, xt, Lali, t, u);
+            //get subsegment of this fragment
+            d = local_d0_search - 1;
+            n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score,
+                score_sum_method, Lnorm, score_d8, d0,
+                GDT_list_tmp, maxsub_tmp);
+            if(score>score_max)
+            {
+                score_max=score;
+                //save the rotation matrix
+                for(k=0; k<3; k++)
+                {
+                    t0[k]=t[k];
+                    for(int c=0; c<3; c++) u0[k][c]=u[k][c];
+                }
+            }
+            if (maxsub_tmp>maxsub) maxsub=maxsub_tmp;
+            for (k=0;k<5;k++)
+                if (GDT_list_tmp[k]>GDT_list[k])
+                    GDT_list[k]=GDT_list_tmp[k];
+
+            //try to extend the alignment iteratively
+            d = local_d0_search + 1;
+            for(int it=0; it<n_it; it++)
+            {
+                ka=0;
+                for(k=0; k<n_cut; k++)
+                {
+                    m=i_ali[k];
+                    r1[k][0]=xtm[m][0];
+                    r1[k][1]=xtm[m][1];
+                    r1[k][2]=xtm[m][2];
+                    r2[k][0]=ytm[m][0];
+                    r2[k][1]=ytm[m][1];
+                    r2[k][2]=ytm[m][2];
+                    k_ali[ka]=m;
+                    ka++;
+                }
+                //extract rotation matrix based on the fragment
+                Kabsch(r1, r2, n_cut, 1, &rmsd, t, u);
+                do_rotation(xtm, xt, Lali, t, u);
+                //also collect GDT/MaxSub for this refined fit, so the maxima below see it
+                n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score,
+                    score_sum_method, Lnorm, score_d8, d0,
+                    GDT_list_tmp, maxsub_tmp);
+                if(score>score_max)
+                {
+                    score_max=score;
+                    //save the rotation matrix
+                    for(k=0; k<3; k++)
+                    {
+                        t0[k]=t[k];
+                        for(int c=0; c<3; c++) u0[k][c]=u[k][c];
+                    }
+                }
+                if (maxsub_tmp>maxsub) maxsub=maxsub_tmp;
+                for (k=0;k<5;k++)
+                    if (GDT_list_tmp[k]>GDT_list[k])
+                        GDT_list[k]=GDT_list_tmp[k];
+                //check if it converges
+                if(n_cut==ka)
+                {
+                    for(k=0; k<n_cut; k++)
+                    {
+                        if(i_ali[k]!=k_ali[k]) break;
+                    }
+                    if(k==n_cut) break;
+                }
+            } //for iteration
+
+            if(i<iL_max)
+            {
+                i=i+simplify_step; //shift the fragment
+                if(i>iL_max) i=iL_max;  //do this to use the last missed fragment
+            }
+            else if(i>=iL_max) break;
+        }//while(1)
+    }//for(i_init
+    return score_max;
+}
+
+/* Fragment-seeded superposition search scored by score_fun8_standard(), i.e.
+ * with the score normalised by the number of aligned pairs (Lali).
+ *
+ * For each fragment length (Lali, Lali/2, Lali/4, ... down to min(4, Lali))
+ * a window is slid along the alignment in steps of simplify_step. Each window
+ * is fitted with Kabsch(), the whole alignment is transformed and scored, and
+ * the fit is then refined on the pairs inside the cutoff until that set of
+ * pairs stops changing or 20 refinement passes have been made.
+ *
+ * r1, r2           scratch coordinate arrays handed to Kabsch()
+ * xtm, ytm         aligned pairs; pair k is (xtm[k], ytm[k])
+ * xt               scratch: xtm after the trial transform
+ * Lali             number of aligned pairs
+ * t0, u0           output: translation and rotation of the best-scoring fit
+ * simplify_step    shift between successive windows
+ * score_sum_method, score_d8, d0   forwarded to score_fun8_standard()
+ * Rcomm            set to 0 whenever simplify_step != 1
+ * local_d0_search  base cutoff: minus 1 for the seed pass, plus 1 afterwards
+ * GDT_list, maxsub in/out: only ever raised here, never reset
+ *
+ * Returns the best score found, or -1 if no pass scored above -1. */
+double TMscore8_search_standard( double **r1, double **r2,
+    double **xtm, double **ytm, double **xt, int Lali,
+    double t0[3], double u0[3][3], int simplify_step, int score_sum_method,
+    double *Rcomm, double local_d0_search, double score_d8, double d0,
+    double GDT_list[5], double &maxsub)
+{
+    const int max_refine = 20;  // refinement passes per window
+    const int max_levels = 6;   // upper bound on distinct fragment lengths
+
+    // Fragment lengths: halve Lali until the floor min(4, Lali) is reached.
+    int frag_len[max_levels];
+    const int shortest = (Lali < 4) ? Lali : 4;
+    int n_levels = 0;
+    int lvl;
+    for (lvl = 0; lvl < max_levels - 1; lvl++)
+    {
+        n_levels++;
+        frag_len[lvl] = (int)(Lali / pow(2.0, (double)lvl));
+        if (frag_len[lvl] <= shortest)
+        {
+            frag_len[lvl] = shortest;
+            break;
+        }
+    }
+    if (lvl == max_levels - 1)
+    {
+        // halving never reached the floor: use it as the final length
+        n_levels++;
+        frag_len[lvl] = shortest;
+    }
+
+    double bins_now[5] = {0,0,0,0,0}; // GDT bins from the latest scoring
+    double maxsub_now = 0;            // MaxSub sum from the latest scoring
+    double best = -1;
+    double score, rmsd;
+    double t[3];
+    double u[3][3];
+    int fitted[Lali];  // pairs the latest fit was computed from
+    int n_fitted;
+    int within[Lali];  // pairs inside the cutoff after the latest scoring
+    int n_within;
+
+    // try every fragment length, longest first
+    for (int level = 0; level < n_levels; level++)
+    {
+        const int len  = frag_len[level];
+        const int last = Lali - len; // last admissible window start
+        int start = 0;
+
+        for (;;)
+        {
+            // seed: fit on the window [start, start+len)
+            n_fitted = 0;
+            for (int k = 0; k < len; k++)
+            {
+                const int src = start + k;
+                for (int c = 0; c < 3; c++) r1[k][c] = xtm[src][c];
+                for (int c = 0; c < 3; c++) r2[k][c] = ytm[src][c];
+                fitted[n_fitted] = src;
+                n_fitted++;
+            }
+            Kabsch(r1, r2, len, 1, &rmsd, t, u);
+            if (simplify_step != 1)
+                *Rcomm = 0;
+            do_rotation(xtm, xt, Lali, t, u);
+
+            // score the seed with the tighter cutoff
+            n_within = score_fun8_standard(xt, ytm, Lali,
+                local_d0_search - 1, within, &score, score_sum_method,
+                score_d8, d0, bins_now, maxsub_now);
+            if (score > best)
+            {
+                best = score;
+                // remember the transform of the best-scoring fit
+                for (int r = 0; r < 3; r++)
+                {
+                    t0[r] = t[r];
+                    for (int c = 0; c < 3; c++) u0[r][c] = u[r][c];
+                }
+            }
+
+            // keep the largest MaxSub sum and GDT bin counts seen so far
+            if (maxsub_now > maxsub) maxsub = maxsub_now;
+            for (int b = 0; b < 5; b++)
+            {
+                if (bins_now[b] > GDT_list[b]) GDT_list[b] = bins_now[b];
+            }
+
+            // refine: refit on the pairs inside the looser cutoff
+            const double loose = local_d0_search + 1;
+            for (int pass = 0; pass < max_refine; pass++)
+            {
+                n_fitted = 0;
+                for (int k = 0; k < n_within; k++)
+                {
+                    const int src = within[k];
+                    for (int c = 0; c < 3; c++) r1[k][c] = xtm[src][c];
+                    for (int c = 0; c < 3; c++) r2[k][c] = ytm[src][c];
+                    fitted[n_fitted] = src;
+                    n_fitted++;
+                }
+                Kabsch(r1, r2, n_within, 1, &rmsd, t, u);
+                do_rotation(xtm, xt, Lali, t, u);
+
+                // rescore the whole alignment under the refined transform
+                n_within = score_fun8_standard(xt, ytm, Lali, loose,
+                    within, &score, score_sum_method, score_d8, d0,
+                    bins_now, maxsub_now);
+                if (score > best)
+                {
+                    best = score;
+                    // remember the transform of the best-scoring fit
+                    for (int r = 0; r < 3; r++)
+                    {
+                        t0[r] = t[r];
+                        for (int c = 0; c < 3; c++) u0[r][c] = u[r][c];
+                    }
+                }
+
+                // keep the largest MaxSub sum and GDT bin counts seen so far
+                if (maxsub_now > maxsub) maxsub = maxsub_now;
+                for (int b = 0; b < 5; b++)
+                {
+                    if (bins_now[b] > GDT_list[b]) GDT_list[b] = bins_now[b];
+                }
+
+                // converged once the new selection equals the set just fitted
+                bool unchanged = (n_within == n_fitted);
+                for (int k = 0; unchanged && k < n_within; k++)
+                    unchanged = (within[k] == fitted[k]);
+                if (unchanged) break;
+            }
+
+            // Move to the next window. An overshoot is clamped to `last` so
+            // the final window ends exactly at the end of the alignment.
+            if (start >= last) break;
+            start += simplify_step;
+            if (start > last) start = last;
+        }
+    }
+    return best;
+}
+
+/* Gather the pairs named by invmap0 and run TMscore8_search_standard() on
+ * them. invmap0[i] >= 0 is the index in x paired with y[i]; a negative entry
+ * means y[i] is unpaired. x is the model that is superposed onto the
+ * template y.
+ *
+ * r1, r2, xtm, ytm and xt are caller-provided work arrays; xlen is not read.
+ * t and u receive the transform of the best-scoring fit; GDT_list and maxsub
+ * are handed straight to the search. With bNormalize set, the score is
+ * rescaled by (number of pairs)/Lnorm; otherwise Lnorm is not used. */
+double detailed_search_standard( double **r1, double **r2,
+    double **xtm, double **ytm, double **xt, double **x, double **y,
+    int xlen, int ylen, int invmap0[], double t[3], double u[3][3],
+    int simplify_step, int score_sum_method, double local_d0_search,
+    const bool& bNormalize, double Lnorm, double score_d8, double d0,
+    double GDT_list[5], double &maxsub)
+{
+    // pack the paired coordinates contiguously into xtm/ytm
+    int n_pair=0;
+    for (int iy=0; iy<ylen; iy++)
+    {
+        const int ix=invmap0[iy];
+        if (ix<0) continue; //y[iy] has no partner
+        for (int c=0; c<3; c++) xtm[n_pair][c]=x[ix][c];
+        for (int c=0; c<3; c++) ytm[n_pair][c]=y[iy][c];
+        n_pair++;
+    }
+
+    //detailed search 40-->1
+    double rmsd; //handed to the search as Rcomm; not read afterwards
+    double result = TMscore8_search_standard( r1, r2, xtm, ytm, xt, n_pair,
+        t, u, simplify_step, score_sum_method, &rmsd, local_d0_search,
+        score_d8, d0, GDT_list, maxsub);
+
+    // "-i", to use standard_TMscore, then bNormalize=true, else false
+    if (bNormalize)
+        result = result * n_pair / Lnorm;
+    return result;
+}
+
+/* Entry function for TM-score with a user-supplied alignment.
+ *
+ * sequence[0] / sequence[1] are the aligned rows of structure x / y ('-' is
+ * a gap). The residue pairing they define is kept fixed; only the
+ * superposition is optimised.
+ *
+ * Written here (some only for the matching option): t0/u0, TM1 (normalised
+ * by ylen), TM3/TM4/TM5 (a_opt>0 / u_opt / d_opt), TM_0, d0_0, d0A, d0a, d0u,
+ * d0_out, rmsd0, n_ali8, TM_ali, and seqxA/seqM/seqyA (':' in seqM marks
+ * pairs closer than 5). n_ali and Liden are only incremented and
+ * GDT_list/maxsub only raised, so the caller has to initialise them. TM2 is
+ * written only on the pre-termination path; d0B is never written here.
+ *
+ * Return status:
+ * 0 - full TM-score calculation
+ * 1 - terminated because the alignment pairs no residues
+ * 7 - pre-terminated: TMcut>0 and the approximate TM-score is below
+ *     0.6*TMcut */
+int TMscore_main(double **xa, double **ya,
+    const char *seqx, const char *seqy, double t0[3], double u0[3][3],
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    const vector<string> sequence, const double Lnorm_ass,
+    const double d0_scale, const int a_opt,
+    const bool u_opt, const bool d_opt, const bool fast_opt,
+    const int mol_type, double GDT_list[5], double &maxsub,
+    const double TMcut=-1)
+{
+    double D0_MIN;        //for d0
+    double Lnorm;         //normalization length
+    double score_d8,d0,d0_search,dcu0;//for TMscore search
+    double t[3], u[3][3]; //Kabsch translation vector and rotation matrix
+    double **score;       // Input score table for dynamic programming
+    bool   **path;        // for dynamic programming
+    double **val;         // for dynamic programming
+    double **xtm, **ytm;  // for TMscore search engine
+    double **xt;          //for saving the superposed version of r_1 or xtm
+    double **r1, **r2;    // for Kabsch rotation
+
+    /* allocate memory */
+    int minlen = min(xlen, ylen);
+    NewArray(&score, xlen+1, ylen+1);
+    NewArray(&path, xlen+1, ylen+1);
+    NewArray(&val, xlen+1, ylen+1);
+    NewArray(&xtm, minlen, 3);
+    NewArray(&ytm, minlen, 3);
+    NewArray(&xt, xlen, 3);
+    NewArray(&r1, minlen, 3);
+    NewArray(&r2, minlen, 3);
+
+    /* parameter set */
+    parameter_set4search(xlen, ylen, D0_MIN, Lnorm,
+        score_d8, d0, d0_search, dcu0);
+    int simplify_step    = 40; //for simplified search engine
+    int score_sum_method = 8;  //for scoring method, whether only sum over pairs with dis<score_d8
+
+    int i;
+    int *invmap0         = new int[ylen+1];
+    int *invmap          = new int[ylen+1];
+    double TM, TMmax=-1;
+    for(i=0; i<ylen; i++) invmap0[i]=-1;
+
+    double ddcc=0.4;
+    if (Lnorm <= 40) ddcc=0.1;   //Lnorm was set in parameter_set4search
+    double local_d0_search = d0_search;
+
+    //    Stick to the initial alignment
+    for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
+        invmap[j] = -1;
+
+    int i1 = -1;// in C version, index starts from zero, not from one
+    int i2 = -1;
+    int L1 = sequence[0].size();
+    int L2 = sequence[1].size();
+    int L = min(L1, L2);// Get positions for aligned residues
+    for (int kk1 = 0; kk1 < L; kk1++)
+    {
+        if (sequence[0][kk1] != '-') i1++;
+        if (sequence[1][kk1] != '-')
+        {
+            i2++;
+            if (i2 >= ylen || i1 >= xlen) kk1 = L;
+            else if (sequence[0][kk1] != '-') invmap[i2] = i1;
+        }
+    }
+
+    //--------------- 2. Align proteins from original alignment
+    double prevD0_MIN = D0_MIN;// stored for later use
+    double prevLnorm = Lnorm;  // kept as double: an int would truncate Lnorm
+    double prevd0 = d0;
+    TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+        invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8,
+        t, u, mol_type);
+    D0_MIN = prevD0_MIN;
+    Lnorm = prevLnorm;
+    d0 = prevd0;
+    TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+        invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0);
+    if (TM > TMmax)
+    {
+        TMmax = TM;
+        for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+    }
+
+    //    The alignment will not be changed any more in the following
+    //check if the initial alignment is generated appropriately
+    bool flag=false;
+    for(i=0; i<ylen; i++)
+    {
+        if(invmap0[i]>=0)
+        {
+            flag=true;
+            break;
+        }
+    }
+    if(!flag)
+    {
+        cout << "There is no alignment between the two proteins!" << endl;
+        cout << "Program stop with no result!" << endl;
+        //release everything allocated above before bailing out
+        clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+            xtm, ytm, xt, r1, r2, xlen, minlen);
+        return 1;
+    }
+
+    /* last TM-score pre-termination */
+    if (TMcut>0)
+    {
+        double TMtmp=approx_TM(xlen, ylen, a_opt,
+            xa, ya, t0, u0, invmap0, mol_type);
+
+        if (TMtmp<0.6*TMcut)
+        {
+            TM1=TM2=TM3=TM4=TM5=TMtmp;
+            clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                xtm, ytm, xt, r1, r2, xlen, minlen);
+            return 7;
+        }
+    }
+
+    //    Detailed TMscore search engine --> prepare for final TMscore
+    //run detailed TMscore search engine for the best alignment, and
+    //extract the best rotation matrix (t, u) for the best alignment
+    simplify_step=1;
+    if (fast_opt) simplify_step=40;
+    score_sum_method=8;
+    TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+        invmap0, t, u, simplify_step, score_sum_method, local_d0_search,
+        false, Lnorm, score_d8, d0,
+        GDT_list, maxsub);
+
+    //select pairs with dis<d8 for final TMscore computation and output alignment
+    int k=0;
+    int *m1, *m2;
+    double d;
+    m1=new int[xlen]; //aligned index in x
+    m2=new int[ylen]; //aligned index in y
+    do_rotation(xa, xt, xlen, t, u);
+    k=0;
+    for(int j=0; j<ylen; j++)
+    {
+        i=invmap0[j];
+        if(i>=0)//aligned
+        {
+            n_ali++;
+            d=sqrt(dist(&xt[i][0], &ya[j][0]));
+            m1[k]=i;
+            m2[k]=j;
+
+            xtm[k][0]=xa[i][0];
+            xtm[k][1]=xa[i][1];
+            xtm[k][2]=xa[i][2];
+
+            ytm[k][0]=ya[j][0];
+            ytm[k][1]=ya[j][1];
+            ytm[k][2]=ya[j][2];
+
+            r1[k][0] = xt[i][0];
+            r1[k][1] = xt[i][1];
+            r1[k][2] = xt[i][2];
+            r2[k][0] = ya[j][0];
+            r2[k][1] = ya[j][1];
+            r2[k][2] = ya[j][2];
+
+            k++;
+        }
+    }
+    n_ali8=k;
+
+    Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u
+    rmsd0 = sqrt(rmsd0 / n_ali8);
+
+    //    Final TMscore. Please set parameters for output
+    double rmsd;
+    simplify_step=1;
+    score_sum_method=0;
+    double Lnorm_0=ylen;
+
+    //normalized by length of structure A
+    parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    d0A=d0;
+    d0_0=d0A;
+    local_d0_search = d0_search;
+    TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0,
+        GDT_list, maxsub);
+    TM_0 = TM1;
+
+    double Lnorm_d0;
+    if (a_opt>0)
+    {
+        //normalized by average length of structures A, B
+        Lnorm_0=(xlen+ylen)*0.5;
+        parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+        d0a=d0;
+        d0_0=d0a;
+        local_d0_search = d0_search;
+
+        TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM3;
+    }
+    if (u_opt)
+    {
+        //normalized by user assigned length
+        parameter_set4final(Lnorm_ass, D0_MIN, Lnorm,
+            d0, d0_search, mol_type);
+        d0u=d0;
+        d0_0=d0u;
+        Lnorm_0=Lnorm_ass;
+        local_d0_search = d0_search;
+        TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM4;
+    }
+    if (d_opt)
+    {
+        //scaled by user assigned d0
+        parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search);
+        d0_out=d0_scale;
+        d0_0=d0_scale;
+        Lnorm_d0=Lnorm_0;
+        local_d0_search = d0_search;
+        TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM5;
+    }
+
+    /* derive alignment from superposition */
+    int ali_len=xlen+ylen; //maximum length of alignment
+    seqxA.assign(ali_len,'-');
+    seqM.assign( ali_len,' ');
+    seqyA.assign(ali_len,'-');
+
+    do_rotation(xa, xt, xlen, t0, u0);
+
+    int kk=0, i_old=0, j_old=0;
+    d=0;
+    for(int k=0; k<n_ali8; k++)
+    {
+        for(int i=i_old; i<m1[k]; i++)
+        {
+            //align x to gap
+            seqxA[kk]=seqx[i];
+            seqyA[kk]='-';
+            seqM[kk]=' ';
+            kk++;
+        }
+
+        for(int j=j_old; j<m2[k]; j++)
+        {
+            //align y to gap
+            seqxA[kk]='-';
+            seqyA[kk]=seqy[j];
+            seqM[kk]=' ';
+            kk++;
+        }
+
+        seqxA[kk]=seqx[m1[k]];
+        seqyA[kk]=seqy[m2[k]];
+        Liden+=(seqxA[kk]==seqyA[kk]);
+        d=sqrt(dist(&xt[m1[k]][0], &ya[m2[k]][0]));
+        //if(d<d0_out) seqM[kk]=':';
+        //else         seqM[kk]='.';
+        if(d<5) seqM[kk]=':';
+        kk++;
+        i_old=m1[k]+1;
+        j_old=m2[k]+1;
+    }
+
+    //tail
+    for(int i=i_old; i<xlen; i++)
+    {
+        //align x to gap
+        seqxA[kk]=seqx[i];
+        seqyA[kk]='-';
+        seqM[kk]=' ';
+        kk++;
+    }
+    for(int j=j_old; j<ylen; j++)
+    {
+        //align y to gap
+        seqxA[kk]='-';
+        seqyA[kk]=seqy[j];
+        seqM[kk]=' ';
+        kk++;
+    }
+    seqxA=seqxA.substr(0,kk);
+    seqyA=seqyA.substr(0,kk);
+    seqM =seqM.substr(0,kk);
+
+    /* free memory */
+    clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+        xtm, ytm, xt, r1, r2, xlen, minlen);
+    delete [] m1;
+    delete [] m2;
+    return 0; // zero for no exception
+}
+
+/* Print the TM-score results in the layout selected by outfmt_opt, then write
+ * the optional rotation-matrix and superposition files (skipped when the
+ * corresponding file name is empty).
+ *   outfmt_opt <= 0  full report: scores, rotation matrix, alignment
+ *   outfmt_opt == 1  FASTA-style alignment with score annotations
+ *   outfmt_opt == 2  one tab-separated summary row */
+void output_TMscore_results(
+    const string xname, const string yname,
+    const string chainID1, const string chainID2,
+    const int xlen, const int ylen, double t[3], double u[3][3],
+    const double TM1, const double TM2,
+    const double TM3, const double TM4, const double TM5,
+    const double rmsd, const double d0_out,
+    const char *seqM, const char *seqxA, const char *seqyA, const double Liden,
+    const int n_ali8, const int L_ali,
+    const double TM_ali, const double rmsd_ali, const double TM_0,
+    const double d0_0, const double d0A, const double d0B,
+    const double Lnorm_ass, const double d0_scale, 
+    const double d0a, const double d0u, const char* fname_matrix,
+    const int outfmt_opt, const int ter_opt, const char *fname_super,
+    const int a_opt, const bool u_opt, const bool d_opt, const int mirror_opt,
+    int L_lt_d, const double rmsd_d0_out,
+    double GDT_list[5], double maxsub, const int split_opt,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
+{
+    if (outfmt_opt<=0)
+    {
+        printf("\nStructure1: %s%s    Length=%5d\n",
+            xname.c_str(), chainID1.c_str(), xlen);
+        printf("Structure2: %s%s    Length=%5d (by which all scores are normalized)\n",
+            yname.c_str(), chainID2.c_str(), ylen);
+        printf("Number of residues in common=%5d\n", n_ali8);
+        printf("RMSD of  the common residues=%9.3f\n\n", rmsd);
+        printf("TM-score    = %6.4f  (d0= %.2f)\n", TM1, d0A);
+        printf("MaxSub-score= %6.4f  (d0= 3.50)\n", maxsub/ylen);
+
+        //GDT-TS averages entries 1..4 of GDT_list, GDT-HA entries 0..3
+        double ts_sum=0, ha_sum=0;
+        for (int b=0; b<4; b++)
+        {
+            ts_sum+=GDT_list[b+1];
+            ha_sum+=GDT_list[b];
+        }
+        ts_sum/=(4*ylen);
+        ha_sum/=(4*ylen);
+        printf("GDT-TS-score= %6.4f %%(d<1)=%6.4f %%(d<2)=%6.4f %%(d<4)=%6.4f %%(d<8)=%6.4f\n",
+            ts_sum, GDT_list[1]/ylen, GDT_list[2]/ylen,
+                    GDT_list[3]/ylen, GDT_list[4]/ylen);
+        printf("GDT-HA-score= %6.4f %%(d<0.5)=%6.4f %%(d<1)=%6.4f %%(d<2)=%6.4f %%(d<4)=%6.4f\n",
+            ha_sum, GDT_list[0]/ylen, GDT_list[1]/ylen,
+                    GDT_list[2]/ylen, GDT_list[3]/ylen);
+
+        if (a_opt==1)
+            printf("TM-score    = %5.4f  (if normalized by average length of two structures, i.e., LN= %.1f, d0= %.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
+        if (u_opt)
+            printf("TM-score    = %5.4f  (if normalized by user-specified LN=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u);
+        if (d_opt)
+            printf("TM-score    = %5.5f  (if scaled by user-specified d0= %.2f, and LN= %d)\n", TM5, d0_scale, ylen);
+
+        printf("\n -------- rotation matrix to rotate Chain-1 to Chain-2 ------\n");
+        printf(" i          t(i)         u(i,1)         u(i,2)         u(i,3)\n");
+        printf(" 1 %17.10f %14.10f %14.10f %14.10f\n",t[0],u[0][0],u[0][1],u[0][2]);
+        printf(" 2 %17.10f %14.10f %14.10f %14.10f\n",t[1],u[1][0],u[1][1],u[1][2]);
+        printf(" 3 %17.10f %14.10f %14.10f %14.10f\n",t[2],u[2][0],u[2][1],u[2][2]);
+
+        //ruler row: last digit of each 1-based column; also count ':' columns
+        string ruler=seqM;
+        const size_t n_col=strlen(seqM);
+        for (int col=0; size_t(col)<n_col; col++)
+        {
+            if (seqM[col]==':') L_lt_d++;
+            ruler[col]=(col+1)%10+'0';
+        }
+        printf("\nSuperposition in the TM-score: Length(d<%3.1f)= %d\n", d0_out, L_lt_d);
+        printf("(\":\" denotes the residue pairs of distance <%4.1f Angstrom)\n", d0_out);
+        printf("%s\n", seqxA);
+        printf("%s\n", seqM);
+        printf("%s\n", seqyA);
+        printf("%s\n", ruler.c_str());
+    }
+    else if (outfmt_opt==1)
+    {
+        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
+            xname.c_str(), chainID1.c_str(), xlen, d0B, Liden/xlen, TM2);
+        printf("%s\n", seqxA);
+        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
+            yname.c_str(), chainID2.c_str(), ylen, d0A, Liden/ylen, TM1);
+        printf("%s\n", seqyA);
+
+        printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n",
+            n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
+
+        if(a_opt)
+            printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
+        if(u_opt)
+            printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u);
+        if(d_opt)
+            printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen);
+
+        printf("$$$$\n");
+    }
+    else if (outfmt_opt==2)
+    {
+        printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d",
+            xname.c_str(), chainID1.c_str(), yname.c_str(), chainID2.c_str(),
+            TM2, TM1, rmsd, Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0,
+            xlen, ylen, n_ali8);
+    }
+    cout << endl;
+
+    if (fname_matrix[0]!='\0')
+        output_rotation_matrix(fname_matrix, t, u);
+    if (fname_super[0]!='\0')
+        output_pymol(xname, yname, fname_super, t, u, ter_opt, 
+            0, split_opt, mirror_opt, seqM, seqxA, seqyA,
+            resi_vec1, resi_vec2, chainID1, chainID2);
+}
diff --git a/modules/bindings/src/tmalign/basic_fun.h b/modules/bindings/src/tmalign/basic_fun.h
index 3dadccc30faf0dde5403b9adc123d2cf376bb867..0e8ae307d81a045f12998f90afdd4d0d00c628cb 100644
--- a/modules/bindings/src/tmalign/basic_fun.h
+++ b/modules/bindings/src/tmalign/basic_fun.h
@@ -7,11 +7,7 @@
 #include <math.h>
 #include <time.h>
 #include <string.h>
-// OST-NOTE: ifdef was added here since malloc.h isn't required for Linux/Mac
-//           and for some compilers (clang, gcc8) it isn't available
-#ifdef _WIN32
-#include <malloc.h>
-#endif
+//#include <malloc.h>
 
 #include <sstream>
 #include <iostream>
@@ -80,35 +76,36 @@ string AAmap(char A)
     if (A=='W') return "TRP";    
     if (A=='Y') return "TYR";
     if (A=='Z') return "GLX";
-    if ('a'<=A && A<='z') return "  "+toupper(A);
+    if ('a'<=A && A<='z') return "  "+string(1,char(toupper(A)));
     return "UNK";
 }
 
 char AAmap(const string &AA)
 {
-    if (AA.compare("ALA")==0) return 'A';
+    if (AA.compare("ALA")==0 || AA.compare("DAL")==0) return 'A';
     if (AA.compare("ASX")==0) return 'B';
-    if (AA.compare("CYS")==0) return 'C';
-    if (AA.compare("ASP")==0) return 'D';
-    if (AA.compare("GLU")==0) return 'E';
-    if (AA.compare("PHE")==0) return 'F';
+    if (AA.compare("CYS")==0 || AA.compare("DCY")==0) return 'C';
+    if (AA.compare("ASP")==0 || AA.compare("DAS")==0) return 'D';
+    if (AA.compare("GLU")==0 || AA.compare("DGL")==0) return 'E';
+    if (AA.compare("PHE")==0 || AA.compare("DPN")==0) return 'F';
     if (AA.compare("GLY")==0) return 'G';
-    if (AA.compare("HIS")==0) return 'H';
-    if (AA.compare("ILE")==0) return 'I';
-    if (AA.compare("LYS")==0) return 'K';
-    if (AA.compare("LEU")==0) return 'L';
-    if (AA.compare("MET")==0 || AA.compare("MSE")==0) return 'M';
-    if (AA.compare("ASN")==0) return 'N';
+    if (AA.compare("HIS")==0 || AA.compare("DHI")==0) return 'H';
+    if (AA.compare("ILE")==0 || AA.compare("DIL")==0) return 'I';
+    if (AA.compare("LYS")==0 || AA.compare("DLY")==0) return 'K';
+    if (AA.compare("LEU")==0 || AA.compare("DLE")==0) return 'L';
+    if (AA.compare("MET")==0 || AA.compare("MED")==0 ||
+        AA.compare("MSE")==0) return 'M';
+    if (AA.compare("ASN")==0 || AA.compare("DSG")==0) return 'N';
     if (AA.compare("PYL")==0) return 'O';
-    if (AA.compare("PRO")==0) return 'P';
-    if (AA.compare("GLN")==0) return 'Q';
-    if (AA.compare("ARG")==0) return 'R';
-    if (AA.compare("SER")==0) return 'S';
-    if (AA.compare("THR")==0) return 'T';
+    if (AA.compare("PRO")==0 || AA.compare("DPR")==0) return 'P';
+    if (AA.compare("GLN")==0 || AA.compare("DGN")==0) return 'Q';
+    if (AA.compare("ARG")==0 || AA.compare("DAR")==0) return 'R';
+    if (AA.compare("SER")==0 || AA.compare("DSN")==0) return 'S';
+    if (AA.compare("THR")==0 || AA.compare("DTH")==0) return 'T';
     if (AA.compare("SEC")==0) return 'U';
-    if (AA.compare("VAL")==0) return 'V';
-    if (AA.compare("TRP")==0) return 'W';    
-    if (AA.compare("TYR")==0) return 'Y';
+    if (AA.compare("VAL")==0 || AA.compare("DVA")==0) return 'V';
+    if (AA.compare("TRP")==0 || AA.compare("DTR")==0) return 'W';    
+    if (AA.compare("TYR")==0 || AA.compare("DTY")==0) return 'Y';
     if (AA.compare("GLX")==0) return 'Z';
 
     if (AA.compare(0,2," D")==0) return tolower(AA[2]);
@@ -124,7 +121,7 @@ void split(const string &line, vector<string> &line_vec,
     const char delimiter=' ')
 {
     bool within_word = false;
-    for (unsigned int pos=0;pos<line.size();pos++)
+    for (size_t pos=0;pos<line.size();pos++)
     {
         if (line[pos]==delimiter)
         {
@@ -142,8 +139,8 @@ void split(const string &line, vector<string> &line_vec,
 
 size_t get_PDB_lines(const string filename,
     vector<vector<string> >&PDB_lines, vector<string> &chainID_list,
-    vector<int> &mol_vec, const int ter_opt=3, const int infmt_opt=-1,
-    const string atom_opt="auto", const int split_opt=0)
+    vector<int> &mol_vec, const int ter_opt, const int infmt_opt,
+    const string atom_opt, const int split_opt, const int het_opt)
 {
     size_t i=0; // resi i.e. atom index
     string line;
@@ -159,13 +156,13 @@ size_t get_PDB_lines(const string filename,
     if (filename.size()>=3 && 
         filename.substr(filename.size()-3,3)==".gz")
     {
-        fin_gz.open("zcat "+filename);
+        fin_gz.open("zcat '"+filename+"'");
         compress_type=1;
     }
     else if (filename.size()>=4 && 
         filename.substr(filename.size()-4,4)==".bz2")
     {
-        fin_gz.open("bzcat "+filename);
+        fin_gz.open("bzcat '"+filename+"'");
         compress_type=2;
     }
     else fin.open(filename.c_str());
@@ -178,15 +175,18 @@ size_t get_PDB_lines(const string filename,
             else               getline(fin, line);
             if (infmt_opt==-1 && line.compare(0,5,"loop_")==0) // PDBx/mmCIF
                 return get_PDB_lines(filename,PDB_lines,chainID_list,
-                    mol_vec, ter_opt, 3, atom_opt, split_opt);
+                    mol_vec, ter_opt, 3, atom_opt, split_opt,het_opt);
             if (i > 0)
             {
                 if      (ter_opt>=1 && line.compare(0,3,"END")==0) break;
                 else if (ter_opt>=3 && line.compare(0,3,"TER")==0) break;
             }
             if (split_opt && line.compare(0,3,"END")==0) chainID=0;
-            if (line.compare(0, 6, "ATOM  ")==0 && line.size()>=54 &&
-               (line[16]==' ' || line[16]=='A'))
+            if (line.size()>=54 && (line[16]==' ' || line[16]=='A') && (
+                (line.compare(0, 6, "ATOM  ")==0) || 
+                (line.compare(0, 6, "HETATM")==0 && het_opt==1) ||
+                (line.compare(0, 6, "HETATM")==0 && het_opt==2 && 
+                 line.compare(17,3, "MSE")==0)))
             {
                 if (atom_opt=="auto")
                 {
@@ -208,12 +208,12 @@ size_t get_PDB_lines(const string filename,
                             if (chainID==' ')
                             {
                                 if (ter_opt>=1) i8_stream << ":_";
-                                else i8_stream<<':'<<model_idx<<":_";
+                                else i8_stream<<':'<<model_idx<<",_";
                             }
                             else
                             {
                                 if (ter_opt>=1) i8_stream << ':' << chainID;
-                                else i8_stream<<':'<<model_idx<<':'<<chainID;
+                                else i8_stream<<':'<<model_idx<<','<<chainID;
                             }
                             chainID_list.push_back(i8_stream.str());
                         }
@@ -234,12 +234,12 @@ size_t get_PDB_lines(const string filename,
                         if (chainID==' ')
                         {
                             if (ter_opt>=1) i8_stream << ":_";
-                            else i8_stream<<':'<<model_idx<<":_";
+                            else i8_stream<<':'<<model_idx<<",_";
                         }
                         else
                         {
                             if (ter_opt>=1) i8_stream << ':' << chainID;
-                            else i8_stream<<':'<<model_idx<<':'<<chainID;
+                            else i8_stream<<':'<<model_idx<<','<<chainID;
                         }
                         chainID_list.push_back(i8_stream.str());
                         PDB_lines.push_back(tmp_str_vec);
@@ -260,7 +260,7 @@ size_t get_PDB_lines(const string filename,
     }
     else if (infmt_opt==1) // SPICKER format
     {
-        int L=0;
+        size_t L=0;
         float x,y,z;
         stringstream i8_stream;
         while (compress_type?fin_gz.good():fin.good())
@@ -276,7 +276,7 @@ size_t get_PDB_lines(const string filename,
             chainID_list.push_back(i8_stream.str());
             PDB_lines.push_back(tmp_str_vec);
             mol_vec.push_back(0);
-            for (i=0;(int) i<L;i++)
+            for (i=0;i<L;i++)
             {
                 if (compress_type) fin_gz>>x>>y>>z;
                 else               fin   >>x>>y>>z;
@@ -293,7 +293,7 @@ size_t get_PDB_lines(const string filename,
     }
     else if (infmt_opt==2) // xyz format
     {
-        int L=0;
+        size_t L=0;
         stringstream i8_stream;
         while (compress_type?fin_gz.good():fin.good())
         {
@@ -308,7 +308,7 @@ size_t get_PDB_lines(const string filename,
             chainID_list.push_back(':'+line.substr(0,i));
             PDB_lines.push_back(tmp_str_vec);
             mol_vec.push_back(0);
-            for (i=0;(int) i<L;i++)
+            for (i=0;i<L;i++)
             {
                 if (compress_type) getline(fin_gz, line);
                 else               getline(fin, line);
@@ -343,12 +343,25 @@ size_t get_PDB_lines(const string filename,
         {
             if (compress_type) getline(fin_gz, line);
             else               getline(fin, line);
+            if (line.size()==0) continue;
             if (loop_) loop_ = line.compare(0,2,"# ");
             if (!loop_)
             {
                 if (line.compare(0,5,"loop_")) continue;
-                if (compress_type) getline(fin_gz, line);
-                else               getline(fin, line);
+                while(1)
+                {
+                    if (compress_type)
+                    {
+                        if (fin_gz.good()) getline(fin_gz, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                    }
+                    else
+                    {
+                        if (fin.good()) getline(fin, line);
+                        else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                    }
+                    if (line.size()) break;
+                }
                 if (line.compare(0,11,"_atom_site.")) continue;
 
                 loop_=true;
@@ -360,6 +373,7 @@ size_t get_PDB_lines(const string filename,
                 {
                     if (compress_type) getline(fin_gz, line);
                     else               getline(fin, line);
+                    if (line.size()==0) continue;
                     if (line.compare(0,11,"_atom_site.")) break;
                     _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
                 }
@@ -377,14 +391,19 @@ size_t get_PDB_lines(const string filename,
                     _atom_site.count("Cartn_z")==0)
                 {
                     loop_ = false;
-                    cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_atom_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl;
+                    cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_comp_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl;
                     continue;
                 }
             }
 
             line_vec.clear();
             split(line,line_vec);
-            if (line_vec[_atom_site["group_PDB"]]!="ATOM") continue;
+            if ((line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                 line_vec[_atom_site["group_PDB"]]!="HETATM") ||
+                (line_vec[_atom_site["group_PDB"]]=="HETATM" &&
+                 (het_opt==0 || 
+                 (het_opt==2 && line_vec[_atom_site["label_comp_id"]]!="MSE")))
+                ) continue;
             
             alt_id=".";
             if (_atom_site.count("label_alt_id")) // in 39.4 % of entries
@@ -435,9 +454,11 @@ size_t get_PDB_lines(const string filename,
                     if (split_opt==1 && ter_opt==0) chainID_list.push_back(
                         ':'+model_index);
                     else if (split_opt==2 && ter_opt==0)
-                        chainID_list.push_back(':'+model_index+':'+asym_id);
-                    else if (split_opt==2 && ter_opt==1)
+                        chainID_list.push_back(':'+model_index+','+asym_id);
+                    else //if (split_opt==2 && ter_opt==1)
                         chainID_list.push_back(':'+asym_id);
+                    //else
+                        //chainID_list.push_back("");
                 }
             }
 
@@ -452,9 +473,11 @@ size_t get_PDB_lines(const string filename,
                     if (split_opt==1 && ter_opt==0) chainID_list.push_back(
                         ':'+model_index);
                     else if (split_opt==2 && ter_opt==0)
-                        chainID_list.push_back(':'+model_index+':'+asym_id);
-                    else if (split_opt==2 && ter_opt==1)
+                        chainID_list.push_back(':'+model_index+','+asym_id);
+                    else //if (split_opt==2 && ter_opt==1)
                         chainID_list.push_back(':'+asym_id);
+                    //else
+                        //chainID_list.push_back("");
                 }
             }
             if (prev_asym_id!=asym_id) prev_asym_id=asym_id;
@@ -478,9 +501,9 @@ size_t get_PDB_lines(const string filename,
             i8_stream<<"ATOM  "
                 <<setw(5)<<i<<" "<<atom<<" "<<AA<<" "<<asym_id[0]
                 <<setw(5)<<resi.substr(0,5)<<"   "
-                <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
-                <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
-                <<setw(8)<<line_vec[_atom_site["Cartn_z"]];
+                <<setw(8)<<line_vec[_atom_site["Cartn_x"]].substr(0,8)
+                <<setw(8)<<line_vec[_atom_site["Cartn_y"]].substr(0,8)
+                <<setw(8)<<line_vec[_atom_site["Cartn_z"]].substr(0,8);
             PDB_lines.back().push_back(i8_stream.str());
             i8_stream.str(string());
         }
@@ -510,7 +533,7 @@ size_t get_FASTA_lines(const string filename,
 {
     string line;
     vector<string> tmp_str_vec;
-    unsigned int l;
+    size_t l;
     
     int compress_type=0; // uncompressed file
     ifstream fin;
@@ -518,13 +541,13 @@ size_t get_FASTA_lines(const string filename,
     if (filename.size()>=3 && 
         filename.substr(filename.size()-3,3)==".gz")
     {
-        fin_gz.open("zcat "+filename);
+        fin_gz.open("zcat '"+filename+"'");
         compress_type=1;
     }
     else if (filename.size()>=4 && 
         filename.substr(filename.size()-4,4)==".bz2")
     {
-        fin_gz.open("bzcat "+filename);
+        fin_gz.open("bzcat '"+filename+"'");
         compress_type=2;
     }
     else fin.open(filename.c_str());
@@ -582,64 +605,115 @@ int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
     int i2=0; // positions in resi_vec2
     int xlen=resi_vec1.size();
     int ylen=resi_vec2.size();
-    map<char,int> chainID_map1;
-    map<char,int> chainID_map2;
+    map<string,string> chainID_map1;
+    map<string,string> chainID_map2;
     if (byresi_opt==3)
     {
-        vector<char> chainID_vec;
-        char chainID;
+        vector<string> chainID_vec;
+        string chainID;
+        stringstream ss;
         int i;
         for (i=0;i<xlen;i++)
         {
-            chainID=resi_vec1[i][5];
+            chainID=resi_vec1[i].substr(5);
             if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
             {
                 chainID_vec.push_back(chainID);
-                chainID_map1[chainID]=chainID_vec.size();
+                ss<<chainID_vec.size();
+                chainID_map1[chainID]=ss.str();
+                ss.str("");
             }
         }
         chainID_vec.clear();
         for (i=0;i<ylen;i++)
         {
-            chainID=resi_vec2[i][5];
+            chainID=resi_vec2[i].substr(5);
             if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
             {
                 chainID_vec.push_back(chainID);
-                chainID_map2[chainID]=chainID_vec.size();
+                ss<<chainID_vec.size();
+                chainID_map2[chainID]=ss.str();
+                ss.str("");
             }
         }
-        chainID_vec.clear();
+        vector<string>().swap(chainID_vec);
     }
+    string chainID1="";
+    string chainID2="";
+    string chainID1_prev="";
+    string chainID2_prev="";
     while(i1<xlen && i2<ylen)
     {
-        if ((byresi_opt<=2 && resi_vec1[i1]==resi_vec2[i2]) || (byresi_opt==3
-             && resi_vec1[i1].substr(0,5)==resi_vec2[i2].substr(0,5)
-             && chainID_map1[resi_vec1[i1][5]]==chainID_map2[resi_vec2[i2][5]]))
+        if (byresi_opt==2)
+        {
+            chainID1=resi_vec1[i1].substr(5);
+            chainID2=resi_vec2[i2].substr(5);
+        }
+        else if (byresi_opt==3)
         {
-            sequence[0]+=seqx[i1++];
-            sequence[1]+=seqy[i2++];
+            chainID1=chainID_map1[resi_vec1[i1].substr(5)];
+            chainID2=chainID_map2[resi_vec2[i2].substr(5)];
         }
-        else if (atoi(resi_vec1[i1].substr(0,4).c_str())<=
-                 atoi(resi_vec2[i2].substr(0,4).c_str()))
+
+        if (chainID1==chainID2)
         {
-            sequence[0]+=seqx[i1++];
-            sequence[1]+='-';
+            if (atoi(resi_vec1[i1].substr(0,4).c_str())<
+                atoi(resi_vec2[i2].substr(0,4).c_str()))
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+='-';
+            }
+            else if (atoi(resi_vec1[i1].substr(0,4).c_str())>
+                     atoi(resi_vec2[i2].substr(0,4).c_str()))
+            {
+                sequence[0]+='-';
+                sequence[1]+=seqy[i2++];
+            }
+            else
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+=seqy[i2++];
+            }
+            chainID1_prev=chainID1;
+            chainID2_prev=chainID2;
         }
         else
         {
-            sequence[0]+='-';
-            sequence[1]+=seqy[i2++];
+            if (chainID1_prev==chainID1 && chainID2_prev!=chainID2)
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+='-';
+                chainID1_prev=chainID1;
+            }
+            else if (chainID1_prev!=chainID1 && chainID2_prev==chainID2)
+            {
+                sequence[0]+='-';
+                sequence[1]+=seqy[i2++];
+                chainID2_prev=chainID2;
+            }
+            else
+            {
+                sequence[0]+=seqx[i1++];
+                sequence[1]+=seqy[i2++];
+                chainID1_prev=chainID1;
+                chainID2_prev=chainID2;
+            }
         }
+        
     }
-    chainID_map1.clear();
-    chainID_map2.clear();
+    map<string,string>().swap(chainID_map1);
+    map<string,string>().swap(chainID_map2);
+    chainID1.clear();
+    chainID2.clear();
+    chainID1_prev.clear();
+    chainID2_prev.clear();
     return sequence[0].size();
 }
 
 int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
-    vector<string> &resi_vec, const int byresi_opt)
+    vector<string> &resi_vec, const int read_resi)
 {
-    unsigned int i;
+    size_t i;
     for (i=0;i<PDB_lines.size();i++)
     {
         a[i][0] = atof(PDB_lines[i].substr(30, 8).c_str());
@@ -647,9 +721,9 @@ int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
         a[i][2] = atof(PDB_lines[i].substr(46, 8).c_str());
         seq[i]  = AAmap(PDB_lines[i].substr(17, 3));
 
-        if (byresi_opt>=2) resi_vec.push_back(PDB_lines[i].substr(22,5)+
-                                              PDB_lines[i][21]);
-        if (byresi_opt==1) resi_vec.push_back(PDB_lines[i].substr(22,5));
+        if (read_resi>=2) resi_vec.push_back(PDB_lines[i].substr(22,5)+
+                                             PDB_lines[i][21]);
+        if (read_resi==1) resi_vec.push_back(PDB_lines[i].substr(22,5));
     }
     seq[i]='\0'; 
     return i;
@@ -699,7 +773,7 @@ string Trim(const string &inputString)
  * This function should only be called by main function, as it will
  * terminate a program if wrong alignment is given */
 void read_user_alignment(vector<string>&sequence, const string &fname_lign,
-    const bool I_opt)
+    const int i_opt)
 {
     if (fname_lign == "")
         PrintErrorAndQuit("Please provide a file name for option -i!");
@@ -729,10 +803,10 @@ void read_user_alignment(vector<string>&sequence, const string &fname_lign,
         PrintErrorAndQuit("ERROR: Fasta format is wrong, two proteins should be included.");
     if (sequence[0].size() != sequence[1].size())
         PrintErrorAndQuit("ERROR! FASTA file is wrong. The length in alignment should be equal for the two aligned proteins.");
-    if (I_opt)
+    if (i_opt==3)
     {
         int aligned_resNum=0;
-        for (unsigned int i=0;i<sequence[0].size();i++) 
+        for (size_t i=0;i<sequence[0].size();i++)
             aligned_resNum+=(sequence[0][i]!='-' && sequence[1][i]!='-');
         if (aligned_resNum<3)
             PrintErrorAndQuit("ERROR! Superposition is undefined for <3 aligned residues.");
diff --git a/modules/bindings/src/tmalign/param_set.h b/modules/bindings/src/tmalign/param_set.h
index 31ac268669606ac121a7082e46d63e16b695585a..9300404a4137f1b24daf2c8a0e4f6ade82ab7f06 100644
--- a/modules/bindings/src/tmalign/param_set.h
+++ b/modules/bindings/src/tmalign/param_set.h
@@ -10,11 +10,11 @@ void parameter_set4search(const int xlen, const int ylen,
     double &D0_MIN, double &Lnorm,
     double &score_d8, double &d0, double &d0_search, double &dcu0)
 {
-    //parameter initilization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8
+    //parameter initialization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8
     D0_MIN=0.5; 
     dcu0=4.25;                       //update 3.85-->4.25
  
-    Lnorm=getmin(xlen, ylen);        //normaliz TMscore by this in searching
+    Lnorm=getmin(xlen, ylen);        //normalize TMscore by this in searching
     if (Lnorm<=19)                    //update 15-->19
         d0=0.168;                   //update 0.5-->0.168
     else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8);
@@ -33,7 +33,7 @@ void parameter_set4final_C3prime(const double len, double &D0_MIN,
 {
     D0_MIN=0.3; 
  
-    Lnorm=len;            //normaliz TMscore by this in searching
+    Lnorm=len;            //normalize TMscore by this in searching
     if(Lnorm<=11) d0=0.3;
     else if(Lnorm>11&&Lnorm<=15) d0=0.4;
     else if(Lnorm>15&&Lnorm<=19) d0=0.5;
@@ -57,7 +57,7 @@ void parameter_set4final(const double len, double &D0_MIN, double &Lnorm,
     }
     D0_MIN=0.5; 
  
-    Lnorm=len;            //normaliz TMscore by this in searching
+    Lnorm=len;            //normalize TMscore by this in searching
     if (Lnorm<=21) d0=0.5;          
     else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8);
     if (d0<D0_MIN) d0=D0_MIN;   
@@ -70,7 +70,7 @@ void parameter_set4scale(const int len, const double d_s, double &Lnorm,
     double &d0, double &d0_search)
 {
     d0=d_s;          
-    Lnorm=len;            //normaliz TMscore by this in searching
+    Lnorm=len;            //normalize TMscore by this in searching
     d0_search=d0;
     if (d0_search>8)   d0_search=8;
     if (d0_search<4.5) d0_search=4.5;  
diff --git a/modules/bindings/src/tmalign/pdb2fasta.cpp b/modules/bindings/src/tmalign/pdb2fasta.cpp
index 420514205a22ce86d4cb0b9ddd1dbf4eda72e50d..7c94206ffebee9e6f6847318e001496e53ce64c7 100644
--- a/modules/bindings/src/tmalign/pdb2fasta.cpp
+++ b/modules/bindings/src/tmalign/pdb2fasta.cpp
@@ -31,6 +31,10 @@ void print_help()
 "             1: treat each MODEL as a separate chain (-ter should be 0)\n"
 "             2: treat each chain as a seperate chain (-ter should be <=1)\n"
 "\n"
+"    -het     Whether to read residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
 "    -infmt   Input format for chain\n"
 "            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
 "             0: PDB format\n"
@@ -52,6 +56,7 @@ int main(int argc, char *argv[])
     int    ter_opt   =3;     // TER, END, or different chainID
     int    infmt_opt =-1;    // PDB or PDBx/mmCIF format
     int    split_opt =0;     // do not split chain
+    int    het_opt=0;        // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
     string suffix_opt="";    // set -suffix to empty
     string dir_opt   ="";    // set -dir to empty
@@ -84,6 +89,10 @@ int main(int argc, char *argv[])
         {
             infmt_opt=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
         else xname=argv[i];
     }
 
@@ -92,7 +101,7 @@ int main(int argc, char *argv[])
     if (suffix_opt.size() && dir_opt.size()==0)
         PrintErrorAndQuit("-suffix is only valid if -dir is set");
     if (atom_opt.size()!=4)
-        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
     if (split_opt==1 && ter_opt!=0)
         PrintErrorAndQuit("-split 1 should be used with -ter 0");
     else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
@@ -140,7 +149,7 @@ int main(int argc, char *argv[])
     {
         xname=chain_list[i];
         xchainnum=get_PDB_lines(xname, PDB_lines, chainID_list,
-            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt);
+            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt, het_opt);
         if (!xchainnum)
         {
             cerr<<"Warning! Cannot parse file: "<<xname
diff --git a/modules/bindings/src/tmalign/pdb2ss.cpp b/modules/bindings/src/tmalign/pdb2ss.cpp
index a346cad36fee720d72c7067280d830d59a7e2f06..d0732803d16652eb312ea337812998a761bd092a 100644
--- a/modules/bindings/src/tmalign/pdb2ss.cpp
+++ b/modules/bindings/src/tmalign/pdb2ss.cpp
@@ -3,8 +3,8 @@
 using namespace std;
 
 // secondary structure    01234
-const char* SSmapProtein=" CHTE";
-const char* SSmapRNA    =" .<>";
+//const char* SSmapProtein=" CHTE";
+//const char* SSmapRNA    =" .<>";
 
 void print_help()
 {
@@ -45,6 +45,10 @@ void print_help()
 "             0: PDB format\n"
 "             2: xyz format\n"
 "             3: PDBx/mmCIF format\n"
+"    -het     Whether to read residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
     <<endl;
     exit(EXIT_SUCCESS);
 }
@@ -61,6 +65,7 @@ int main(int argc, char *argv[])
     int    ter_opt   =3;     // TER, END, or different chainID
     int    infmt_opt =-1;    // PDB format
     int    split_opt =0;     // do not split chain
+    int    het_opt=0;        // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
     string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
     string suffix_opt="";    // set -suffix to empty
@@ -98,6 +103,10 @@ int main(int argc, char *argv[])
         {
             infmt_opt=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
         else xname=argv[i];
     }
 
@@ -106,9 +115,9 @@ int main(int argc, char *argv[])
     if (suffix_opt.size() && dir_opt.size()==0)
         PrintErrorAndQuit("-suffix is only valid if -dir is set");
     if (atom_opt.size()!=4)
-        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
     if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
-        PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein.");
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
     else if (mol_opt=="protein" && atom_opt=="auto")
         atom_opt=" CA ";
     else if (mol_opt=="RNA" && atom_opt=="auto")
@@ -153,17 +162,16 @@ int main(int argc, char *argv[])
     int    xlen;                      // chain length
     int    xchainnum;                 // number of chains in a PDB file
     char   *seqx;                     // for the protein sequence 
-    int    *secx;                     // for the secondary structure 
+    char   *secx;                     // for the secondary structure 
     double **xa;                      // for input vectors xa[0...xlen-1][0..2] and
     vector<string> resi_vec;          // residue index for chain
-    string sequence;                  // secondary structure sequence
 
     /* loop over file names */
     for (i=0;i<chain_list.size();i++)
     {
         xname=chain_list[i];
         xchainnum=get_PDB_lines(xname, PDB_lines, chainID_list,
-            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt);
+            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt, het_opt);
         if (!xchainnum)
         {
             cerr<<"Warning! Cannot parse file: "<<xname
@@ -183,26 +191,15 @@ int main(int argc, char *argv[])
             }
             NewArray(&xa, xlen, 3);
             seqx = new char[xlen + 1];
-            secx = new int[xlen];
+            secx = new char[xlen + 1];
             xlen = read_PDB(PDB_lines[chain_i], xa, seqx, resi_vec, 0);
-            if (mol_vec[chain_i]>0) // RNA
-            {
-                make_sec(seqx,xa, xlen, secx,atom_opt);
-                for (l=0;l<PDB_lines[chain_i].size();l++)
-                    sequence+=SSmapRNA[secx[l]];
-            }
-            else //protein
-            {
-                make_sec(xa, xlen, secx);
-                for (l=0;l<PDB_lines[chain_i].size();l++)
-                    sequence+=SSmapProtein[secx[l]];
-            }
+            if (mol_vec[chain_i]>0) make_sec(seqx,xa, xlen, secx,atom_opt);
+            else make_sec(xa, xlen, secx); // protein
             
             cout<<'>'<<xname.substr(dir_opt.size(),
                 xname.size()-dir_opt.size()-suffix_opt.size())
-                <<chainID_list[chain_i]<<'\t'<<xlen<<'\n'<<sequence<<endl;
+                <<chainID_list[chain_i]<<'\t'<<xlen<<'\n'<<secx<<endl;
 
-            sequence.clear();
             PDB_lines[chain_i].clear();
             DeleteArray(&xa, xlen);
             delete [] seqx;
diff --git a/modules/bindings/src/tmalign/pdb2xyz.cpp b/modules/bindings/src/tmalign/pdb2xyz.cpp
index 6fd235f6e3941acfe934fa0aa0fe0490ba406d36..d151f1e741b1a419482618d6dc08ac01197be2db 100644
--- a/modules/bindings/src/tmalign/pdb2xyz.cpp
+++ b/modules/bindings/src/tmalign/pdb2xyz.cpp
@@ -31,6 +31,10 @@ void print_help()
 "             1: treat each MODEL as a separate chain (-ter should be 0)\n"
 "             2: treat each chain as a seperate chain (-ter should be <=1)\n"
 "\n"
+"    -het     Whether to read residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
 "    -infmt   Input format for chain2\n"
 "            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
 "             3: PDBx/mmCIF format\n"
@@ -50,6 +54,7 @@ int main(int argc, char *argv[])
     int    ter_opt   =3;     // TER, END, or different chainID
     int    infmt_opt =-1;    // PDB or PDBx/mmCIF format
     int    split_opt =0;     // do not split chain
+    int    het_opt=0;        // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
     string suffix_opt="";    // set -suffix to empty
     string dir_opt   ="";    // set -dir to empty
@@ -82,6 +87,10 @@ int main(int argc, char *argv[])
         {
             infmt_opt=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
         else xname=argv[i];
     }
 
@@ -90,7 +99,7 @@ int main(int argc, char *argv[])
     if (suffix_opt.size() && dir_opt.size()==0)
         PrintErrorAndQuit("-suffix is only valid if -dir is set");
     if (atom_opt.size()!=4)
-        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
     if (split_opt==1 && ter_opt!=0)
         PrintErrorAndQuit("-split 1 should be used with -ter 0");
     else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
@@ -135,7 +144,7 @@ int main(int argc, char *argv[])
     {
         xname=chain_list[i];
         xchainnum=get_PDB_lines(xname, PDB_lines, chainID_list,
-            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt);
+            mol_vec, ter_opt, infmt_opt, atom_opt, split_opt, het_opt);
         if (!xchainnum)
         {
             cerr<<"Warning! Cannot parse file: "<<xname
diff --git a/modules/bindings/src/tmalign/readme.txt b/modules/bindings/src/tmalign/readme.txt
index ea276da42ab2272e0897ddd3dba0f6a7b688c92c..3249215e8e3a65fa56cb3661df7ed3a24638c18b 100644
--- a/modules/bindings/src/tmalign/readme.txt
+++ b/modules/bindings/src/tmalign/readme.txt
@@ -8,7 +8,7 @@
    Please report issues to yangzhanglab@umich.edu
 
    References to cite:
-   S Gong, C Zhang, Y Zhang. Bioinformatics (2019)
+   S Gong, C Zhang, Y Zhang. Bioinformatics, btz282 (2019)
    Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005)
 
    DISCLAIMER:
@@ -50,6 +50,17 @@
               (3) automatic detection of molecule type (protein vs RNA).
    2019/01/07: C Zhang added support for PDBx/mmCIF format.
    2019/02/09: Fixed asymmetric alignment bug.
+   2019/03/17: Added the -cp option for circular permutation
+   2019/03/27: Added the -mirror option for mirror structure alignment
+   2019/04/25: The RNA-align algorithm was published by Bioinformatics
+   2019/07/24: Fixed bug in displaying matching residues.
+               Added GDT and MaxSub to TMscore program.
+   2019/08/18: Prevent excessive circular permutation alignment by -cp.
+   2020/05/19: Add back rasmol output
+   2020/12/12: Fixed bug in double precision coordinate mmcif alignment
+   2021/01/07: Fixed bug in TMscore -c
+   2021/05/29: Remove unnecessary dependency on malloc.h, which prevents
+               compilation on Mac OS
 ===============================================================================
 
 =========================
@@ -84,4 +95,4 @@ fortran version, including RNA alignment and batch alignment of multiple
 structures. A full list of available options can be explored by:
   ./TMalign -h
 
-02/09/2019
+2021/05/20
diff --git a/modules/bindings/src/tmalign/se.cpp b/modules/bindings/src/tmalign/se.cpp
index aa22b07f744578acaac523df25e0a401b8ad2923..c4d7606816f84beceb4b569f0b14d1d082ddc119 100644
--- a/modules/bindings/src/tmalign/se.cpp
+++ b/modules/bindings/src/tmalign/se.cpp
@@ -1,4 +1,5 @@
 #include "se.h"
+#include "NWalign.h"
 
 using namespace std;
 
@@ -54,6 +55,10 @@ void print_extra_help()
 "             3: (similar to TMscore -c, should be used with -ter <=1)\n"
 "                align by residue index and order of chain\n"
 "\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
 "    -infmt1  Input format for chain1\n"
 "    -infmt2  Input format for chain2\n"
 "            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
@@ -122,6 +127,7 @@ int main(int argc, char *argv[])
     int    ter_opt   =3;     // TER, END, or different chainID
     int    split_opt =0;     // do not split chain
     int    outfmt_opt=0;     // set -outfmt to full output
+    int    het_opt=0;        // do not read HETATM residues
     string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
     string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
     string suffix_opt="";    // set -suffix to empty
@@ -206,6 +212,10 @@ int main(int argc, char *argv[])
         {
             byresi_opt=atoi(argv[i + 1]); i++;
         }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
         else if (xname.size() == 0) xname=argv[i];
         else if (yname.size() == 0) yname=argv[i];
         else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
@@ -228,9 +238,9 @@ int main(int argc, char *argv[])
     if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size()))
         PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
     if (atom_opt.size()!=4)
-        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+        PrintErrorAndQuit("ERROR! Atom name must have 4 characters, including space.");
     if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
-        PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein.");
+        PrintErrorAndQuit("ERROR! Molecule type must be either RNA or protein.");
     else if (mol_opt=="protein" && atom_opt=="auto")
         atom_opt=" CA ";
     else if (mol_opt=="RNA" && atom_opt=="auto")
@@ -303,7 +313,7 @@ int main(int argc, char *argv[])
         /* parse chain 1 */
         xname=chain1_list[i];
         xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
-            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt);
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
         if (!xchainnum)
         {
             cerr<<"Warning! Cannot parse file: "<<xname
@@ -333,7 +343,8 @@ int main(int argc, char *argv[])
                 {
                     yname=chain2_list[j];
                     ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
-                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt);
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
                     if (!ychainnum)
                     {
                         cerr<<"Warning! Cannot parse file: "<<yname
@@ -373,6 +384,7 @@ int main(int argc, char *argv[])
                     double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
                     int n_ali=0;
                     int n_ali8=0;
+                    int *invmap = new int[ylen+1];
 
                     /* entry function for structure alignment */
                     se_main(
@@ -382,24 +394,30 @@ int main(int argc, char *argv[])
                         rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
                         xlen, ylen, sequence, Lnorm_ass, d0_scale,
                         i_opt, a_opt, u_opt, d_opt,
-                        mol_vec1[chain_i]+mol_vec2[chain_j]);
+                        mol_vec1[chain_i]+mol_vec2[chain_j], 
+                        outfmt_opt, invmap);
+
+                    if (outfmt_opt>=2) 
+                        get_seqID(invmap, seqx, seqy, ylen, Liden, n_ali8);
 
                     /* print result */
                     output_results(
-                        xname.substr(dir1_opt.size()).c_str(),
-                        yname.substr(dir2_opt.size()).c_str(),
+                        xname.substr(dir1_opt.size()+dir_opt.size()).c_str(),
+                        yname.substr(dir2_opt.size()+dir_opt.size()).c_str(),
                         chainID_list1[chain_i].c_str(),
                         chainID_list2[chain_j].c_str(),
                         xlen, ylen, t0, u0, TM1, TM2, 
                         TM3, TM4, TM5, rmsd0, d0_out,
                         seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
-                        n_ali8, n_ali, L_ali, TM_ali, rmsd_ali,
+                        n_ali8, L_ali, TM_ali, rmsd_ali,
                         TM_0, d0_0, d0A, d0B,
                         Lnorm_ass, d0_scale, d0a, d0u, 
-                        "", outfmt_opt, ter_opt, "",
-                        false, false, a_opt, u_opt, d_opt);
+                        "", outfmt_opt, ter_opt, 0, split_opt,
+                        0, "", false, a_opt, u_opt, d_opt, 0,
+                        resi_vec1, resi_vec2);
 
                     /* Done! Free memory */
+                    delete [] invmap;
                     seqM.clear();
                     seqxA.clear();
                     seqyA.clear();
diff --git a/modules/bindings/src/tmalign/se.h b/modules/bindings/src/tmalign/se.h
index 0021dd6d4b52ccff4f16ffbcb9b9a08ab4726818..6ccc84132d02b9e54179d9b6378937af38ce35a0 100644
--- a/modules/bindings/src/tmalign/se.h
+++ b/modules/bindings/src/tmalign/se.h
@@ -1,6 +1,7 @@
 #include "TMalign.h"
 
-/* entry function for se */
+/* entry function for se
+ * when outfmt_opt>=2, the aligned sequences (seqxA, seqyA, seqM) are not built */
 int se_main(
     double **xa, double **ya, const char *seqx, const char *seqy,
     double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
@@ -11,31 +12,32 @@ int se_main(
     double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
     const int xlen, const int ylen, const vector<string> &sequence,
     const double Lnorm_ass, const double d0_scale, const bool i_opt,
-    const bool a_opt, const bool u_opt, const bool d_opt, const int mol_type)
+    const bool a_opt, const bool u_opt, const bool d_opt, const int mol_type,
+    const int outfmt_opt, int *invmap)
 {
     double D0_MIN;        //for d0
     double Lnorm;         //normalization length
     double score_d8,d0,d0_search,dcu0;//for TMscore search
-    double t[3]={0,0,0};  // dummy translation vection
-    double u[3][3]={{1,0,0},{0,1,0},{0,0,1}}; // dummy rotation matrix
     double **score;       // Input score table for dynamic programming
     bool   **path;        // for dynamic programming  
     double **val;         // for dynamic programming  
 
-    int *m1, *m2;
+    int *m1=NULL;
+    int *m2=NULL;
     double d;
-    m1=new int[xlen]; //alignd index in x
-    m2=new int[ylen]; //alignd index in y
+    if (outfmt_opt<2)
+    {
+        m1=new int[xlen]; //alignd index in x
+        m2=new int[ylen]; //alignd index in y
+    }
 
     /***********************/
     /* allocate memory     */
     /***********************/
-    int minlen = min(xlen, ylen);
     NewArray(&score, xlen+1, ylen+1);
     NewArray(&path, xlen+1, ylen+1);
     NewArray(&val, xlen+1, ylen+1);
-    int *invmap          = new int[ylen+1];
-    for(int i=0; i<ylen; i++) invmap[i]=-1;
+    //int *invmap          = new int[ylen+1];
 
     /* set d0 */
     parameter_set4search(xlen, ylen, D0_MIN, Lnorm,
@@ -52,13 +54,10 @@ int se_main(
             d0u, d0_search, mol_type); // set d0u
 
     /* perform alignment */
-    if (!i_opt)
-        NWDP_TM(path, val, xa, ya, xlen, ylen, t, u, d0*d0, 0, invmap);
+    for(int j=0; j<ylen; j++) invmap[j]=-1;
+    if (!i_opt) NWDP_SE(path, val, xa, ya, xlen, ylen, d0*d0, 0, invmap);
     else
     {
-        for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
-            invmap[j] = -1;
-
         int i1 = -1;// in C version, index starts from zero, not from one
         int i2 = -1;
         int L1 = sequence[0].size();
@@ -78,6 +77,8 @@ int se_main(
 
     rmsd0=TM1=TM2=TM3=TM4=TM5=0;
     int k=0;
+    n_ali=0;
+    n_ali8=0;
     for(int i=0,j=0; j<ylen; j++)
     {
         i=invmap[j];
@@ -85,10 +86,13 @@ int se_main(
         {
             n_ali++;
             d=sqrt(dist(&xa[i][0], &ya[j][0]));
-            if (d <= score_d8)
+            if (d <= score_d8 || i_opt)
             {
-                m1[k]=i;
-                m2[k]=j;
+                if (outfmt_opt<2)
+                {
+                    m1[k]=i;
+                    m2[k]=j;
+                }
                 k++;
                 TM2+=1/(1+(d/d0B)*(d/d0B)); // chain_1
                 TM1+=1/(1+(d/d0A)*(d/d0A)); // chain_2
@@ -107,6 +111,14 @@ int se_main(
     TM5/=ylen;
     if (n_ali8) rmsd0=sqrt(rmsd0/n_ali8);
 
+    if (outfmt_opt>=2)
+    {
+        DeleteArray(&score, xlen+1);
+        DeleteArray(&path, xlen+1);
+        DeleteArray(&val, xlen+1);
+        return 0;
+    }
+
     /* extract aligned sequence */
     int ali_len=xlen+ylen; //maximum length of alignment
     seqxA.assign(ali_len,'-');
@@ -115,6 +127,7 @@ int se_main(
     
     int kk=0, i_old=0, j_old=0;
     d=0;
+    Liden=0;
     for(int k=0; k<n_ali8; k++)
     {
         for(int i=i_old; i<m1[k]; i++)
@@ -168,7 +181,7 @@ int se_main(
     seqM =seqM.substr(0,kk);
 
     /* free memory */
-    delete [] invmap;
+    //delete [] invmap;
     delete [] m1;
     delete [] m2;
     DeleteArray(&score, xlen+1);
diff --git a/modules/bindings/src/wrap_tmalign.cc b/modules/bindings/src/wrap_tmalign.cc
index bbc2eb506a789b4aea3306b90e80b27c061af597..cefbe1a4497372983902f5f5f143a5c21ed1e1eb 100644
--- a/modules/bindings/src/wrap_tmalign.cc
+++ b/modules/bindings/src/wrap_tmalign.cc
@@ -48,8 +48,8 @@ TMAlignResult WrappedTMAlign(const geom::Vec3List& pos_one,
   char* seqy = new char[ylen+1];
   seqx[xlen] = '\0';
   seqy[ylen] = '\0';
-  int* secx = new int[xlen];
-  int* secy = new int[ylen];
+  char* secx = new char[xlen];
+  char* secy = new char[ylen];
 
   // use TMalign functionality to generate position arrays
   double** xa;
@@ -83,7 +83,6 @@ TMAlignResult WrappedTMAlign(const geom::Vec3List& pos_one,
   int a_opt = 0; 
   std::vector<String> sequence; 
   bool i_opt = false;
-  bool I_opt = false;
   double TMcut = -1; 
 
   // following variables are copied from the TMAlign source code
@@ -104,7 +103,7 @@ TMAlignResult WrappedTMAlign(const geom::Vec3List& pos_one,
   TMalign_main(xa, ya, seqx, seqy, secx, secy, t0, u0, TM1, TM2, TM3, TM4, TM5,
                d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
                rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, xlen, ylen, 
-               sequence, Lnorm_ass, d0_scale, i_opt, I_opt, a_opt, u_opt, d_opt, 
+               sequence, Lnorm_ass, d0_scale, i_opt, a_opt, u_opt, d_opt, 
                fast, 0, TMcut);
 
   // cleanup
diff --git a/modules/bindings/tests/test_tmtools.py b/modules/bindings/tests/test_tmtools.py
index 148e6263c39fc06dc6514b91d2c0c76d78f759b2..cddd9ed7aaeb2cb3176828b95df6e80e38b53355 100644
--- a/modules/bindings/tests/test_tmtools.py
+++ b/modules/bindings/tests/test_tmtools.py
@@ -4,6 +4,7 @@ from ost import settings
 from ost import testutils
 from ost.seq.alg import SequenceIdentity
 from ost.bindings import tmtools
+from ost.bindings import WrappedTMAlign
 
 class TestTMBindings(unittest.TestCase):
   
@@ -54,6 +55,24 @@ class TestTMBindings(unittest.TestCase):
     identity = geom.Mat4(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1)
     self.assertEqual(tm_result.transform, identity)
 
+  def testWrappedTMAlign(self):
+    """Align the first chain of self.protein onto itself with WrappedTMAlign."""
+    tm_result = WrappedTMAlign(self.protein.CreateFullView().chains[0],
+                               self.protein.CreateFullView().chains[0])
+
+    # model and reference are the same chain: expect rmsd 0, TM-score 1, full-length alignment
+    self.assertAlmostEqual(tm_result.rmsd, 0.0, places=4)
+    self.assertAlmostEqual(tm_result.tm_score, 1.0, places=4)
+    self.assertEqual(tm_result.aligned_length, len(self.protein.chains[0].residues))
+    self.assertEqual(SequenceIdentity(tm_result.alignment), 100.0)
+
+    # transform should be the identity matrix; checked element-wise with assertAlmostEqual
+    identity = geom.Mat4(1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1)
+    for i in range(4):
+      for j in range(4):
+        self.assertAlmostEqual(tm_result.transform[i,j], identity[i,j])
+
+
 
 if __name__ == "__main__":
   testutils.RunTests()