#!/usr/bin/env python
# coding: utf-8

# # 10. Writing mapped trajectories from Gromacs all atom (AA) trajectories
# 
# ### Prerequisites
# * MDAnalysis
# * gsd
# 
# This example maps a system of two 12 amino acid long peptides. One amino acid residue's center of mass represents one CG bead. The all atom topology and trajectory files can be found in `examples` directory. The mapped trajectory will be saved in gsd format to `CG_tutorial` directory. 

# In[1]:


import hoomd
import numpy as np 
import hoomd.md
import gsd,gsd.hoomd
import gsd.pygsd
import hoomd.htf as htf
import tensorflow as tf
import MDAnalysis as mda

# disable GPU
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

#disable warnings
import warnings
warnings.filterwarnings('ignore')


# In this example we will use a system of two peptides which are 12 amino acids long. First we need to create the non-mass weighted mapping operator to CG the AA trajectory. Note that the mapping operator is defined only for one peptide as the peptides are identical. 

# In[2]:


u = mda.Universe('test_topol.pdb','test_traj.trr')

# we select segment A to define the mapping operator. 
segA = u.select_atoms('segid A')

m = len(segA.atoms.residues) #N = num of CG beads
n = len(segA.atoms)  #AA = num of all atoms
map_mat = np.zeros((m,n))

atm_list = list(segA.atoms)
for a in atm_list:
    cgid = a.resid
    aaid = a.id
    #print(cgid,aaid)
    map_mat[cgid-1,aaid-1] =1

print('Mapping operator:', map_mat)
    

# Next we need to define the CG mapping for our system using the mappng operator above. We will use the AA topology (PDB converted into a GSD file) for this. Take a look at `example 02` for a detailed description.

# In[3]:


c = hoomd.context.initialize('--mode=cpu')
system = hoomd.init.read_gsd(filename='test_topol.gsd')
c.sorter.disable()
set_rcut = 10.0

outfile = 'testtraj_mapped.gsd'

molecule_mapping_index = htf.find_molecules(system) 

# get number of atoms
N = sum([len(m) for m in molecule_mapping_index]) 

# get number of molecules
M = len(molecule_mapping_index) 

# get number of atoms in a molecule=MN
MN = len(molecule_mapping_index[0])

molecule_mapping = map_mat
print('atoms per mol',MN,'mapping mat',molecule_mapping.shape)
bead_number = molecule_mapping.shape[0]

cg_mapping = htf.sparse_mapping([molecule_mapping for _ in molecule_mapping_index], 
                                molecule_mapping_index, system=system)

assert cg_mapping.shape == (M * bead_number, N)


# Now let's read the trajectory and generate CG bead types, indices and COMs. There are 24 residues in the system. Hence we will name each bead index by an integer scaling from 1 to 24. CG beads names will be `B + index`. A user can give any bead name of choice. We will write each mapped state to the output file in GSD format.

# In[4]:


t = gsd.hoomd.open(name=outfile, mode='wb')
frame_num = 0

CGnum = M * bead_number
print('Number of CG beads: ',CGnum)
CGids = np.arange(1,CGnum+1)

#creating bead names
CGtypes = []
for i in CGids:
    CGtypes.append('B'+str(i))

for inputs, ts in htf.iter_from_trajectory(16, u, r_cut=45):
    positions = inputs[1]
    box = inputs[2]
    box_size = htf.box_size(box)
    positions = positions[:, :3]
    
    # First let's calculate the COMs of the CG groups
    #Note that each residue is grouped into one atom group and its COM is computed
   
    mapped_pos = htf.center_of_mass(tf.cast(positions[:,:3],dtype=tf.float64), 
                                                         tf.cast(cg_mapping,dtype=tf.float64), box_size)
    mdbox = u.dimensions
    coms = mapped_pos.numpy()
    
    #Let's generate snapshots of each frame and write to a GSD file 
    t.append(htf.create_frame(frame_num, CGnum, CGtypes, CGids, coms, mdbox))
    frame_num += 1

print('GSD file written')  


# To see test if our mapped trajectory was written properly, we can try and read the `CG_tutorial/testtraj_mapped.gsd` file. You can see how the position of the first CG bead changes over the frames. 

# In[5]:


with gsd.pygsd.GSDFile(open(outfile, 'rb')) as f:
    t = gsd.hoomd.HOOMDTrajectory(f)
    for frame,i in enumerate(t):
        pos = i.particles.position
        types = i.particles.types
        typeids = i.particles.typeid
        print(frame, pos[0])