Source code for mrdna.readers.segmentmodel_from_oxdna

from mrdna import logger, devlogger
from .segmentmodel_from_lists import model_from_basepair_stack_3prime
from ..arbdmodel.coords import rotationAboutAxis
import pickle
import pandas as pd
import sys
import mrdna.readers.libs as libs
import numpy as np
from scipy.spatial import distance_matrix
pd.options.mode.chained_assignment = None  # default='warn'

_seq_to_int_dict = dict(A=0,T=1,C=2,G=3)
_seq_to_int_dict = {k:str(v) for k,v in _seq_to_int_dict.items()}

_yrot = rotationAboutAxis(axis=(0,1,0), angle=180).dot(rotationAboutAxis(axis=(0,0,1),angle=-40))

[docs] def mrdna_model_from_oxdna(coordinate_file, topology_file,virt2nuc=None,get_nt_prop=False, **model_parameters): """ Construct an mrdna model from oxDNA coordinate and topology files """ top_data = np.loadtxt(topology_file, skiprows=1, unpack=True, dtype=np.dtype('i4,U1,i4,i4') ) conf_data = np.loadtxt(coordinate_file, skiprows=3) def _get_bp(sequence=None): dists = distance_matrix(r,basepair_pos) + np.eye(len(r))*1000 dists = 0.5*(dists + dists.T) bp = np.array([np.argmin(da) for da in dists]) for i,j in enumerate(bp): if j == -1: continue # devlogger.info(f'bp {i} {j} {dists[i,j]}') if dists[i,j] > 2: bp[i] = -1 elif bp[j] != i: bpj = bp[j] logger.warning( " ".join([str(_x) for _x in ["Bad pair", i, j, bp[i], bp[j], dists[i,j], dists[j,i], dists[bpj,j], dists[j,bpj]]]) ) for i,j in enumerate(bp): if j == -1: continue if bp[j] != i: bpj = bp[j] logger.warning( " ".join([str(_x) for _x in ["Bad pair2", i, j, bp[i], bp[j], dists[i,j], dists[j,i], dists[bpj,j], dists[j,bpj]]]) ) raise Exception if sequence is not None: seq = sequence bp_seq = sequence[bp] bp_seq[bp==-1] = 'X' bad_bps = np.where( (bp >= 0) & (((seq == 'C') & (bp_seq != 'G')) | ((seq == 'G') & (bp_seq != 'C')) | ((seq == 'T') & (bp_seq != 'A')) | ((seq == 'U') & (bp_seq != 'A')) | ((seq == 'A') & ((bp_seq != 'T') | (bp_seq != 'U'))) ) )[0] bp[bp[bad_bps]] = -1 bp[bad_bps] = -1 return bp def _get_stack(): dists = distance_matrix( r + 3.5*normal_dir + 2.1*perp_dir -1*base_dir, r ) + np.eye(len(r))*1000 stack = np.array([np.argmin(da) for da in dists]) for i,j in enumerate(stack): if dists[i,j] > 8: stack[i] = -1 elif i < 10: ## development info # devlogger.info([i,j,dists[i,j]]) # dr = r[j] - (r[i] - normal_dir[i]*3.4 + perp_dir[i]*1 + base_dir[i]*1) dr = r[j] - r[i] # devlogger.info([normal_dir[i].dot(dr), perp_dir[i].dot(dr), base_dir[i].dot(dr)]) return np.array(stack) def _find_vh_vb_table(s,is_scaf): L=[] for i in list(s.keys()): vh,zid=i strand,indices=s[i] if len(indices)==0: continue else: if len(indices)==1: zids=[str(zid)] else: zids=[str(zid)+"."+str(j) for j in range(len(indices))] for index,z in zip(indices,zids): L.append(pd.Series({"index":index,"vh":vh,"zid":z,"strand":strand,"is_scaf":bool(is_scaf)})) return L def get_virt2nuc(virt2nuc,top_data): sys.modules["libs"]=libs logger.info("""You are using a feature that depends on the tacoxdna library: A. Suma et al., 2019, "TacoxDNA: A user‐friendly web server for simulations of complex DNA structures, from single strands to origami", J. Comput. Chem. 40, 2586""") virt_pickle=open(virt2nuc,"rb") vh_vb,pattern=pickle.load(virt_pickle) L1=_find_vh_vb_table(vh_vb._scaf,1) L2=_find_vh_vb_table(vh_vb._stap,0) nt_prop=pd.DataFrame(L1+L2,dtype=object) nt_prop["index"]=nt_prop["index"].astype(int) nt_prop.set_index("index",inplace=True) nt_prop.sort_index(inplace=True) nt_prop["threeprime"]=top_data[2] nt_prop["seq"]=top_data[1] nt_prop["stack"]=top_data[2] vh_bool=1-(nt_prop["vh"]%2)*2 is_scaf_bool=nt_prop["is_scaf"]*2-1 nt_prop["fwd"]=np.array((is_scaf_bool.T*vh_bool+1)/2,dtype=bool) bp_map=dict(zip(zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]),nt_prop.index)) for i in nt_prop.index: if nt_prop.loc[i]["threeprime"] in nt_prop.index: if nt_prop.loc[nt_prop.loc[i]["threeprime"]]["vh"]!=nt_prop.loc[i]["vh"]: nt_prop.loc[i]["stack"]=-1 bp=-np.ones(len(nt_prop.index),dtype=int) counter=0 for i,j,k in zip(nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]): try: bp[counter]=bp_map[(i,j,not(k))] except: pass counter+=1 nt_prop["bp"]=bp non_stack_ind,=np.where(nt_prop["stack"]==-1) for i in non_stack_ind: zid=int(nt_prop.loc[i]["zid"])+int(nt_prop.loc[i]["fwd"])*2-1 try: nt_prop["stack"][i]=bp_map[(nt_prop.loc[i]["vh"],str(zid),nt_prop.loc[i]["fwd"])] except: continue return nt_prop try: nt_prop=get_virt2nuc(virt2nuc,top_data) r=conf_data[:,:3] * 8.518 base_dir = conf_data[:,3:6] # basepair_pos = r + base_dir*6.0 basepair_pos = r + base_dir*10.0 normal_dir = -conf_data[:,6:9] perp_dir = np.cross(base_dir, normal_dir) orientation = [np.array(o).T.dot(_yrot) for o in zip(perp_dir,-base_dir,-normal_dir)] seq=nt_prop["seq"] bp=nt_prop["bp"] stack=np.array((list(nt_prop["stack"]))) three_prime=nt_prop["threeprime"] nt_prop["orientation"]=orientation nt_prop["r"]=list(r) except: if virt2nuc is not None: logger.info("warning: virt2nuc not read. guessing structure...") ## Reverse direction so indices run 5'-to-3' top_data = [a[::-1] for a in top_data] conf_data = conf_data[::-1,:] r = conf_data[:,:3] * 8.518 base_dir = conf_data[:,3:6] # basepair_pos = r + base_dir*6.0 basepair_pos = r + base_dir*10.0 normal_dir = -conf_data[:,6:9] perp_dir = np.cross(base_dir, normal_dir) orientation = np.array([np.array(o).T.dot(_yrot) for o in zip(perp_dir,-base_dir,-normal_dir)]) seq = top_data[1] bp = _get_bp(seq) stack = _get_stack() three_prime = len(r) - top_data[2] -1 five_prime = len(r) - top_data[3] -1 three_prime[three_prime >= len(r)] = -1 five_prime[five_prime >= len(r)] = -1 nt_prop=pd.DataFrame({"r":list(r),"bp":list(bp),"stack":list(stack),"threeprime":list(three_prime), "seq":list(seq),"orientation":list(orientation)}) def _debug_write_bonds(): from ..arbdmodel import ParticleType, PointParticle, ArbdModel, Group bond = tuple() b_t = ParticleType('BASE') p_t = ParticleType('PHOS') parts = [] for i,(r0,r_bp,three_prime0,bp0,stack0,seq0) in enumerate(zip(r,basepair_pos, three_prime, bp, stack, seq)): p = PointParticle(p_t, name='PHOS', position = r0, resid=i) b = PointParticle(b_t, name=seq0, position = 0.5*(r0+r_bp), resid=i) parts.extend((p,b)) model = ArbdModel(parts) model.writePdb('test.pdb') for i,(r0,r_bp,three_prime0,bp0,stack0) in enumerate(zip(r,basepair_pos, three_prime, bp, stack)): model.add_bond(parts[2*i],parts[2*i+1],bond) j = three_prime0 if j >= 0: model.add_bond(parts[2*i],parts[2*j],bond) j = bp0 if j >= 0: model.add_bond(parts[2*i+1],parts[2*j+1],bond) model.writePsf('test.psf') model.bonds = [] for i,(r0,r_bp,three_prime0,bp0,stack0) in enumerate(zip(r,basepair_pos, three_prime, bp, stack)): j = stack0 if j >= 0: model.add_bond(parts[2*i],parts[2*j],bond) model.writePsf('test.stack.psf') ## _debug_write_bonds() logger.info(f'mrdna_model_from_oxdna: num_bp, num_ss_nt, num_stacked: {np.sum(bp>=0)//2} {np.sum(bp<0)} {np.sum(stack >= 0)}') model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, explicitly_unstacked = np.argwhere(stack == -1), **model_parameters ) """ model.DEBUG = True model.generate_bead_model(1,1,False,True,one_bead_per_monomer=True) for seg in model.segments: for bead in seg.beads: bead.position = bead.position + np.random.standard_normal(3) simulate( model, output_name='test', directory='test4' ) """ model._dataframe=nt_prop return model
if __name__ == "__main__": mrdna_model_from_oxdna("0-from-collab/nanopore.oxdna","0-from-collab/nanopore.top") # mrdna_model_from_oxdna("2-oxdna.manual/output/from_mrdna-oxdna-min.last.conf","0-from-collab/nanopore.top")