# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None # default='warn'
from .segmentmodel_from_lists import model_from_basepair_stack_3prime
import scadnano as sc
from ..arbdmodel.coords import rotationAboutAxis
from ..model.dna_sequence import m13 as m13seq
## TODO: separate SegmentModel from ArbdModel so multiple parts can be combined
## TODO: catch circular strands in "get_5prime" cadnano calls
## TODO: handle special motifs
## - doubly-nicked helices
## - helices that should be stacked across an empty region (crossovers from and end in the helix to another end in the helix)
## - circular constructs
[docs]
def get_helix_dir(helix,design):
yaw_axis = np.array((0, 1, 0))
pitch_axis=np.array([1, 0, 0])
roll_axis =np.array([0, 0, 1])
pitch_axis=rotationAboutAxis(yaw_axis,-design.yaw_of_helix(helix)).dot(pitch_axis)
roll_axis=rotationAboutAxis(yaw_axis,-design.yaw_of_helix(helix)).dot(roll_axis)
roll_axis=rotationAboutAxis(pitch_axis,design.pitch_of_helix(helix)).dot(roll_axis)
return roll_axis
[docs]
def gen_id_series(domain,design):
df=pd.DataFrame(columns=["vh","zid","fwd","r"],index=range(domain.dna_length()),dtype=object)
df["loopout"]=False
hid=domain.helix
df["vh"]=hid
fwd=domain.forward
df["fwd"]=fwd
x=design.helices[domain.helix].calculate_position(design.grid,design.geometry).x*10
y=design.helices[domain.helix].calculate_position(design.grid,design.geometry).y*10
start=domain.start
end=domain.end
dels=domain.deletions
loops=domain.insertions
zids=[i for i in range(start,end)]
if len(dels)>0:
for d in dels:
zids.pop(zids.index(d))
if len(loops)>0:
for ind,ins in loops:
n=zids.index(ind)
zids.pop(n)
loop=[ind+round((j/(ins+1))*100)/100 for j in range(ins+1)]
loop.reverse()
for j in loop:
zids.insert(n,j)
df["zid"]=zids
if fwd==True:
start5=domain.start
end3=domain.end
df["zid"]=zids
else:
start5=domain.end
end3=domain.start
df["zid"]=zids[::-1]
df["x"]=x
df["y"]=y
df["z"]=df["zid"]*3.4
axis=get_helix_dir(hid,design)
df["orientation"]=[rotationAboutAxis(axis, design.helices[hid].backbone_angle_at_offset(float(i),fwd,design.geometry)) for i in df["zid"]]
df["stacked"]=[True]*(domain.dna_length()-1)+[False]
return [pd.Series(df.loc[i]) for i in df.index],(x,y,list(df["z"])[-1])
[docs]
def gen_strand(strand,design,group_index=0):
domain_list=[]
for do in strand.domains:
if type(do) is sc.scadnano.Domain:
L,(x,y,z)=gen_id_series(do,design)
if len(domain_list)>2:
if type(domain_list[-1]) is pd.DataFrame:
loop=domain_list.pop(-1)
inds=loop.index
loop["x"]=lx+(x-lx)*(loop.index+1)/(len(loop.index)+2)
loop["y"]=ly+(y-ly)*(loop.index+1)/(len(loop.index)+2)
loop["z"]=lz+(L[0]["z"]-lz)*(loop.index+1)/(len(loop.index)+2)
loop["orientation_angle"]=0
L=[pd.Series(loop.loc[i]) for i in loop.index]+L
domain_list=domain_list+L
elif type(do) is sc.scadnano.Loopout:
n=pd.DataFrame(columns=["loopout","x","y","z"],index=range(do.dna_length()),dtype=object)
n["loopout"]=True
n["stacked"]=False
lx,ly,lz=(x,y,z)
loop_out_length=do.dna_length()
domain_list.append(n)
strand_df=pd.DataFrame(domain_list).reset_index(drop=True)
strand_df["threeprime"]=list(strand_df.index[1:]+group_index)+[-1]
stack=np.where(strand_df["stacked"],strand_df["threeprime"],-1)
strand_df["stack"]=stack
try:
if len(strand.dna_sequence)==len(strand_df.index):
strand_df["seq"]=list(strand.dna_sequence)
else:
strand_df["seq"]=-1
except:
strand_df["seq"]=-1
strand_df["index"]=strand_df.index+group_index
#strand_df["seg_index"]=index
return strand_df
[docs]
def gen_prop_table(fname):
sc_design=sc.Design.from_scadnano_file(fname)
nt_list=[]
group_index=0
for i in range(len(sc_design.strands)):
dfi=gen_strand(sc_design.strands[i],sc_design,group_index)
group_index+=len(dfi.index)
nt_list.append(dfi)
nt_prop=pd.concat(nt_list)
nt_prop.set_index("index",inplace=True)
## Generate bps
bp_map={}
for i in nt_prop.index:
if nt_prop["loopout"][i]==False:
bp_map[(nt_prop["vh"][i],nt_prop["zid"][i],nt_prop["fwd"][i])]=i
ind_tuple=list(zip(nt_prop.index,nt_prop["vh"],nt_prop["zid"],nt_prop["fwd"]))
bp=-np.ones(len(nt_prop.index),dtype=int)
for i,j,k,l in ind_tuple:
try:
bp[i]=bp_map[(j,k,not(l))]
except:
continue
nt_prop["bp"]=bp
non_stack_ind,=np.where(nt_prop["stack"]==-1)
for i in non_stack_ind:
zid=int(nt_prop.loc[i]["zid"])+int(nt_prop.loc[i]["fwd"])*2.0-1.0
if (nt_prop.loc[i]["vh"],zid,nt_prop.loc[i]["fwd"]) in bp_map.keys():
nt_prop["stack"][i]=bp_map[(nt_prop.loc[i]["vh"],zid,nt_prop.loc[i]["fwd"])]
else:
continue
nt_prop["r"]=list(np.array([nt_prop["x"],nt_prop["y"],nt_prop["z"]],dtype="<f4").T)
return nt_prop
[docs]
def mrdna_model_from_scadnano(sc_file,seq=None,**model_parameters):
nt_prop=gen_prop_table(sc_file)
r=np.array(list(nt_prop['r']))
bp=np.array(list(nt_prop['bp']))
three_prime=np.array((list(nt_prop["threeprime"])))
stack=np.array((list(nt_prop["stack"])))
if nt_prop["seq"][0]==-1:
seq=None
else:
seq=nt_prop["seq"]
orientation=[rotationAboutAxis(np.array((0,0,1)),i) for i in list(nt_prop["orientation_angle"])]
model = model_from_basepair_stack_3prime( r, bp, stack, three_prime, seq, orientation, **model_parameters )
model._dataframe=nt_prop
return model