import scmra
import pandas as pd
import numpy as np
import seaborn as sns; sns.set_theme()
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import networkx as nx

#read in data
data = pd.read_csv("../data/scData_wildtype.tsv", sep='\t')


#add a SC-ID
data.index = ["ID."+str(i) for i in range(data.shape[0])]

#calculate input: deviatipon of protein from cell population mean
dat_scaled = ((data - data.median())/data.median()).transpose()

#get active protein deviations (frmo those we infer interaction strengths)
rglob = dat_scaled.loc[[n for n in list(dat_scaled.index) if ("Active" in n)]]
rglob.index = [i.replace("Active", "") for i in rglob.index]

#get total protein deviations: 'natural perturbations'
rtot = dat_scaled.loc[[n for n in list(dat_scaled.index) if ("Tot" in n)]]
rtot.index = [i.replace("Tot", "") for i in rtot.index]

display(rglob.head)

<bound method NDFrame.head of             ID.0      ID.1      ID.2      ID.3      ID.4      ID.5      ID.6  \
Sos    -0.051155  0.085232  0.008321  0.073530  0.037336  0.077462  0.092302   
Ras    -0.076395  0.054095 -0.007728  0.025145  0.028869  0.058157  0.088900   
Raf1   -0.103680  0.069777  0.053183  0.041268  0.066881  0.064294  0.005852   
Mek    -0.033101 -0.152314 -0.050093 -0.128995  0.069491 -0.148370  0.166537   
Erk    -0.106257 -0.177075  0.030958 -0.113891  0.051923 -0.161771  0.092711   
P90Rsk -0.129504 -0.117604 -0.007665 -0.074405 -0.126830 -0.019424  0.120107   
PI3K    0.142959  0.021876 -0.143707  0.023346  0.029151  0.198616  0.008005   
Akt     0.116549 -0.098978 -0.143954 -0.084988  0.046098  0.104219  0.110371   
C3G     0.007717 -0.082415 -0.006071 -0.138128  0.056060 -0.083842 -0.024886   
Rap1   -0.005878 -0.069144  0.016994 -0.137141  0.027891 -0.114642  0.005731   
bRaf    0.072555 -0.084724 -0.030218 -0.107014  0.048830 -0.116599  0.097450   

            ID.7      ID.8      ID.9  ...    ID.240    ID.241    ID.242  \
Sos    -0.008603 -0.222532  0.126810  ... -0.094700 -0.237459 -0.100487   
Ras     0.004750 -0.215548  0.117134  ... -0.117030 -0.264199 -0.082735   
Raf1    0.066933 -0.259652  0.135834  ... -0.178845 -0.324005 -0.072003   
Mek     0.058568  0.107128 -0.142194  ...  0.144093  0.070434  0.045346   
Erk     0.024951  0.221935 -0.127559  ...  0.143330  0.013953  0.156041   
P90Rsk  0.015984  0.237644 -0.188018  ... -0.059097 -0.065432  0.160903   
PI3K    0.087303 -0.074450 -0.020944  ... -0.029353 -0.002874  0.065850   
Akt    -0.176313  0.044074 -0.082037  ...  0.116220  0.103712 -0.074635   
C3G     0.005292  0.115988  0.113998  ...  0.129428  0.128431 -0.017273   
Rap1    0.009414  0.123415  0.086880  ...  0.135517  0.103057 -0.037426   
bRaf    0.006757  0.086650  0.085719  ...  0.119990  0.073498  0.017430   

          ID.243    ID.244    ID.245    ID.246    ID.247    ID.248    ID.249  
Sos    -0.036475 -0.165083 -0.107879 -0.035377  0.137442 -0.172961  0.087074  
Ras    -0.020628 -0.175497 -0.123056 -0.007759  0.112728 -0.195095  0.052859  
Raf1    0.006394 -0.207331 -0.106979 -0.037144  0.126883 -0.212635  0.137019  
Mek    -0.002845  0.109314 -0.093608 -0.074491  0.002844  0.118653  0.027718  
Erk     0.097090  0.088938 -0.045883 -0.029147  0.009935  0.135879  0.079179  
P90Rsk  0.065053  0.084095  0.037934  0.039865 -0.026981  0.207729 -0.072461  
PI3K   -0.032113 -0.083988  0.228363 -0.054729  0.055028  0.149363  0.021098  
Akt     0.039172 -0.015887 -0.040904  0.200536  0.037683 -0.029558 -0.143577  
C3G    -0.081234  0.001695  0.079890 -0.102714 -0.138296  0.202136 -0.082332  
Rap1   -0.080556 -0.001542  0.051694 -0.083326 -0.155609  0.199343 -0.076946  
bRaf   -0.050971  0.000265  0.014805 -0.022855 -0.099917  0.113333 -0.068884  

[11 rows x 250 columns]>


%%capture

etaRange = numbers = np.linspace(0.0001, 0.75, 20)
dfEdgeEstimation = pd.DataFrame(columns=['Eta', 'MSE', '#Edge'])
for eta in etaRange:

    #make MRA simulation
    scd = scmra.ScData(rglob=rglob, rtot = rtot)
    p = scmra.ScMraProblem(scd, eta=eta)
    p.cpx.solve()
    result = scmra.ScMraResult(p)

    #store number of edges/ Mean Sqared Error/ penalty
    n_res = np.size(result.residuals_complete) + np.size(result.residuals_incomplete)
    error = np.sum(np.array(np.square(result.residuals_complete)))
    error += np.sum(np.array(np.square(result.residuals_incomplete)))
    error = error/n_res
    edgeNum = result.imap.sum().sum()
    row = {'Eta': [eta], 'MSE': [error], '#Edge': [edgeNum]}
    dfEdgeEstimation = pd.concat([dfEdgeEstimation, pd.DataFrame(row)])


with plt.style.context("seaborn-whitegrid"):
    plt.rcParams["axes.edgecolor"] = "0.15"
    plt.rcParams["axes.linewidth"]  = 1.25
    f, ax = plt.subplots(figsize=(5, 5))
    wt = ax.scatter(dfEdgeEstimation["#Edge"], dfEdgeEstimation["MSE"], color = "#0138FE", s = 150)
    ax.plot(ls="--", c=".3")
    plt.xlabel('#Edges', size = 24)
    plt.ylabel('Mean Squared Error', size = 24)
    plt.show()

display(dfEdgeEstimation)


%%capture

#make MRA simulation
eta = 0.1
scd = scmra.ScData(rglob=rglob, rtot = rtot)
p = scmra.ScMraProblem(scd, eta=eta)
p.cpx.solve()
result = scmra.ScMraResult(p)


display(result.rloc)

#diagonal rloc entries are per definition -1, replace those with 0 to plot network
A = result.rloc.transpose()
A[A == -1] = 0
G = nx.from_pandas_adjacency(A, create_using=nx.DiGraph)

pos=nx.shell_layout(G)
nx.draw_networkx(G, with_labels=True, node_size = 750, pos = pos, node_color = "yellow")
#create edge labels
edgeLabels = {}
edgeColor = {}
for row_label, row_data in A.iterrows():
    for col_label, value in row_data.items():
        key = (col_label, row_label)
        if(np.abs(value) > 0):
            edgeLabels[key] = round(value,1)

nx.draw_networkx_edge_labels(
    G, pos,
    edge_labels=edgeLabels,
    font_color="black"
)
plt.show()

Eta	MSE	#Edge
0.000100	0.000058	25
0.039568	0.000079	11
0.079037	0.000118	10
0.118505	0.000118	10
0.157974	0.000379	8
0.197442	0.000379	8
0.236911	0.000379	8
0.276379	0.000379	8
0.315847	0.000379	8
0.355316	0.000379	8
0.394784	0.000379	8
0.434253	0.000931	7
0.473721	0.001495	6
0.513189	0.001495	6
0.552658	0.001495	6
0.592126	0.003341	4
0.631595	0.005650	2
0.671063	0.005650	2
0.710532	0.008861	0
0.750000	0.008861	0

	Sos	Ras	Raf1	Mek	Erk	P90Rsk	PI3K	Akt	C3G	Rap1	bRaf
Sos	-1.000000	0.0000	0.0	0.000000	0.00000	-0.798104	0.000000	0.000000	0.000000	0.000000	0.000000
Ras	0.997905	-1.0000	0.0	0.000000	0.00000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
Raf1	0.000000	1.0035	-1.0	0.000000	0.00000	0.000000	0.000000	-0.401177	0.000000	0.000000	0.000000
Mek	0.000000	0.0000	0.0	-1.000000	0.00000	0.000000	0.000000	0.000000	0.000000	0.000000	0.879479
Erk	0.000000	0.0000	0.0	1.002274	-1.00000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
P90Rsk	0.000000	0.0000	0.0	0.000000	0.93911	-1.000000	0.000000	0.000000	0.000000	0.000000	0.000000
PI3K	0.000000	0.0000	0.0	0.000000	0.00000	0.000000	-1.000000	0.000000	0.000000	0.000000	0.000000
Akt	0.000000	0.0000	0.0	0.000000	0.00000	0.000000	0.379765	-1.000000	0.000000	0.000000	0.000000
C3G	0.000000	0.0000	0.0	0.000000	0.00000	0.000000	0.000000	0.000000	-1.000000	0.000000	0.000000
Rap1	0.000000	0.0000	0.0	0.000000	0.00000	0.000000	0.000000	0.000000	1.000372	-1.000000	0.000000
bRaf	0.000000	0.0000	0.0	0.000000	0.00000	0.000000	0.000000	0.000000	0.000000	0.776643	-1.000000

Single Cell Network Reconstruction with scMRA¶

Data Preparation¶

Define Network Sparcity¶

Reconstruct the network¶