forked from satijalab/seurat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtSNE_project.R
113 lines (110 loc) · 3.21 KB
/
tSNE_project.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#
#' @importFrom stats optim
#
project_map <- function(
  z,
  x_old,
  sum_X_old,
  x_old_tsne,
  P_tol = 5e-6,
  perplexity = 30
) {
  # Projects a new data point z onto an existing tSNE embedding.
  #
  # Args:
  #   z: the new point in the original (e.g. PC) space; expected as a
  #      1 x p matrix so that t(z) conforms with x_old (p columns)
  #      -- NOTE(review): confirm against callers
  #   x_old: training points in the original space (n x p)
  #   sum_X_old: precomputed rowSums(x_old ^ 2), length n
  #   x_old_tsne: tSNE embeddings of the training points (n x 2)
  #   P_tol: probability cutoff; only points with P > P_tol are used
  #   perplexity: target perplexity for the conditional probabilities
  #
  # Returns: the optimized 2D embedding coordinates for z.
  sum_z <- sum(z ^ 2)
  # Squared Euclidean distances from z to every training point,
  # via ||z||^2 - 2 * <x, z> + ||x||^2
  D_org <- sum_z + (-2 * as.matrix(x = x_old) %*% t(x = z) + sum_X_old)
  P <- d2p_cell(D = D_org, u = perplexity)
  # Use only a small subset of points to compute the embedding; this
  # keeps all the points that are proximal to the new point.
  nn_points <- which(x = P > P_tol)
  y_nn_set <- x_old_tsne[nn_points, ] # Computed embeddings
  P_nn_set <- P[nn_points, ]          # Probabilities
  # Initial guess: probability-weighted average of neighbor embeddings
  y_new0 <- (
    t(x = as.matrix(x = y_nn_set)) %*%
      t(x = as.matrix(x = rbind(P_nn_set, P_nn_set)))
  )[, 1]
  # Silence the debug printing inside KullbackFun during optimization.
  # nullfile() is portable (unlike "/dev/null"), and on.exit guarantees
  # the sink is released even if optim() throws an error.
  sink(file = nullfile())
  on.exit(expr = sink(), add = TRUE)
  y_new <- optim(
    par = y_new0,
    fn = KullbackFun,
    gr = NULL,
    y_nn_set,
    P_nn_set,
    method = "Nelder-Mead"
  )
  return(y_new$par)
}
d2p_cell <- function(D, u = 15, tol = 1e-4) {
  # Converts squared distances D into Gaussian transition probabilities
  # whose perplexity matches u, by binary search over the kernel
  # precision beta (at most 50 iterations).
  #
  # Args:
  #   D: squared distances from the projected point to the training set
  #   u: target perplexity
  #   tol: convergence tolerance on the entropy gap. (The original code
  #        shadowed this parameter with a hard-coded `tol = 1e-4`,
  #        silently ignoring the caller's value; now honored.)
  #
  # Returns: vector of transition probabilities, same length as D.
  betamin <- -Inf
  betamax <- Inf
  tries <- 0
  beta <- 1
  beta.list <- Hbeta(D = D, beta = beta)
  h <- beta.list[[1]]
  thisP <- beta.list[[2]]
  hdiff <- h - log(x = u)
  # Bisect on beta until the entropy matches log(perplexity)
  while (abs(x = hdiff) > tol && tries < 50) {
    if (hdiff > 0) {
      # Entropy too high: raise the precision (narrower kernel)
      betamin <- beta
      if (betamax == Inf) {
        beta <- beta * 2
      } else {
        beta <- (beta + betamax) / 2
      }
    } else {
      # Entropy too low: lower the precision (wider kernel)
      betamax <- beta
      if (betamin == -Inf) {
        beta <- beta / 2
      } else {
        beta <- (beta + betamin) / 2
      }
    }
    beta.list <- Hbeta(D = D, beta = beta)
    h <- beta.list[[1]]
    thisP <- beta.list[[2]]
    hdiff <- h - log(x = u)
    tries <- tries + 1
  }
  # Final probability row for this cell
  return(thisP)
}
KullbackFun <- function(z, y, P) {
  # Computes the Kullback-Leibler divergence cost for embedding a point
  # at position z in a tSNE map. Used as the objective for optim() in
  # project_map, so it is called once per optimizer iteration.
  #
  # Args:
  #   z: candidate 2D embedding coordinates for the new point
  #   y: tSNE embeddings of the neighbor set (n x 2)
  #   P: transition probabilities in the original space (length n)
  #
  # Returns: scalar KL divergence sum(P * log(P / Q)).
  #
  # (Leftover debug print(z) / print(dim(y)) calls were removed; they
  # ran on every optimizer step and forced callers to sink() stdout.)
  Cost0 <- sum(P * log(x = P)) # Constant part of the cost function
  # Squared distances from z to each stored embedding
  sum_z <- sum(z ^ 2)
  sum_y <- rowSums(x = (y ^ 2))
  D_yz <- sum_z + (
    -2 * as.matrix(x = y) %*% t(x = matrix(data = z, nrow = 1)) + sum_y
  )
  # Student-t kernel (1 df), normalized to transition probabilities
  # in the embedded space
  Q <- 1 / (1 + D_yz)
  Q <- Q / sum(Q)
  Cost <- Cost0 - sum(P * log(x = Q))
  return(Cost)
}
Hbeta <- function(D, beta) {
  # Computes the Shannon entropy H and the normalized transition
  # probabilities P of a Gaussian kernel with precision beta applied to
  # squared distances D.
  #
  # Args:
  #   D: squared distances (vector)
  #   beta: Gaussian kernel precision (1 / (2 * sigma^2))
  #
  # Returns: list(H, P, flagP) where flagP == 0 signals the degenerate
  # case in which no point was proximal and P fell back to uniform.
  flagP <- 1
  P <- exp(x = -D * beta)
  sumP <- sum(P)
  if (sumP < 1e-8) {
    # No point is proximal: fall back to a uniform distribution.
    # BUGFIX: the original `rep(length(P), 1) / length(P)` evaluated to
    # the scalar 1 (rep's arguments were swapped), collapsing P to
    # length 1 and breaking downstream indexing by nn_points.
    P <- rep(x = 1, times = length(x = P)) / length(x = P)
    sumP <- sum(P)
    flagP <- 0
  }
  H <- log(x = sumP) + beta * sum(D * P) / sumP
  P <- P / sumP
  return(list(H, P, flagP))
}