## ----setup, include=FALSE-----------------------------------------------------
# Document-wide knitr chunk defaults: centered 6 x 4 figures, and source and
# output collapsed into a single block whose output lines start with "#>".
knitr::opts_chunk$set(
  fig.align  = "center",
  fig.height = 4,
  fig.width  = 6,
  comment    = "#>",
  collapse   = TRUE
)

# Seed the RNG once up front so the rendered results are reproducible.
set.seed(42)

## ----load-package-------------------------------------------------------------
library(sparsecommunity)

## ----sim-sbm------------------------------------------------------------------
# Two balanced communities on n = 300 nodes.
# Connectivity matrix: 0.25 on the diagonal (within-community edge
# probability) and 0.04 everywhere else (between-community).
B_sbm <- matrix(0.04, nrow = 2, ncol = 2)
diag(B_sbm) <- 0.25

sim_sbm <- simulate_sbm(
  n    = 300,
  K    = 2,
  B    = B_sbm,
  seed = 1
)
print(sim_sbm)

## ----sbm-structure------------------------------------------------------------
# Mean node degree of the simulated graph (row sums of the adjacency matrix).
# For orientation: in the sparse regime the mean degree is of order log(n).
deg_sbm <- Matrix::rowSums(sim_sbm$A)
mean(deg_sbm)

## ----fit-sbm------------------------------------------------------------------
# Fit the SBM variant of the detector to the simulated adjacency matrix.
fit_sbm <- community_detect(
  sim_sbm$A,
  K     = 2,
  model = "sbm",
  seed  = 1
)
print(fit_sbm)

## ----sbm-components-----------------------------------------------------------
# Leading K eigenvalues of the regularized Laplacian
fit_sbm$eigenvalues

# How many nodes were assigned to each detected community
table(fit_sbm$labels)

## ----sbm-accuracy-------------------------------------------------------------
# Detected labels scored against the simulated ground-truth labels
misclustering_rate(
  sim_sbm$labels,
  fit_sbm$labels
)

## ----sbm-embedding, fig.cap="Spectral embedding for SBM. Points are colored by true community."----
# Scatter the first two embedding coordinates. `labels + 1` skips palette
# entry 1, so the legend's 2:3 lines up with the points when the true labels
# are coded 1 and 2 (assumed from the legend; confirm against simulate_sbm).
U <- fit_sbm$embedding
plot(
  x    = U[, 1],
  y    = U[, 2],
  main = "SBM: spectral embedding",
  xlab = "Eigenvector 1",
  ylab = "Eigenvector 2",
  col  = sim_sbm$labels + 1,
  pch  = 19,
  cex  = 0.6
)
legend(
  "topright",
  legend = c("Community 1", "Community 2"),
  col    = 2:3,
  pch    = 19,
  bty    = "n"
)

## ----sim-dcsbm----------------------------------------------------------------
# Three communities with strong degree heterogeneity.
# Connectivity matrix: 0.5 within a community, 0.04 between communities.
B_dcsbm <- matrix(0.04, nrow = 3, ncol = 3)
diag(B_dcsbm) <- 0.5

# One degree parameter per node, drawn from Uniform(0.3, 1.7) to create
# substantial heterogeneity. The node count is kept in a single place so the
# length of `theta` always agrees with `n`.
n_dcsbm <- 400
set.seed(2)
theta <- runif(n_dcsbm, min = 0.3, max = 1.7)

sim_dcsbm <- simulate_dcsbm(
  n     = n_dcsbm,
  K     = 3,
  B     = B_dcsbm,
  theta = theta,
  seed  = 2
)
print(sim_dcsbm)

## ----dcsbm-sbm-fail-----------------------------------------------------------
# Baseline: apply the plain SBM method to the degree-corrected data.
fit_wrong <- community_detect(
  sim_dcsbm$A,
  K     = 3,
  model = "sbm",
  seed  = 2
)
err_wrong <- misclustering_rate(sim_dcsbm$labels, fit_wrong$labels)
cat("Misclustering rate (SBM method on DCSBM data):", err_wrong, "\n")

## ----fit-dcsbm----------------------------------------------------------------
# Same data and seed, now with the DCSBM method.
fit_dcsbm <- community_detect(
  sim_dcsbm$A,
  K     = 3,
  model = "dcsbm",
  seed  = 2
)
print(fit_dcsbm)

err_dcsbm <- misclustering_rate(sim_dcsbm$labels, fit_dcsbm$labels)
cat("Misclustering rate (DCSBM method):", err_dcsbm, "\n")

## ----dcsbm-embedding, fig.cap="Row-normalized spectral embedding for DCSBM. Colors indicate true communities."----
# First two coordinates of the DCSBM embedding, coloured by true community.
# Palette entries 2:4 are used for both the points and the legend.
U_dc <- fit_dcsbm$embedding
plot(
  x    = U_dc[, 1],
  y    = U_dc[, 2],
  main = "DCSBM: row-normalized spectral embedding",
  xlab = "Eigenvector 1 (normalized)",
  ylab = "Eigenvector 2 (normalized)",
  col  = sim_dcsbm$labels + 1,
  pch  = 19,
  cex  = 0.5
)
legend(
  "topright",
  legend = paste("Community", seq_len(3)),
  col    = 2:4,
  pch    = 19,
  bty    = "n"
)

## ----karate-data, message=FALSE-----------------------------------------------
# The real-data example needs two optional packages: igraph (graph utilities)
# and igraphdata (the karate data set). Check for both, so that a missing
# igraph leads to the same graceful skip instead of an error from library().
karate_pkgs <- c("igraph", "igraphdata")
karate_pkgs_ok <- vapply(karate_pkgs, requireNamespace, logical(1),
                         quietly = TRUE)
if (!all(karate_pkgs_ok)) {
  message(paste(karate_pkgs[!karate_pkgs_ok], collapse = ", "),
          " not installed; skipping real-data example.")
  # Ask knitr to stop rendering the remainder of the document.
  knitr::knit_exit()
}

library(igraph)
data("karate", package = "igraphdata")

# Extract adjacency matrix and true community labels
A_karate  <- igraph::as_adjacency_matrix(karate, sparse = TRUE)
true_comm <- igraph::V(karate)$Faction

# Basic summary of the network. igraph functions are namespace-qualified so
# they cannot be masked by same-named functions from other attached packages.
cat("Nodes:", igraph::vcount(karate), "| Edges:", igraph::ecount(karate),
    "| Communities:", length(unique(true_comm)), "\n")
cat("Community sizes:", table(true_comm), "\n")
cat("Mean degree:", round(mean(igraph::degree(karate)), 2), "\n")

## ----karate-fit---------------------------------------------------------------
# Two-community SBM fit of the karate network, using n_init = 30 and a fixed
# seed for reproducibility.
fit_karate <- community_detect(
  A_karate,
  K      = 2,
  model  = "sbm",
  n_init = 30,
  seed   = 42
)
summary(fit_karate)

# Detected communities scored against the recorded factions.
err_karate <- misclustering_rate(true_comm, fit_karate$labels)
cat("Misclustering rate:", err_karate, "\n")

## ----karate-plot, fig.cap="Karate club network. Node color = detected community; node shape = true faction."----
# Plot network colored by detected community; node shape encodes true faction.
#
# Colours are passed to igraph as explicit colour strings rather than numeric
# indices. igraph resolves numeric vertex colours against its own plotting
# palette, whereas legend() resolves numeric `fill` against R's palette(), so
# numeric indices can make the legend disagree with the plotted vertices.
# Taking both from the same vector keeps them in sync.
r_pal       <- grDevices::palette()
vertex_cols <- r_pal[fit_karate$labels + 1]   # same index rule as before
legend_cols <- r_pal[2:3]                     # entries for labels 1 and 2

shape_map <- ifelse(true_comm == 1, "circle", "square")
igraph::plot.igraph(
  karate,
  vertex.color = vertex_cols,
  vertex.shape = shape_map,
  vertex.size  = 8,
  vertex.label = NA,
  main         = "Karate club: detected vs. true communities"
)
legend("bottomleft",
       legend = c("Detected: 1", "Detected: 2"),
       fill   = legend_cols, bty = "n", cex = 0.9)
# pch 19 (filled circle) and 15 (filled square) mirror the vertex shapes.
legend("bottomright",
       legend = c("True: faction 1", "True: faction 2"),
       pch    = c(19, 15), bty = "n", cex = 0.9)

## ----football-data------------------------------------------------------------
# Load the football data set and print basic size and sparsity summaries.
data("football")

n_football <- nrow(football_A)
# Every edge is counted twice in the total of a symmetric adjacency matrix.
cat("Nodes:", n_football, "| Edges:", sum(football_A) / 2, "\n")

deg_football <- Matrix::rowSums(football_A)
cat("Mean degree:", round(mean(deg_football), 2),
    "  log(n):", round(log(n_football), 2), "\n")

table(football_labels)   # 12 conferences

## ----football-estimate-K------------------------------------------------------
# Estimate the number of communities, searching up to K_max = 15.
estimate_K(
  football_A,
  K_max = 15
)   # true K = 12

## ----football-fit-------------------------------------------------------------
# Fit with the known K = 12 and score against the conference labels.
fit_football <- community_detect(
  football_A,
  K      = 12,
  model  = "sbm",
  n_init = 30,
  seed   = 1
)
misclustering_rate(
  football_labels,
  fit_football$labels
)

## ----football-plot, fig.cap="Spectral embedding of the football network. Colors indicate detected community; the 12 athletic conferences are largely separated."----
# Plot the fitted object via its plot() method.
plot(fit_football)

