SupersetApplicationSpec.java
package com.cloudforgeci.api.application.analytics;
import com.cloudforge.core.annotation.ApplicationPlugin;
import com.cloudforge.core.interfaces.ApplicationSpec;
import com.cloudforge.core.interfaces.DatabaseSpec;
import com.cloudforge.core.interfaces.DatabaseSpec.DatabaseConnection;
import com.cloudforge.core.interfaces.DatabaseSpec.DatabaseRequirement;
import com.cloudforge.core.interfaces.Ec2Context;
import com.cloudforge.core.interfaces.OidcIntegration;
import com.cloudforge.core.interfaces.UserDataBuilder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Apache Superset Business Intelligence ApplicationSpec implementation.
*
* <p>Superset is a modern data exploration and visualization platform originally
* developed at Airbnb.</p>
*
* <p><strong>Key Features:</strong></p>
* <ul>
* <li>Rich interactive visualizations</li>
* <li>SQL IDE with syntax highlighting</li>
* <li>Semantic layer for defining custom dimensions and metrics</li>
* <li>Support for most SQL-speaking databases</li>
* <li>Extensible security model</li>
* </ul>
*
* <p><strong>Compliance Use Cases:</strong></p>
* <ul>
* <li>SOC2: Security event analytics and metrics dashboards</li>
* <li>GDPR: Data subject rights request tracking</li>
* <li>PCI-DSS: Transaction monitoring and anomaly detection</li>
* </ul>
*
* <p><strong>Fintech Applications:</strong></p>
* <ul>
* <li>Real-time payment transaction dashboards</li>
* <li>Fraud detection and risk analytics</li>
* <li>Financial performance metrics and KPIs</li>
* <li>Customer lifetime value (CLV) analysis</li>
* <li>Regulatory reporting and compliance dashboards</li>
* </ul>
*
* <p><strong>Database Requirements:</strong></p>
* <ul>
* <li><b>REQUIRED:</b> PostgreSQL 10+ or MySQL 5.7+ for metadata storage</li>
* <li>Superset does NOT support SQLite for production (single-process limitation)</li>
* <li>Recommended: PostgreSQL with db.t3.small or larger</li>
* </ul>
*
* <p><strong>Security Note:</strong></p>
* <ul>
* <li>Use database read-only credentials for analytics data sources</li>
* <li>Enable OIDC/LDAP for authentication</li>
* <li>Configure row-level security (RLS)</li>
* <li>Enable audit logging</li>
* </ul>
*
* @see <a href="https://superset.apache.org/docs/intro">Superset Documentation</a>
*/
@ApplicationPlugin(
    value = "superset",
    category = "analytics",
    displayName = "Superset",
    description = "Data exploration and visualization platform",
    defaultCpu = 1024,
    defaultMemory = 2048,
    defaultInstanceType = "t3.small",
    supportsFargate = true,
    supportsEc2 = true,
    supportsOidc = false,
    supportsDatabase = true,
    requiresDatabase = true
)
public class SupersetApplicationSpec implements ApplicationSpec, DatabaseSpec {

    private static final String APPLICATION_ID = "superset";
    private static final String DEFAULT_IMAGE = "apache/superset:latest";
    private static final int APPLICATION_PORT = 8088;
    private static final String CONTAINER_DATA_PATH = "/app/superset_home";
    private static final String EFS_DATA_PATH = "/superset";
    private static final String VOLUME_NAME = "supersetData";
    private static final String CONTAINER_USER = "0:0"; // runs as root in container
    private static final String EFS_PERMISSIONS = "755";
    private static final String EBS_DEVICE_NAME = "/dev/xvdh";
    private static final String EC2_DATA_PATH = "/opt/superset/data";

    /**
     * Root-only file on the EC2 host that receives the admin password.
     * It is deliberately NOT one of {@link #EC2_LOG_PATHS}, so the password is
     * never handed to the CloudWatch agent together with the log files.
     */
    private static final String ADMIN_PASSWORD_FILE = "/root/superset-admin-password";

    private static final List<String> EC2_LOG_PATHS = List.of(
        "/opt/superset/logs/superset.log",
        "/var/log/userdata.log"
    );

    /** Returns the plugin identifier, {@code "superset"}. */
    @Override
    public String applicationId() {
        return APPLICATION_ID;
    }

    /** Returns the container image used when none is configured. */
    @Override
    public String defaultContainerImage() {
        return DEFAULT_IMAGE;
    }

    /** Returns the port Superset listens on (8088). */
    @Override
    public int applicationPort() {
        return APPLICATION_PORT;
    }

    /** Returns the path inside the container where Superset keeps its home directory. */
    @Override
    public String containerDataPath() {
        return CONTAINER_DATA_PATH;
    }

    /** Returns the EFS path used for Superset data. */
    @Override
    public String efsDataPath() {
        return EFS_DATA_PATH;
    }

    /** Returns the name of the data volume. */
    @Override
    public String volumeName() {
        return VOLUME_NAME;
    }

    /** Returns the container user as {@code "uid:gid"}; Superset runs as {@code 0:0}. */
    @Override
    public String containerUser() {
        return CONTAINER_USER;
    }

    /**
     * Declares the metadata database as mandatory: PostgreSQL 13 on
     * {@code db.t3.small} with 20 units of storage and a database named
     * {@code superset}.
     */
    @Override
    public DatabaseRequirement databaseRequirement() {
        // Superset REQUIRES PostgreSQL/MySQL for metadata storage
        // SQLite is not suitable for production (single-process limitation)
        return DatabaseRequirement.required("postgres", "13")
            .withInstanceClass("db.t3.small")
            .withStorage(20)
            .withDatabaseName("superset");
    }

    /** Returns the PostgreSQL parameter overrides used for the Superset metadata database. */
    @Override
    public Map<String, String> databaseParameters() {
        // PostgreSQL optimization for Superset metadata workload
        return Map.of(
            "max_connections", "150",
            "shared_buffers", "{DBInstanceClassMemory/4096}",
            "work_mem", "8MB",
            "log_statement", "ddl"
        );
    }

    /** Returns the backup retention period in days (14). */
    @Override
    public int backupRetentionDays() {
        return 14; // Superset contains dashboards and analytics metadata
    }

    /**
     * Container environment variables without a database connection.
     *
     * <p>Delegates to the four-argument overload with a {@code null} connection,
     * which yields the fail-fast placeholder database URI.</p>
     */
    @Override
    public Map<String, String> containerEnvironmentVariables(String fqdn, boolean sslEnabled, String authMode) {
        // Delegate to new method with null database connection for backward compatibility
        return containerEnvironmentVariables(fqdn, sslEnabled, authMode, null);
    }

    /**
     * Container environment variables with database connection support.
     *
     * <p>Configures Superset to use RDS PostgreSQL for metadata storage.
     * Superset REQUIRES a database for production deployments.</p>
     *
     * @param fqdn       host name of the deployment (not read by this implementation)
     * @param sslEnabled whether TLS is enabled (not read by this implementation)
     * @param authMode   authentication mode (not read by this implementation)
     * @param dbConn     metadata database connection, or {@code null} to emit a
     *                   placeholder URI that cannot connect
     * @return a new mutable map of environment variable names to values
     */
    public Map<String, String> containerEnvironmentVariables(
            String fqdn, boolean sslEnabled, String authMode, DatabaseConnection dbConn) {
        Map<String, String> environment = new HashMap<>();

        // Superset secret key for session encryption.
        // NOTE(review): this is a fixed placeholder, identical for every deployment;
        // it must be overridden with a real secret by whatever launches the
        // container - confirm that the deployment layer does so.
        environment.put("SUPERSET_SECRET_KEY", "CHANGE_THIS_TO_A_LONG_RANDOM_STRING");

        // Proxy/Load Balancer configuration - CRITICAL for ALB deployments
        // Trust X-Forwarded-* headers from ALB for proper IP logging and HTTPS detection
        environment.put("ENABLE_PROXY_FIX", "True");
        environment.put("PROXY_FIX_X_FOR", "1");    // Number of proxies to trust for X-Forwarded-For
        environment.put("PROXY_FIX_X_PROTO", "1");  // Trust X-Forwarded-Proto
        environment.put("PROXY_FIX_X_HOST", "1");   // Trust X-Forwarded-Host
        environment.put("PROXY_FIX_X_PORT", "1");   // Trust X-Forwarded-Port
        environment.put("PROXY_FIX_X_PREFIX", "1"); // Trust X-Forwarded-Prefix

        // Database configuration (REQUIRED for Superset)
        if (dbConn != null) {
            // Use RDS PostgreSQL for metadata storage
            environment.put("DATABASE_DIALECT", "postgresql");
            environment.put("DATABASE_HOST", dbConn.endpoint());
            environment.put("DATABASE_PORT", String.valueOf(dbConn.port()));
            environment.put("DATABASE_DB", dbConn.databaseName());
            environment.put("DATABASE_USER", dbConn.username());

            // The password is expected to arrive as SUPERSET_DATABASE_PASSWORD
            // (injected by ECS from Secrets Manager), so DATABASE_PASSWORD is
            // intentionally not set here.
            // NOTE(review): the URI below embeds the literal text
            // "${SUPERSET_DATABASE_PASSWORD}"; it only becomes a real password if
            // something downstream expands it before Superset reads the value -
            // confirm against the container entrypoint / task definition.
            String password = "${SUPERSET_DATABASE_PASSWORD}";
            String sqlalchemyUri = String.format(
                "postgresql://%s:%s@%s:%d/%s",
                dbConn.username(),
                password,
                dbConn.endpoint(),
                dbConn.port(),
                dbConn.databaseName()
            );
            environment.put("SQLALCHEMY_DATABASE_URI", sqlalchemyUri);
        } else {
            // NOTE: Superset REQUIRES a database - this should never happen
            // Set placeholder that will fail fast if database is missing
            environment.put("DATABASE_DIALECT", "postgresql");
            environment.put("SQLALCHEMY_DATABASE_URI", "postgresql://MISSING_DATABASE_CONNECTION");
        }
        return environment;
    }

    /** Returns the permission mode applied to the EFS data directory. */
    @Override
    public String efsPermissions() {
        return EFS_PERMISSIONS;
    }

    /** Returns the device name of the EBS data volume. */
    @Override
    public String ebsDeviceName() {
        return EBS_DEVICE_NAME;
    }

    /** Returns the host directory on EC2 where Superset data is mounted. */
    @Override
    public String ec2DataPath() {
        return EC2_DATA_PATH;
    }

    /** Returns the host log files handed to the CloudWatch agent. */
    @Override
    public List<String> ec2LogPaths() {
        return EC2_LOG_PATHS;
    }

    /**
     * Builds the EC2 user-data script for a single-host Superset deployment.
     *
     * <p>Steps, in order: system update, Docker and Docker Compose installation,
     * CloudWatch agent installation, data volume mount (EFS when the context has
     * one, otherwise EBS), Superset configuration file, and finally container
     * start-up with database migration and admin user creation.</p>
     *
     * @param builder receives the generated commands
     * @param context supplies stack name, runtime type, security profile and storage details
     * @throws IllegalStateException if {@link #containerUser()} is not of the form {@code uid:gid}
     */
    @Override
    public void configureUserData(UserDataBuilder builder, Ec2Context context) {
        builder.addSystemUpdate();
        installDocker(builder);

        // Install CloudWatch Agent
        String logGroupName = String.format("/aws/%s/%s/%s",
            context.stackName(),
            context.runtimeType(),
            context.securityProfile());
        builder.installCloudWatchAgent(logGroupName, ec2LogPaths());

        mountDataVolume(builder, context);
        writeSupersetConfig(builder);
        startSuperset(builder);
    }

    /** Adds the commands that install Docker via yum and Docker Compose v2.20.0. */
    private void installDocker(UserDataBuilder builder) {
        builder.addCommands(
            "# Install Docker",
            "yum install -y docker",
            "systemctl enable docker",
            "systemctl start docker",
            "echo 'Docker installed' >> /var/log/userdata.log",
            "",
            "# Install Docker Compose",
            "curl -L \"https://github.com/docker/compose/releases/download/v2.20.0/docker-compose-$(uname -s)-$(uname -m)\" -o /usr/local/bin/docker-compose",
            "chmod +x /usr/local/bin/docker-compose",
            "echo 'Docker Compose installed' >> /var/log/userdata.log"
        );
    }

    /** Mounts EFS (when available) or EBS at {@link #ec2DataPath()}, owned by the container user. */
    private void mountDataVolume(UserDataBuilder builder, Ec2Context context) {
        String[] userParts = containerUser().split(":");
        if (userParts.length < 2) {
            // Surface a misconfigured override clearly instead of an index error.
            throw new IllegalStateException(
                "containerUser() must be of the form 'uid:gid' but was '" + containerUser() + "'");
        }
        String uid = userParts[0];
        String gid = userParts[1];

        if (context.hasEfs()) {
            builder.mountEfs(
                context.efsId().orElseThrow(),
                context.accessPointId().orElseThrow(),
                ec2DataPath(),
                uid,
                gid
            );
        } else {
            builder.mountEbs(
                ebsDeviceName(),
                ec2DataPath(),
                uid,
                gid
            );
        }
    }

    /** Adds the commands that create the host directories and write superset_config.py. */
    private void writeSupersetConfig(UserDataBuilder builder) {
        builder.addCommands(
            "# Create Superset directories",
            "mkdir -p /opt/superset/logs",
            "mkdir -p /opt/superset/config",
            "",
            "# Create Superset configuration",
            "cat > /opt/superset/config/superset_config.py <<'EOF'",
            "# Superset Configuration",
            "",
            "import os",
            "",
            "# Flask App Builder configuration",
            "ROW_LIMIT = 5000",
            "",
            "# Flask Secret Key - retrieved from environment",
            "SECRET_KEY = os.environ.get('SUPERSET_SECRET_KEY', 'CHANGE_THIS_IN_PRODUCTION')",
            "",
            "# SQLAlchemy database URI for Superset metadata",
            "SQLALCHEMY_DATABASE_URI = 'sqlite:///" + CONTAINER_DATA_PATH + "/superset.db'",
            "",
            "# For production, use PostgreSQL:",
            "# SQLALCHEMY_DATABASE_URI = 'postgresql://superset:password@postgres:5432/superset'",
            "",
            "# Superset specific config",
            "WTF_CSRF_ENABLED = True",
            "",
            "# Set this API key to enable Mapbox visualizations",
            "MAPBOX_API_KEY = os.getenv('MAPBOX_API_KEY', '')",
            "EOF"
        );
    }

    /**
     * Adds the commands that obtain the secrets, start the Superset container,
     * initialise it and append the operator instructions to the user-data log.
     *
     * <p>The admin password is written to {@link #ADMIN_PASSWORD_FILE} with mode
     * 600 rather than to {@code /var/log/userdata.log}, because that log file is
     * one of {@link #ec2LogPaths()} passed to the CloudWatch agent.</p>
     */
    private void startSuperset(UserDataBuilder builder) {
        builder.addCommands(
            "# Generate secure secret key and admin password",
            "SUPERSET_SECRET_KEY=$(aws secretsmanager get-secret-value --secret-id ${STACK_NAME:-superset}/secret-key --query SecretString --output text 2>/dev/null || openssl rand -base64 32)",
            "SUPERSET_ADMIN_PASSWORD=$(aws secretsmanager get-secret-value --secret-id ${STACK_NAME:-superset}/admin-password --query SecretString --output text 2>/dev/null || openssl rand -base64 16)",
            "# Store the admin password in a root-only file; never write it to a shipped log",
            "touch " + ADMIN_PASSWORD_FILE + " && chmod 600 " + ADMIN_PASSWORD_FILE,
            "echo \"$SUPERSET_ADMIN_PASSWORD\" > " + ADMIN_PASSWORD_FILE,
            "echo 'Superset admin password stored in " + ADMIN_PASSWORD_FILE + " (readable by root only)' >> /var/log/userdata.log",
            "",
            "# Run Superset container",
            "docker run -d \\",
            " --name superset \\",
            " -p " + APPLICATION_PORT + ":" + APPLICATION_PORT + " \\",
            " -v " + ec2DataPath() + ":" + CONTAINER_DATA_PATH + " \\",
            " -v /opt/superset/config:/app/docker \\",
            " -e SUPERSET_SECRET_KEY=\"$SUPERSET_SECRET_KEY\" \\",
            " " + DEFAULT_IMAGE,
            "",
            "# Wait for container to start",
            "sleep 10",
            "",
            "# Initialize Superset database",
            "docker exec superset superset db upgrade",
            "",
            "# Create admin user with generated password",
            "docker exec superset superset fab create-admin \\",
            " --username admin \\",
            " --firstname Admin \\",
            " --lastname User \\",
            " --email admin@example.com \\",
            " --password \"$SUPERSET_ADMIN_PASSWORD\"",
            "",
            "# Initialize Superset",
            "docker exec superset superset init",
            "",
            "echo 'Superset initialization complete' >> /var/log/userdata.log",
            "echo 'Superset should be available on port 8088' >> /var/log/userdata.log",
            "",
            "cat >> /var/log/userdata.log <<'INSTRUCTIONS'",
            "================================================================================",
            "SUPERSET POST-DEPLOYMENT SETUP",
            "================================================================================",
            "",
            "1. Access Superset:",
            " - Navigate to http://superset.example.com:8088",
            " - Login as user admin with the password stored in " + ADMIN_PASSWORD_FILE,
            " - CHANGE THE PASSWORD IMMEDIATELY!",
            "",
            "2. Connect to databases:",
            " - Go to Data > Databases",
            " - Add PostgreSQL, MySQL, or other data sources",
            " - Use read-only credentials for analytics queries",
            "",
            "3. Configure OIDC authentication:",
            " - Edit superset_config.py",
            " - Configure Flask-OIDC or Flask-AppBuilder OIDC",
            "",
            "4. Enable row-level security:",
            " - Go to Security > Row Level Security",
            " - Define RLS filters for sensitive data",
            "",
            "5. Security hardening:",
            " - Change SECRET_KEY in superset_config.py",
            " - Use PostgreSQL instead of SQLite for metadata",
            " - Enable HTTPS/TLS",
            " - Configure CORS if embedding dashboards",
            "",
            "6. Production configuration:",
            " # Retrieve secrets from Secrets Manager:",
            " DB_PASS=$(aws secretsmanager get-secret-value --secret-id superset/db-password --query SecretString --output text)",
            " docker run -d \\",
            " --name superset \\",
            " -e SUPERSET_SECRET_KEY=$(openssl rand -base64 32) \\",
            " -e DATABASE_DB=superset \\",
            " -e DATABASE_HOST=postgres.example.com \\",
            " -e DATABASE_PASSWORD=$DB_PASS \\",
            " -e DATABASE_USER=superset \\",
            " apache/superset",
            "================================================================================",
            "INSTRUCTIONS"
        );
    }

    /**
     * Reports that no built-in OIDC integration is offered.
     *
     * @return always {@code false}, matching {@code supportsOidc = false} on the annotation
     */
    @Override
    public boolean supportsOidcIntegration() {
        // Superset supports OIDC via Flask-AppBuilder but requires custom configuration
        // The @ApplicationPlugin annotation sets supportsOidc = false to match this
        return false;
    }

    /**
     * Returns the OIDC integration for this application.
     *
     * @return always {@code null}; check {@link #supportsOidcIntegration()} first
     */
    @Override
    public OidcIntegration getOidcIntegration() {
        // OIDC not supported - requires custom superset_config.py configuration
        return null;
    }

    /** Returns a short description containing the application id, default image and port. */
    @Override
    public String toString() {
        return "SupersetApplicationSpec{" +
            "applicationId='" + APPLICATION_ID + '\'' +
            ", defaultImage='" + DEFAULT_IMAGE + '\'' +
            ", applicationPort=" + APPLICATION_PORT +
            '}';
    }
}