s3-genindex/cmd/s3-genindex/main.go

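// Command s3-genindex generates static index.html listings, either for a
// local directory tree or for an S3/MinIO bucket.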
package main

import (
	"context"
	"flag"
	"fmt"
	"log"
	"net/url"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"

	"git.ipng.ch/ipng/s3-genindex/internal/indexgen"
)

// S3Config holds S3 connection configuration.
type S3Config struct {
	Endpoint string
	Bucket   string
	Region   string
	UseSSL   bool
}

// S3Object represents an S3 object.
type S3Object struct {
	Key          string
	Size         int64
	LastModified time.Time
}

// parseS3URL parses an S3 URL and extracts the endpoint, bucket, and scheme.
// Example: http://minio0.chbtl0.net.ipng.ch:9000/ctlog-ro
// Returns: endpoint=minio0.chbtl0.net.ipng.ch:9000, bucket=ctlog-ro, useSSL=false
func parseS3URL(s3URL string) (*S3Config, error) {
	u, err := url.Parse(s3URL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse URL: %w", err)
	}

	if u.Scheme != "http" && u.Scheme != "https" {
		return nil, fmt.Errorf("unsupported scheme: %s", u.Scheme)
	}

	// Extract the bucket from the URL path. For MinIO/S3 URLs like
	// http://host:port/bucket, the bucket is the first path segment.
	path := strings.Trim(u.Path, "/")
	if path == "" {
		return nil, fmt.Errorf("bucket name not found in URL path")
	}
	bucket := strings.Split(path, "/")[0]

	return &S3Config{
		Endpoint: u.Host,
		Bucket:   bucket,
		Region:   "us-east-1", // default region
		UseSSL:   u.Scheme == "https",
	}, nil
}
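
// For example, parseS3URL("https://s3.example.com/logs") yields
// Endpoint "s3.example.com", Bucket "logs", and UseSSL true (host and
// bucket here are illustrative).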

// processS3Bucket processes an S3 bucket and generates index files.
func processS3Bucket(s3Config *S3Config, opts *indexgen.Options) error {
	// Get credentials from environment variables.
	accessKey := os.Getenv("AWS_ACCESS_KEY_ID")
	secretKey := os.Getenv("AWS_SECRET_ACCESS_KEY")
	if accessKey == "" || secretKey == "" {
		return fmt.Errorf("AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables must be set")
	}

	// Create an AWS config with static credentials.
	cfg := aws.Config{
		Region:      s3Config.Region,
		Credentials: credentials.NewStaticCredentialsProvider(accessKey, secretKey, ""),
	}

	// Create an S3 client pointed at the custom endpoint.
	scheme := "http"
	if s3Config.UseSSL {
		scheme = "https"
	}
	client := s3.NewFromConfig(cfg, func(o *s3.Options) {
		o.BaseEndpoint = aws.String(fmt.Sprintf("%s://%s", scheme, s3Config.Endpoint))
		o.UsePathStyle = true // use path-style URLs for MinIO compatibility
	})
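
	// With path-style addressing, requests go to http://host:port/bucket/key
	// rather than the virtual-hosted http://bucket.host:port/key, which is
	// what MinIO expects by default.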

	// List the objects in the bucket. A single ListObjectsV2 call returns at
	// most 1000 keys, so walk the whole bucket with a paginator.
	ctx := context.Background()
	input := &s3.ListObjectsV2Input{
		Bucket: aws.String(s3Config.Bucket),
	}

	if opts.Verbose {
		log.Printf("Listing objects in S3 bucket: %s", s3Config.Bucket)
	}

	// Collect all S3 objects, applying the exclude and filter rules.
	var allObjects []S3Object
	paginator := s3.NewListObjectsV2Paginator(client, input)
	for paginator.HasMorePages() {
		page, err := paginator.NextPage(ctx)
		if err != nil {
			return fmt.Errorf("failed to list S3 objects: %w", err)
		}
		for _, obj := range page.Contents {
			if obj.Key == nil || obj.Size == nil || obj.LastModified == nil {
				continue
			}
			keyName := *obj.Key

			// Skip if excluded by regex.
			if opts.ExcludeRegex != nil && opts.ExcludeRegex.MatchString(keyName) {
				continue
			}

			// Skip hidden files if not included.
			if !opts.IncludeHidden && strings.HasPrefix(keyName, ".") {
				continue
			}

			// Skip generated index files unless ShowIndexFiles is enabled.
			// Match whole path segments so that e.g. "myindex.html" is not
			// skipped by accident.
			if !opts.ShowIndexFiles &&
				(keyName == opts.OutputFile || strings.HasSuffix(keyName, "/"+opts.OutputFile)) {
				continue
			}

			// Simple glob matching for the -f filter. Match the base name,
			// since filepath.Match's "*" does not cross "/" and would
			// otherwise never match files inside subdirectories.
			if opts.Filter != "*" && opts.Filter != "" {
				matched, err := filepath.Match(opts.Filter, filepath.Base(keyName))
				if err != nil || !matched {
					continue
				}
			}

			allObjects = append(allObjects, S3Object{
				Key:          keyName,
				Size:         *obj.Size,
				LastModified: *obj.LastModified,
			})
			if opts.Verbose {
				log.Printf("Found object: %s (%s)", keyName, indexgen.PrettySize(*obj.Size))
			}
		}
	}

	// Process the hierarchical directory structure.
	return processS3Hierarchy(allObjects, opts, client, s3Config)
}

// processS3Hierarchy groups S3 objects into per-directory listings, the way a
// filesystem walk would. For example, the key "logs/2024/app.log" yields a
// file entry "app.log" in directory "logs/2024", plus directory entries
// "2024" under "logs" and "logs" under the bucket root.
func processS3Hierarchy(objects []S3Object, opts *indexgen.Options, client *s3.Client, s3Config *S3Config) error {
	// Group objects by directory path.
	dirMap := make(map[string][]indexgen.FileEntry)
	// Track every directory path we need to create an index for.
	allDirs := make(map[string]bool)

	for _, obj := range objects {
		// Split the key into directory parts.
		parts := strings.Split(obj.Key, "/")
		if len(parts) == 1 {
			// Root-level file.
			entry := createFileEntry(obj, obj.Key)
			dirMap[""] = append(dirMap[""], entry)
		} else {
			// File in a subdirectory.
			fileName := parts[len(parts)-1]
			dirPath := strings.Join(parts[:len(parts)-1], "/")

			entry := createFileEntry(obj, fileName)
			dirMap[dirPath] = append(dirMap[dirPath], entry)

			// Track all parent directories of this key.
			currentPath := ""
			for i, part := range parts[:len(parts)-1] {
				if i == 0 {
					currentPath = part
				} else {
					currentPath = currentPath + "/" + part
				}
				allDirs[currentPath] = true
			}
		}
	}

	// Add a directory entry to its parent's listing. S3 has no real
	// directory objects, so the current time stands in for the directory
	// modification time.
	now := time.Now()
	for dirPath := range allDirs {
		parentPath := ""
		if strings.Contains(dirPath, "/") {
			parts := strings.Split(dirPath, "/")
			parentPath = strings.Join(parts[:len(parts)-1], "/")
		}
		dirName := filepath.Base(dirPath)

		// Build the href relative to the parent directory's index, matching
		// the relative paths used for file entries.
		dirEntryPath := dirName + "/"
		if opts.DirAppend {
			dirEntryPath += opts.OutputFile
		}

		dirEntry := indexgen.FileEntry{
			Name:         dirName,
			Path:         dirEntryPath,
			IsDir:        true,
			Size:         -1,
			ModTime:      now,
			IsSymlink:    false,
			IconType:     "folder",
			CSSClass:     "folder_filled",
			SizePretty:   "—",
			ModTimeISO:   now.Format(time.RFC3339),
			ModTimeHuman: now.Format(time.RFC822),
		}
		dirMap[parentPath] = append(dirMap[parentPath], dirEntry)
	}

	// Set TopDir to the bucket name for template generation.
	opts.TopDir = s3Config.Bucket

	// Generate an index.html for each directory.
	for dirPath, entries := range dirMap {
		indexKey := dirPath
		if indexKey != "" {
			indexKey += "/"
		}
		indexKey += opts.OutputFile

		if err := generateS3HTML(entries, opts, client, s3Config, indexKey); err != nil {
			return fmt.Errorf("failed to generate index for %s: %w", dirPath, err)
		}
	}
	return nil
}
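
// For a bucket holding "a.txt" and "docs/b.txt", this produces two indexes:
// "index.html" (listing "a.txt" and "docs/") and "docs/index.html" (listing
// "b.txt").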

// createFileEntry creates a FileEntry from an S3Object.
func createFileEntry(obj S3Object, displayName string) indexgen.FileEntry {
	return indexgen.FileEntry{
		Name:         displayName,
		Path:         displayName,
		IsDir:        false,
		Size:         obj.Size,
		ModTime:      obj.LastModified,
		IsSymlink:    false,
		IconType:     indexgen.GetIconType(displayName),
		CSSClass:     "file",
		SizePretty:   indexgen.PrettySize(obj.Size),
		ModTimeISO:   obj.LastModified.Format(time.RFC3339),
		ModTimeHuman: obj.LastModified.Format(time.RFC822),
	}
}

// generateS3HTML renders an HTML index for one directory's entries and
// uploads it to S3.
func generateS3HTML(entries []indexgen.FileEntry, opts *indexgen.Options, client *s3.Client, s3Config *S3Config, indexKey string) error {
	// Sort entries by name, mirroring the filesystem listing behavior.
	sort.Slice(entries, func(i, j int) bool {
		return entries[i].Name < entries[j].Name
	})

	// Get the HTML template.
	tmpl := indexgen.GetHTMLTemplate()
	if tmpl == nil {
		return fmt.Errorf("failed to get HTML template")
	}

	// Determine whether this is the root-level index.html (no parent link).
	isRoot := indexKey == opts.OutputFile

	// Prepare template data, mirroring ProcessDir in indexgen.
	data := struct {
		DirName    string
		Entries    []indexgen.FileEntry
		DirAppend  bool
		OutputFile string
		IsRoot     bool
	}{
		DirName:    opts.TopDir, // bucket name serves as the directory name
		Entries:    entries,
		DirAppend:  opts.DirAppend,
		OutputFile: opts.OutputFile,
		IsRoot:     isRoot,
	}

	// Render the HTML in memory.
	var htmlBuffer strings.Builder
	if err := tmpl.Execute(&htmlBuffer, data); err != nil {
		return fmt.Errorf("failed to execute template: %w", err)
	}
	htmlContent := htmlBuffer.String()

	if opts.DryRun {
		// Dry-run mode: report what would be written without uploading.
		dirLevel := strings.TrimSuffix(strings.TrimSuffix(indexKey, opts.OutputFile), "/")
		if dirLevel == "" {
			dirLevel = "(root)"
		}
		fmt.Printf("Would upload S3 index file: s3://%s/%s\n", s3Config.Bucket, indexKey)
		fmt.Printf("Directory level: %s\n", dirLevel)
		fmt.Printf("Objects found: %d\n", len(entries))
		fmt.Printf("Generated HTML size: %d bytes\n", len(htmlContent))
		for _, entry := range entries {
			entryType := "file"
			if entry.IsDir {
				entryType = "directory"
			}
			fmt.Printf(" %s: %s (%s)\n", entryType, entry.Name, entry.SizePretty)
		}
		return nil
	}

	// Upload the rendered HTML to S3.
	ctx := context.Background()
	putInput := &s3.PutObjectInput{
		Bucket:      aws.String(s3Config.Bucket),
		Key:         aws.String(indexKey),
		Body:        strings.NewReader(htmlContent),
		ContentType: aws.String("text/html"),
	}
	if _, err := client.PutObject(ctx, putInput); err != nil {
		return fmt.Errorf("failed to upload %s to S3: %w", indexKey, err)
	}

	if opts.Verbose {
		log.Printf("Uploaded index file: %s to S3 bucket %s (%d entries)", indexKey, s3Config.Bucket, len(entries))
	}
	return nil
}

func main() {
	var opts indexgen.Options
	var excludeRegexStr string
	var directory string
	var s3URL string
	var dryRun bool
	var showIndexFiles bool

	// Set defaults.
	opts.DirAppend = true
	opts.OutputFile = indexgen.DefaultOutputFile
	opts.Recursive = true
	opts.IncludeHidden = true

	flag.StringVar(&directory, "d", "", "local directory to process")
	flag.StringVar(&s3URL, "s3", "", "S3 URL to process")
	flag.StringVar(&opts.Filter, "f", "*", "only include files matching glob")
	flag.BoolVar(&dryRun, "n", false, "dry run: show what would be written without actually writing")
	flag.StringVar(&excludeRegexStr, "x", "", "exclude files matching regular expression")
	flag.BoolVar(&opts.Verbose, "v", false, "verbosely list every processed file")
	flag.BoolVar(&showIndexFiles, "i", false, "show index.html files in directory listings")

	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Generate directory index files (recursion is on and hidden files are included by default).\n")
		fmt.Fprintf(os.Stderr, "The output file is 'index.html'; directory href appending is enabled.\n")
		fmt.Fprintf(os.Stderr, "Specify either -d <directory> OR -s3 <url> (mutually exclusive).\n")
		fmt.Fprintf(os.Stderr, "S3 URLs: http://host:port/bucket or https://host/bucket\n")
		fmt.Fprintf(os.Stderr, "For S3, set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.\n")
		fmt.Fprintf(os.Stderr, "Optionally filter by file type with -f \"*.py\".\n\n")
		fmt.Fprintf(os.Stderr, "Usage: %s [OPTIONS]\n\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "Examples:\n")
		fmt.Fprintf(os.Stderr, " %s -d /path/to/dir\n", os.Args[0])
		fmt.Fprintf(os.Stderr, " %s -s3 http://minio.example.com:9000/bucket\n", os.Args[0])
		fmt.Fprintf(os.Stderr, " %s -v -f \"*.log\" -s3 https://s3.amazonaws.com/logs\n\n", os.Args[0])
		flag.PrintDefaults()
	}
	flag.Parse()

	// Exactly one of -d and -s3 must be provided.
	if directory == "" && s3URL == "" {
		fmt.Fprintf(os.Stderr, "Error: Either -d <directory> or -s3 <url> must be specified.\n\n")
		flag.Usage()
		os.Exit(1)
	}
	if directory != "" && s3URL != "" {
		fmt.Fprintf(os.Stderr, "Error: -d and -s3 are mutually exclusive. Use only one.\n\n")
		flag.Usage()
		os.Exit(1)
	}

	if excludeRegexStr != "" {
		var err error
		opts.ExcludeRegex, err = regexp.Compile(excludeRegexStr)
		if err != nil {
			log.Fatalf("Invalid regular expression: %v", err)
		}
	}

	// Propagate the dry-run and show-index-files flags.
	opts.DryRun = dryRun
	opts.ShowIndexFiles = showIndexFiles
if s3URL != "" {
// Parse S3 URL
s3Config, err := parseS3URL(s3URL)
if err != nil {
log.Fatal("Failed to parse S3 URL:", err)
}
// Process S3 bucket
err = processS3Bucket(s3Config, &opts)
if err != nil {
log.Fatal("Failed to process S3 bucket:", err)
}
} else {
// Process local directory
opts.TopDir = directory
err := indexgen.ProcessDir(opts.TopDir, &opts)
if err != nil {
log.Fatal(err)
}
}
}
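
// Example invocation (endpoint as in parseS3URL's doc comment; credentials
// come from the environment):
//
//	export AWS_ACCESS_KEY_ID=...
//	export AWS_SECRET_ACCESS_KEY=...
//	s3-genindex -v -s3 http://minio0.chbtl0.net.ipng.ch:9000/ctlog-ro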