initial commit

This commit is contained in:
m.zare
2026-04-10 18:25:21 +03:30
commit 77ca6c34a3
263 changed files with 34470 additions and 0 deletions

283
pkg/metrics/metrics.go Normal file
View File

@@ -0,0 +1,283 @@
package metrics
import (
"fmt"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// Metrics bundles the duration histograms exported by the base service
// together with the naming configuration they were created with.
type Metrics struct {
	// HTTPRequest tracks HTTP request durations
	// (labels: method, endpoint, status_code).
	HTTPRequest *prometheus.HistogramVec

	// DatabaseQuery tracks database query durations
	// (labels: operation, table, error).
	DatabaseQuery *prometheus.HistogramVec

	// RabbitMQMessages tracks RabbitMQ publish/consume durations
	// (labels: exchange, routing_key, action, error).
	RabbitMQMessages *prometheus.HistogramVec

	// BusinessOperations tracks business operation durations
	// (labels: operation_type, error).
	BusinessOperations *prometheus.HistogramVec

	// Cache tracks cache operation durations
	// (labels: cache_type, key_pattern, action, hit, error).
	Cache *prometheus.HistogramVec

	// ExternalServiceCall tracks outbound service call durations
	// (labels: service_name, endpoint, error).
	ExternalServiceCall *prometheus.HistogramVec

	// Naming configuration captured at construction time; exposed read-only
	// through GetNamespace, GetSubsystem and GetServiceName.
	namespace, subsystem, serviceName string
}
// metricsInstance is the process-wide Metrics value handed out by GetMetrics.
// It stays nil until GetMetrics has been called for the first time.
var metricsInstance *Metrics

// metricsOnce guards the one-time construction of metricsInstance.
var metricsOnce = new(sync.Once)

// startTime is captured when the package is initialised; GetMetricsSummary
// reports uptime relative to it.
var startTime = time.Now()
// GetMetrics returns the process-wide Metrics singleton.
//
// The instance is built by newMetrics on the first call only, using that
// call's namespace, subsystem and serviceName. Every later call returns the
// same pointer and its arguments are ignored.
func GetMetrics(namespace, subsystem, serviceName string) *Metrics {
	initialize := func() {
		metricsInstance = newMetrics(namespace, subsystem, serviceName)
	}
	metricsOnce.Do(initialize)

	return metricsInstance
}
// newMetrics builds a Metrics value and creates all of its histograms through
// promauto.
//
// Every histogram shares the given namespace and subsystem, uses
// prometheus.DefBuckets and carries a constant "service" label set to
// serviceName. Histograms are created in the order HTTP, database, RabbitMQ,
// business, cache, external service.
func newMetrics(namespace, subsystem, serviceName string) *Metrics {
	// histogram creates one duration histogram with the shared options; only
	// the metric name, help text and variable label names differ per metric.
	histogram := func(name, help string, labels ...string) *prometheus.HistogramVec {
		opts := prometheus.HistogramOpts{
			Namespace:   namespace,
			Subsystem:   subsystem,
			Name:        name,
			Help:        help,
			Buckets:     prometheus.DefBuckets,
			ConstLabels: prometheus.Labels{"service": serviceName},
		}
		return promauto.NewHistogramVec(opts, labels)
	}

	m := &Metrics{
		namespace:   namespace,
		subsystem:   subsystem,
		serviceName: serviceName,
	}

	m.HTTPRequest = histogram(
		"http_request_duration_seconds",
		"HTTP request duration in seconds",
		"method", "endpoint", "status_code",
	)
	m.DatabaseQuery = histogram(
		"database_query_duration_seconds",
		"Database query duration in seconds",
		"operation", "table", "error",
	)
	m.RabbitMQMessages = histogram(
		"rabbitmq_messages_duration_seconds",
		"Duration of RabbitMQ message operations (publish/consume) in seconds",
		"exchange", "routing_key", "action", "error",
	)
	m.BusinessOperations = histogram(
		"business_operations_duration_seconds",
		"Duration of business operations in seconds",
		"operation_type", "error",
	)
	m.Cache = histogram(
		"cache_operations_duration_seconds",
		"Duration of store operations in seconds",
		"cache_type", "key_pattern", "action", "hit", "error",
	)
	m.ExternalServiceCall = histogram(
		"external_service_duration_seconds",
		"External service call duration in seconds",
		"service_name", "endpoint", "error",
	)

	return m
}
// GetNamespace returns the namespace string this Metrics value was
// constructed with, i.e. the Namespace given to every histogram created in
// newMetrics.
func (m *Metrics) GetNamespace() string {
return m.namespace
}
// GetSubsystem returns the subsystem string this Metrics value was
// constructed with, i.e. the Subsystem given to every histogram created in
// newMetrics.
func (m *Metrics) GetSubsystem() string {
return m.subsystem
}
// GetServiceName returns the service name this Metrics value was constructed
// with; newMetrics attaches the same value to every histogram as the constant
// "service" label.
func (m *Metrics) GetServiceName() string {
return m.serviceName
}
// GetFullMetricName returns the fully qualified name for metricName under
// this instance's namespace and subsystem.
//
// The parts are joined with "_" and empty parts are skipped, following the
// same rules as prometheus.BuildFQName, so the result matches the name the
// histograms are actually registered under even when the namespace or the
// subsystem is empty. An empty metricName yields an empty string.
func (m *Metrics) GetFullMetricName(metricName string) string {
	if metricName == "" {
		return ""
	}

	switch {
	case m.namespace != "" && m.subsystem != "":
		return fmt.Sprintf("%s_%s_%s", m.namespace, m.subsystem, metricName)
	case m.namespace != "":
		return fmt.Sprintf("%s_%s", m.namespace, metricName)
	case m.subsystem != "":
		return fmt.Sprintf("%s_%s", m.subsystem, metricName)
	}
	return metricName
}
// RecordHTTPRequest observes the duration of one HTTP request, in seconds, in
// the HTTPRequest histogram under the given method, endpoint and statusCode
// label values.
func (m *Metrics) RecordHTTPRequest(method, endpoint, statusCode string, duration time.Duration) {
	observer := m.HTTPRequest.WithLabelValues(method, endpoint, statusCode)
	observer.Observe(duration.Seconds())
}
// Patterns used by NormalizePath. They are compiled once at package
// initialisation instead of on every call.
var (
	pathUUIDPattern   = regexp.MustCompile(`/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
	pathHashPattern   = regexp.MustCompile(`/[a-zA-Z0-9]{20,}`)
	pathLongIDPattern = regexp.MustCompile(`/\d{10,}`)
	pathIDPattern     = regexp.MustCompile(`/\d+`)
)

// NormalizePath replaces dynamic-looking path segments with fixed
// placeholders so that the result can be used as a low-cardinality metric
// label.
//
// Replacements are applied from the most specific pattern to the most
// generic one:
//
//	/<uuid>                     -> /:uuid    (hex UUID, either letter case)
//	/<20+ alphanumerics>        -> /:hash
//	/<10+ digits>               -> /:long_id
//	/<digits>                   -> /:id
//
// The order matters: if the generic digit rule ran first it would rewrite the
// leading digits of UUIDs and hashes (leaving the rest of the raw value in the
// label) and would make the :long_id rule unreachable.
//
// Each pattern is anchored only at the leading "/", so a segment such as
// "/123.json" becomes "/:id.json". Paths without a matching segment are
// returned unchanged.
func (m *Metrics) NormalizePath(path string) string {
	path = pathUUIDPattern.ReplaceAllString(path, "/:uuid")
	path = pathHashPattern.ReplaceAllString(path, "/:hash")
	path = pathLongIDPattern.ReplaceAllString(path, "/:long_id")
	path = pathIDPattern.ReplaceAllString(path, "/:id")
	return path
}
// Patterns used by NormalizeExternalServiceEndpoint. They are compiled once
// at package initialisation instead of on every call.
var (
	endpointUUIDPattern   = regexp.MustCompile(`[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
	endpointHashPattern   = regexp.MustCompile(`[a-zA-Z0-9]{20,}`)
	endpointLongIDPattern = regexp.MustCompile(`\d{10,}`)
	endpointIDPattern     = regexp.MustCompile(`\d+`)
)

// NormalizeExternalServiceEndpoint replaces dynamic-looking parts of an
// external service endpoint name with fixed placeholders. Use it when
// endpoint names are dynamic and could otherwise cause metric cardinality
// issues.
//
// Replacements are applied from the most specific pattern to the most
// generic one:
//
//	<uuid>                 -> :uuid    (hex UUID, either letter case)
//	<20+ alphanumerics>    -> :hash
//	<10+ digits>           -> :long_id
//	<digits>               -> :id
//
// The order matters: if the generic digit rule ran first it would split UUIDs
// and hashes into fragments (leaving raw hex in the label) and would make the
// :long_id rule unreachable.
//
// Unlike NormalizePath, the patterns are not anchored to a "/", so every run
// of digits is rewritten wherever it occurs (for example "v1" becomes
// "v:id").
func (m *Metrics) NormalizeExternalServiceEndpoint(endpoint string) string {
	endpoint = endpointUUIDPattern.ReplaceAllString(endpoint, ":uuid")
	endpoint = endpointHashPattern.ReplaceAllString(endpoint, ":hash")
	endpoint = endpointLongIDPattern.ReplaceAllString(endpoint, ":long_id")
	endpoint = endpointIDPattern.ReplaceAllString(endpoint, ":id")
	return endpoint
}
// RecordDatabaseQuery observes the duration of one database query, in
// seconds, in the DatabaseQuery histogram. The "error" label is derived from
// err via classifyError.
func (m *Metrics) RecordDatabaseQuery(operation, table string, duration time.Duration, err error) {
	errLabel := m.classifyError(err)
	observer := m.DatabaseQuery.WithLabelValues(operation, table, errLabel)
	observer.Observe(duration.Seconds())
}
// RecordRabbitMQMessage observes the duration of one RabbitMQ message
// operation, in seconds, in the RabbitMQMessages histogram. The "error" label
// is derived from err via classifyError.
func (m *Metrics) RecordRabbitMQMessage(exchange, routingKey, action string, duration time.Duration, err error) {
	errLabel := m.classifyError(err)
	observer := m.RabbitMQMessages.WithLabelValues(exchange, routingKey, action, errLabel)
	observer.Observe(duration.Seconds())
}
// RecordBusinessOperation observes the duration of one business operation,
// in seconds, in the BusinessOperations histogram. The "error" label is
// derived from err via classifyError.
func (m *Metrics) RecordBusinessOperation(operationType string, err error, duration time.Duration) {
	errLabel := m.classifyError(err)
	observer := m.BusinessOperations.WithLabelValues(operationType, errLabel)
	observer.Observe(duration.Seconds())
}
// RecordCacheHit observes the duration of one cache operation, in seconds,
// in the Cache histogram. The "hit" label is hit formatted with
// strconv.FormatBool ("true" or "false") and the "error" label is derived
// from err via classifyError.
func (m *Metrics) RecordCacheHit(cacheType, keyPattern, action string, hit bool, err error, duration time.Duration) {
	hitLabel := strconv.FormatBool(hit)
	errLabel := m.classifyError(err)

	observer := m.Cache.WithLabelValues(cacheType, keyPattern, action, hitLabel, errLabel)
	observer.Observe(duration.Seconds())
}
// RecordExternalServiceCall observes the duration of one call to an external
// service, in seconds, in the ExternalServiceCall histogram. The "error"
// label is derived from err via classifyError.
func (m *Metrics) RecordExternalServiceCall(serviceName, endpoint string, err error, duration time.Duration) {
	errLabel := m.classifyError(err)
	observer := m.ExternalServiceCall.WithLabelValues(serviceName, endpoint, errLabel)
	observer.Observe(duration.Seconds())
}
// classifyError maps err onto a small, fixed set of label values so that raw
// error messages never end up in a metric label.
//
// A nil error yields "none". Otherwise the error message is searched for
// known substrings and the first matching case wins; messages that match
// nothing yield "unknown_error". Matching is case-sensitive except for
// "deadlock".
func (m *Metrics) classifyError(err error) string {
	if err == nil {
		return "none"
	}

	errStr := err.Error()
	switch {
	// The specific connection failures have to be tested before the generic
	// "connection" substring; with the generic case first they could never be
	// selected.
	case strings.Contains(errStr, "connection lost"):
		return "connection_lost"
	case strings.Contains(errStr, "connection reset by peer"):
		return "connection_reset_by_peer"
	case strings.Contains(errStr, "connection"):
		return "connection_error"
	case strings.Contains(errStr, "timeout"):
		return "timeout_error"
	case strings.Contains(strings.ToLower(errStr), "deadlock"):
		return "deadlock_error"
	case strings.Contains(errStr, "not found") || strings.Contains(errStr, "NotFound"):
		return "not_found_error"
	case strings.Contains(errStr, "Duplicate"):
		return "duplicate_error"
	case strings.Contains(errStr, "permission"):
		return "permission_error"
	case strings.Contains(errStr, "validation"):
		return "validation_error"
	case strings.Contains(errStr, "failed to publish") || strings.Contains(errStr, "publish error"):
		return "publish_error"
	case strings.Contains(errStr, "failed to marshal"):
		return "marshal_error"
	case strings.Contains(errStr, "failed to save"):
		return "save_error"
	case strings.Contains(errStr, "too many open files"):
		return "too_many_open_files"
	case strings.Contains(errStr, "no such file or directory"):
		return "no_such_file"
	case strings.Contains(errStr, "failed to parse CSV"):
		return "parse_csv_error"
	case strings.Contains(errStr, "Internal Server Error"):
		return "internal_server_error"
	default:
		return "unknown_error"
	}
}
// RecordCacheMetrics records one cache operation. It forwards all of its
// arguments unchanged to RecordCacheHit and does nothing else.
func (m *Metrics) RecordCacheMetrics(cacheType, keyPattern, action string, hit bool, err error, duration time.Duration) {
m.RecordCacheHit(cacheType, keyPattern, action, hit, err, duration)
}
// RecordDatabaseOperation records one database operation. It forwards all of
// its arguments unchanged to RecordDatabaseQuery and does nothing else.
func (m *Metrics) RecordDatabaseOperation(operation, table string, duration time.Duration, err error) {
m.RecordDatabaseQuery(operation, table, duration, err)
}
// GetMetricsSummary returns a snapshot of basic process information.
//
// The returned map has exactly three entries:
//
//	"uptime_seconds"  float64  seconds elapsed since startTime
//	"goroutines"      int      current runtime.NumGoroutine() value
//	"start_time"      string   startTime formatted as time.RFC3339
//
// It does not read any of the histograms held by m.
func (m *Metrics) GetMetricsSummary() map[string]interface{} {
	summary := make(map[string]interface{}, 3)

	summary["uptime_seconds"] = time.Since(startTime).Seconds()
	summary["goroutines"] = runtime.NumGoroutine()
	summary["start_time"] = startTime.Format(time.RFC3339)

	return summary
}