Regarding concurrency issues with the Reconcile method in the PodGroup controller
googs1025 opened this issue
A k8s operator controller operates concurrently, so we should use locks in the Reconcile method to address concurrency issues, right? I think it is necessary, but if my understanding is incorrect, I will close the issue. If locks are indeed required, I will try to resolve them.
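For context, my understanding is that controller-runtime only runs Reconcile calls in parallel when MaxConcurrentReconciles is raised above its default of 1, and even then its workqueue never hands the same object key to two workers at once, so two reconciles of the same PodGroup should not overlap. A minimal sketch of where that knob lives (the wiring below is illustrative, not copied from this repo, and the schedv1alpha1 import path is an assumption):

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/controller"

	// Assumed import path for the PodGroup API types used below.
	schedv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
)

func (r *PodGroupReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&schedv1alpha1.PodGroup{}).
		// The default is 1, i.e. no parallelism inside this controller.
		// With a higher value, different PodGroups may reconcile in
		// parallel, but the workqueue still serializes work per key.
		WithOptions(controller.Options{MaxConcurrentReconciles: 4}).
		Complete(r)
}

Here is the Reconcile code I am asking about: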
func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := log.FromContext(ctx)
	log.Info("reconciling")
	pg := &schedv1alpha1.PodGroup{}
	if err := r.Get(ctx, req.NamespacedName, pg); err != nil {
		if apierrs.IsNotFound(err) {
			log.V(5).Info("Pod group has been deleted")
			return ctrl.Result{}, nil
		}
		log.V(3).Error(err, "Unable to retrieve pod group")
		return ctrl.Result{}, err
	}
	...
	switch pgCopy.Status.Phase {
	case "":
		pgCopy.Status.Phase = schedv1alpha1.PodGroupPending
	case schedv1alpha1.PodGroupPending:
		...
	default:
		// here.....
		pgCopy.Status.Running, pgCopy.Status.Succeeded, pgCopy.Status.Failed = getCurrentPodStats(pods)
		if len(pods) < int(pg.Spec.MinMember) {
			pgCopy.Status.Phase = schedv1alpha1.PodGroupPending
			break
		}
		...
	}
	return r.patchPodGroup(ctx, pg, pgCopy)
}
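Even without a mutex, concurrent writers (another controller replica, the scheduler, kubectl) are possible, and my understanding is that the usual Kubernetes answer is optimistic concurrency rather than locking: every write carries the object's resourceVersion, the API server rejects stale writes with a Conflict error, and the client re-reads and retries. A mutex in Reconcile would only serialize a single process anyway. A sketch of that pattern, assuming the reconciler embeds a controller-runtime client (updateStatusWithRetry is a hypothetical helper, not code from this repo):

import (
	"context"

	"k8s.io/client-go/util/retry"
	ctrl "sigs.k8s.io/controller-runtime"
)

// updateStatusWithRetry re-reads the PodGroup on every attempt so each
// write carries a fresh resourceVersion, and retries on Conflict errors.
func (r *PodGroupReconciler) updateStatusWithRetry(ctx context.Context, req ctrl.Request) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		latest := &schedv1alpha1.PodGroup{}
		if err := r.Get(ctx, req.NamespacedName, latest); err != nil {
			return err
		}
		latest.Status.Phase = schedv1alpha1.PodGroupPending // example mutation
		// Rejected with a Conflict error if someone else updated the
		// object in between; RetryOnConflict then tries again.
		return r.Status().Update(ctx, latest)
	})
}

The other place I wondered about is getCurrentPodStats: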
func getCurrentPodStats(pods []v1.Pod) (int32, int32, int32) {
	// should we lock here?
	if len(pods) == 0 {
		return 0, 0, 0
	}
	var (
		running   int32 = 0
		succeeded int32 = 0
		failed    int32 = 0
	)
	for _, pod := range pods {
		switch pod.Status.Phase {
		case v1.PodRunning:
			running++
		case v1.PodSucceeded:
			succeeded++
		case v1.PodFailed:
			failed++
		}
	}
	return running, succeeded, failed
}
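For getCurrentPodStats specifically, the pods slice appears to be built fresh inside each Reconcile call, so it is local to that invocation and nothing else mutates it; a lock inside this function would not be guarding any shared state. A sketch of how that slice would typically be produced (countPodGroupPods and the label key are my guesses, not taken from this repo):

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// countPodGroupPods lists the pods of one PodGroup and counts their phases.
func (r *PodGroupReconciler) countPodGroupPods(ctx context.Context, pg *schedv1alpha1.PodGroup) (int32, int32, int32, error) {
	podList := &v1.PodList{}
	if err := r.List(ctx, podList,
		client.InNamespace(pg.Namespace),
		// Assumed label linking Pods to their PodGroup; the real key may differ.
		client.MatchingLabels{"scheduling.x-k8s.io/pod-group": pg.Name},
	); err != nil {
		return 0, 0, 0, err
	}
	// podList.Items is owned by this Reconcile invocation, so reading it in
	// getCurrentPodStats needs no mutex.
	running, succeeded, failed := getCurrentPodStats(podList.Items)
	return running, succeeded, failed, nil
}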
/assign