Regarding concurrency issues with the Reconcile method in the PodGroup controller
googs1025 opened this issue
A k8s operator controller operates concurrently, so we should use locks in the Reconcile method to address concurrency issues, right? I think it is necessary, but if my understanding is incorrect, I will close the issue. If locks are indeed required, I will try to resolve them.
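For context, my understanding is that controller-runtime only runs Reconcile calls in parallel when MaxConcurrentReconciles is raised above its default of 1, and even then its workqueue never hands the same object key to two workers at once, so two reconciles of the same PodGroup should not overlap. A minimal sketch of where that knob lives (the wiring below is illustrative, not copied from this repo, and the schedv1alpha1 import path is an assumption):

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/controller"

	// Assumed import path for the PodGroup API types used below.
	schedv1alpha1 "sigs.k8s.io/scheduler-plugins/apis/scheduling/v1alpha1"
)

func (r *PodGroupReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&schedv1alpha1.PodGroup{}).
		// The default is 1, i.e. no parallelism inside this controller.
		// With a higher value, different PodGroups may reconcile in
		// parallel, but the workqueue still serializes work per key.
		WithOptions(controller.Options{MaxConcurrentReconciles: 4}).
		Complete(r)
}

Here is the Reconcile code I am asking about: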
func (r *PodGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	log := log.FromContext(ctx)
	log.Info("reconciling")
	pg := &schedv1alpha1.PodGroup{}
	if err := r.Get(ctx, req.NamespacedName, pg); err != nil {
		if apierrs.IsNotFound(err) {
			log.V(5).Info("Pod group has been deleted")
			return ctrl.Result{}, nil
		}
		log.V(3).Error(err, "Unable to retrieve pod group")
		return ctrl.Result{}, err
	}
	...
	switch pgCopy.Status.Phase {
	case "":
		pgCopy.Status.Phase = schedv1alpha1.PodGroupPending
	case schedv1alpha1.PodGroupPending:
		...
	default:
		// here.....
		pgCopy.Status.Running, pgCopy.Status.Succeeded, pgCopy.Status.Failed = getCurrentPodStats(pods)
		if len(pods) < int(pg.Spec.MinMember) {
			pgCopy.Status.Phase = schedv1alpha1.PodGroupPending
			break
		}
		...
	}
	return r.patchPodGroup(ctx, pg, pgCopy)
}
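Even without a mutex, concurrent writers (another controller replica, the scheduler, kubectl) are possible, and my understanding is that the usual Kubernetes answer is optimistic concurrency rather than locking: every write carries the object's resourceVersion, the API server rejects stale writes with a Conflict error, and the client re-reads and retries. A mutex in Reconcile would only serialize a single process anyway. A sketch of that pattern, assuming the reconciler embeds a controller-runtime client (updateStatusWithRetry is a hypothetical helper, not code from this repo):

import (
	"context"

	"k8s.io/client-go/util/retry"
	ctrl "sigs.k8s.io/controller-runtime"
)

// updateStatusWithRetry re-reads the PodGroup on every attempt so each
// write carries a fresh resourceVersion, and retries on Conflict errors.
func (r *PodGroupReconciler) updateStatusWithRetry(ctx context.Context, req ctrl.Request) error {
	return retry.RetryOnConflict(retry.DefaultRetry, func() error {
		latest := &schedv1alpha1.PodGroup{}
		if err := r.Get(ctx, req.NamespacedName, latest); err != nil {
			return err
		}
		latest.Status.Phase = schedv1alpha1.PodGroupPending // example mutation
		// Rejected with a Conflict error if someone else updated the
		// object in between; RetryOnConflict then tries again.
		return r.Status().Update(ctx, latest)
	})
}

The other place I wondered about is getCurrentPodStats: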
func getCurrentPodStats(pods []v1.Pod) (int32, int32, int32) {
	// should we lock here?
	if len(pods) == 0 {
		return 0, 0, 0
	}
	var (
		running   int32 = 0
		succeeded int32 = 0
		failed    int32 = 0
	)
	for _, pod := range pods {
		switch pod.Status.Phase {
		case v1.PodRunning:
			running++
		case v1.PodSucceeded:
			succeeded++
		case v1.PodFailed:
			failed++
		}
	}
	return running, succeeded, failed
}
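For getCurrentPodStats specifically, the pods slice appears to be built fresh inside each Reconcile call, so it is local to that invocation and nothing else mutates it; a lock inside this function would not be guarding any shared state. A sketch of how that slice would typically be produced (countPodGroupPods and the label key are my guesses, not taken from this repo):

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// countPodGroupPods lists the pods of one PodGroup and counts their phases.
func (r *PodGroupReconciler) countPodGroupPods(ctx context.Context, pg *schedv1alpha1.PodGroup) (int32, int32, int32, error) {
	podList := &v1.PodList{}
	if err := r.List(ctx, podList,
		client.InNamespace(pg.Namespace),
		// Assumed label linking Pods to their PodGroup; the real key may differ.
		client.MatchingLabels{"scheduling.x-k8s.io/pod-group": pg.Name},
	); err != nil {
		return 0, 0, 0, err
	}
	// podList.Items is owned by this Reconcile invocation, so reading it in
	// getCurrentPodStats needs no mutex.
	running, succeeded, failed := getCurrentPodStats(podList.Items)
	return running, succeeded, failed, nil
}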
/assign