Python Memory Management
ZacharyVarley opened this issue · comments
ZacharyVarley commented
I am probably missing something simple. After around 10 iterations of a loop that calls syrk, I get an out-of-memory error.
import numpy as np
import pyopencl as cl
from pyopencl.array import Array
import pyclblast
def blast_covariance_matrix(data, datatype='float32'):
    """Compute the covariance matrix of A.

    Uploads ``data`` to an OpenCL device, runs ``pyclblast.syrk`` on it and
    copies the result back to the host.

    NOTE(review): the syrk call below passes ``a_transp=True`` and
    ``lower_triangle=False``, so this presumably produces the uncentered
    product ``data.T @ data`` with only the upper triangle written --
    confirm against the CLBlast documentation.

    Args:
        data: 2-D array; its shape is unpacked as (n_samples, n_features).
        datatype: dtype ``data`` is cast to; also used for the result buffer.

    Returns:
        The (n_features, n_features) result copied back from the device.
    """
    # NOTE: a brand-new OpenCL context is created on every call. The
    # follow-up comment in this thread identifies creating many contexts in
    # a loop as the cause of the out-of-memory error.
    ctx = cl.create_some_context()
    # Set up OpenCL using with so that the queue is automatically released
    with cl.CommandQueue(ctx) as queue:
        # Set up NumPy arrays
        n_samples, n_features = data.shape
        data = data.astype(datatype)
        # Set up OpenCL array: allocate a device buffer, then copy the host
        # data into it
        cl_a = Array(queue, data.shape, data.dtype)
        cl_a.set(data)
        # Prepare an empty OpenCL array for the result
        cl_cov = Array(queue, (n_features, n_features), dtype=datatype)
        # Perform the syrk operation (n = n_features, k = n_samples; both
        # leading dimensions are n_features)
        pyclblast.syrk(queue, n_features, n_samples, cl_a, cl_cov, n_features, n_features, alpha=1.0, beta=0.0,
                       lower_triangle=False, a_transp=True)
        # Transfer result from device to host and try to avoid memory leaks
        covariance_matrix = cl_cov.get()
        # Explicit finish() calls were the author's attempt to release
        # resources; per the thread they did not cure the memory growth.
        cl_a.finish()
        cl_cov.finish()
        queue.finish()
        return covariance_matrix
# Reproduction driver: push 100 freshly generated random matrices
# (100000 samples x 3600 features each) through the routine and report
# progress after every call.
for i, data in enumerate(np.random.rand(100000, 3600) for _ in range(100)):
    cov = blast_covariance_matrix(data)
    print("finished iteration {}".format(i))
ZacharyVarley commented
I found that the culprit was creating a new OpenCL context on every loop iteration. Creating a single ctx once and reusing it for all calls removes the issue:
import numpy as np
import pyopencl as cl
from pyopencl.array import Array
import pyclblast
# Create a single OpenCL context once and reuse it for every call. Creating a
# fresh context inside the function on each call exhausted memory after a
# handful of iterations.
ctx = cl.create_some_context()


def blast_covariance_matrix(data, datatype='float32'):
    """Compute the uncentered scatter matrix ``data.T @ data`` with CLBlast.

    The name is kept for compatibility, but note that no mean is subtracted
    and no normalisation is applied: to obtain a true covariance matrix the
    caller must centre the columns of ``data`` beforehand and divide the
    result by ``n_samples - 1``.

    The work is done by ``pyclblast.syrk``, which only writes one triangle
    of the output. The result buffer is therefore zero-initialised and the
    computed triangle is mirrored on the host, so the returned matrix is
    fully populated and symmetric.

    Args:
        data: Array-like of shape (n_samples, n_features).
        datatype: dtype used on the device for both the input and the result.

    Returns:
        numpy.ndarray of shape (n_features, n_features) and dtype
        ``datatype`` holding ``data.T @ data``.

    Raises:
        ValueError: if ``data`` is not two-dimensional.
    """
    # Convert the dtype and force a C-contiguous (row-major) layout in one
    # step: the leading dimensions handed to syrk below assume row-major
    # storage, which a plain astype() does not guarantee for
    # Fortran-ordered input.
    host_a = np.ascontiguousarray(data, dtype=datatype)
    n_samples, n_features = host_a.shape

    # The queue is released automatically when the with-block exits.
    with cl.CommandQueue(ctx) as queue:
        # Upload the input matrix to the device.
        cl_a = Array(queue, host_a.shape, host_a.dtype)
        cl_a.set(host_a)

        # Zero-initialise the result so the triangle syrk does not touch
        # holds zeros rather than whatever was in the freshly allocated
        # device memory.
        cl_cov = cl.array.zeros(queue, (n_features, n_features), dtype=host_a.dtype)

        # C = 1.0 * A^T A + 0.0 * C, with n = n_features and k = n_samples.
        pyclblast.syrk(queue, n_features, n_samples, cl_a, cl_cov, n_features, n_features, alpha=1.0, beta=0.0,
                       lower_triangle=False, a_transp=True)

        # Blocking copy of the result from device to host.
        triangle = cl_cov.get()

    # Mirror the computed triangle across the diagonal. Because the other
    # triangle is exactly zero, the sum is exact off the diagonal; the
    # diagonal is doubled by the sum, so restore it from the original.
    covariance_matrix = triangle + triangle.T
    np.fill_diagonal(covariance_matrix, triangle.diagonal())
    return covariance_matrix
# Driver: run the routine on 100 independent random matrices of
# 100000 samples x 3600 features, printing a progress line after each.
n_iterations = 100
data_shape = (100000, 3600)
for i in range(n_iterations):
    data = np.random.rand(*data_shape)
    cov = blast_covariance_matrix(data)
    print("finished iteration {}".format(i))