Skip to content

This OpenMP code crashes linux

An answer to this question on Stack Overflow.

Question

I was working on some code to row reduce a matrix using openmp. I have two version of it and both make my Ubuntu and Fedora installations crash hard. By hard I mean my mouse and keyboard are unresponsive, and even when I hit the reset button on my PC tower it will not restart. I have to hold down the power button. What is strange about this is that the code crashes after a few minutes of running. It is not consuming large amounts of memory (I consider 750 mb to be small since I have 16gb of ram).

#include <iostream>
#include <cstddef>
#include <cstring>
#include <iomanip>
#include <cstdlib>
#include <ctime>
#include <cmath>
using namespace std;
class Matrix
{
	public:
	
	Matrix(size_t rows, size_t cols):
		data(0), w(rows), h(cols)
	{
		data = new double[w * h];
		memset(data, 0, sizeof(double) * w * h);
	}
	
	~Matrix()
	{
		if(data)
		{
			delete[] data;
			w = h = 0;
			data = 0;
		}
	}
	
	double* operator[](size_t row)
	{
		return data + row * w;
	}
	
	const double* operator[](size_t row) const
	{
		return data + row * w;
	}
	
	size_t width() const
	{
		return w;
	}
	
	size_t height() const
	{
		return h;
	}
	void scale_row(size_t row, double x)
	{
		double* prow = (*this)[row];
		
		for(size_t i = 0; i < w; i++)
			prow[i] *= x;
	}
	
	void add_row(size_t dest_row, size_t source_row, double scaling = 1.0)
	{
		if(dest_row == source_row)
		{
			scale_row(dest_row, 1.0 + scaling);
			return;
		}
		
		double* __restrict__ drow = (*this)[dest_row];
		double* __restrict__ srow = (*this)[source_row];
		
		for(size_t i = 0; i < w; i++)
			drow[i] += srow[i] * scaling;
	}
	
	void swap_rows(size_t r1, size_t r2)
	{
		if(r1 == r2)
			return;
		
		double* __restrict__ a = (*this)[r1];
		double* __restrict__ b = (*this)[r2];
		
		#pragma omp parallel for simd
		for(size_t i = 0; i < w; i++)
		{
			double tmp = a[i];
			a[i] = b[i];
			b[i] = tmp;
		}
	}
	
	double* find_leading(size_t row)
	{
		double* ptr = (*this)[row];
		for(size_t i = 0; i < w; i++)
			if(ptr[i])
				return ptr + i;
		return 0;
	}
	
	void clamp_zeros(double threshold = 1e-12)
	{
		#pragma omp parallel for simd
		for(size_t i = 0; i < w * h; i++)
		{
			if(fabs(data[i]) < threshold)
				data[i] = 0;
		}
	}
	
	void row_reduce(Matrix* mirror = 0)
	{
		for(size_t r1 = 0; r1 < h; r1++)
		{
			double* lead = find_leading(r1);
			if(!lead)
				continue;
			
			size_t rank = lead - (*this)[r1];
			if(mirror)
				mirror->scale_row(r1, 1.0 / *lead);
			scale_row(r1, 1.0 / *lead);
			
			#pragma omp parallel for
			for(size_t r2 = 0; r2 < h; r2++)
			{
				if(r2 == r1 || (*this)[r2][rank] == 0)
					continue;
				if(mirror)
					mirror->add_row(r2, r1, -(*this)[r2][rank]);
				add_row(r2, r1, -(*this)[r2][rank]);
			}
			clamp_zeros();
		}
		
		size_t zero_count = 0;
		for(size_t r = 0; r < h; r++)
		{
			double* lead = find_leading(r);
			if(lead)
			{
				size_t rank = lead - (*this)[r];
				swap_rows(rank, r);
				if(mirror)
					mirror->swap_rows(rank, r);
			}
			else
			{
				size_t with = h - ++zero_count;
				swap_rows(r, with);
				if(mirror)
					mirror->swap_rows(r, with);
			}
		}
	}
	
	private:
	
	double* data;
	size_t w, h;
};
ostream& operator<<(ostream& o, const Matrix& m)
{
	o << setprecision(2);
	for(size_t j = 0; j < m.width(); j++)
	{
		o << "----------";
	}
	o << "--\n";
	for(size_t i = 0; i < m.height(); i++)
	{
		o << "|";
		for(size_t j = 0; j < m.width(); j++)
		{
			o << setw(10) << m[i][j];
		}
		o << "|\n";
	}
	for(size_t j = 0; j < m.width(); j++)
	{
		o << "----------";
	}
	o << "--";
	return o;
}
int main()
{
	srand(time(0));
	Matrix m (10000, 10000);
	
	for(int i = 0; i < m.height(); i++)
	{
		for(int j = 0; j < m.width(); j++)
		{
			m[i][j] = rand() % 100;
		}
	}
	
	time_t start = time(0);
	m.row_reduce();
	time_t end = time(0);
	cout << m[0][2] << endl;
	cout << "dt = " << (end - start) << endl;
	return 0;
}

I also tried out another kind of stupid simple omp program to see if it would crash my system and this one did not.

double sum = 0.0;
double start = omp_get_wtime();
#pragma omp parallel for reduction(+:sum)
for(long long i = 1; i < 100000000000000LL; i++)
{
	sum += 1.0 / ((double)i * i);
}
printf("%lf %lf\n", omp_get_wtime() - start, sum);

I tried the first one and ran into the same issue when I ran it on Ubuntu 15.04 compiled with gcc 4.9 and Fedora 22 compiled with gcc 5.1.

When I run it without openmp it works fine. Also if I try smaller data like a 2000x2000 matrix it works fine (the crash happens when I try a 10,000x10,000 matrix).

Seems to work fine on my laptop which is also running ubuntu 15.04.

Answer

I tested your code on Linux 3.19.0-26-generic #28-Ubuntu 64-bit using GCC 4.9.2.

As depicted below, it used some RAM, but I haven't crashed yet with 11 minutes of CPU time on the clock and RAM holding steady.

System Stats