Witam, mam program, który w prosty sposób oblicza całkę numeryczną z funkcji z wykoryzstaniem OpenMP. Zrealizowałem jedną funkcję liczącą na kilka sposobów, jednak nie dają one takich samych wyników, tzn. jeden wynik różni się od przebiegu kontrolnego

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>




float function(float x){
	return pow(x,pow(x,sin(x)));
}



float integrate(float begin, float end, int count){
	float score = 0 , width = (end-begin)/(1.0*count), i=begin, y1, y2;	
	

	for(i = 0; i<count; i++){
		score += (function(begin+(i*width)) + function(begin+(i+1)*width)) * width/2.0;
    }
	return score;
}




float thread1(float begin, float end, int count){
	float score = 0 , width = (end-begin)/(1.0*count), y1, y2;	
	
	int i;
	#pragma omp parallel for reduction(+:score) private(y1,i) shared(count)
    for(i = 0; i<count; i++){
	    y1 = ((function(begin+(i*width)) + function(begin+(i+1)*width)) * width/2.0);
        score = score + y1;
	}
	
	return score;
}


float thread2(float begin, float end, int count){
	float score = 0 , width = (end-begin)/(1.0*count), y1, y2;	
	
	int i;
	float * tab = (float*)malloc(count * sizeof(float));	

	#pragma omp parallel  for 
	for(i = 0; i<count; i++){
		tab[i] = (function(begin+(i*width)) + function(begin+(i+1)*width)) * width/2.0;
	}
	
	for(i=0; i<count; i++)
		score += tab[i];	
	return score;
}


unsigned long long int rdtsc(void){
     unsigned long long int x;
     unsigned a, d;

     __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));

     return ((unsigned long long)a) | (((unsigned long long)d) << 32);
}






int main(int argc, char** argv){
	unsigned long long counter = 0;	
	
	//Przebiegi rozgrzewające
    integrate (atof(argv[1]), atof(argv[2]), atoi(argv[3]));
    integrate (atof(argv[1]), atof(argv[2]), atoi(argv[3]));
    integrate (atof(argv[1]), atof(argv[2]), atoi(argv[3]));
  	
	//test
	counter = rdtsc();
    printf("control: %f \n ",integrate (atof(argv[1]), atof(argv[2]), atoi(argv[3])));  	
	printf("control count: %lld \n",rdtsc()-counter);
    counter = rdtsc();
	printf("thread1: %f \n ",thread1(atof(argv[1]), atof(argv[2]), atoi(argv[3])));  	
	printf("thread1 count: %lld \n",rdtsc()-counter);
	counter = rdtsc();
	printf("thread2: %f \n ",thread2(atof(argv[1]), atof(argv[2]), atoi(argv[3])));  	
	printf("thread2 count: %lld \n",rdtsc()-counter);
	
	return 0;
}
 

a tu są przykladowe wyniki :

 gcc -fopenmp zad2.c -o zad -pg -lm
env OMP_NUM_THREADS=2 ./zad 3 13 100000
control: 5407308.500000 
 control count: 138308058 
thread1: 5407494.000000 
 thread1 count: 96525618 
thread2: 5407308.500000 
 thread2 count: 104770859 

Czy ktoś by mógłw ytłumaczyć mi, gdzie leży problem?