FlexBox - A Flexible Primal-Dual ToolBox
flexGradientOperator.h
1 #ifndef flexGradientOperator_H
2 #define flexGradientOperator_H
3 
4 #include <vector>
5 #include "flexLinearOperator.h"
6 
7 #ifdef __CUDACC__
8 template<typename T>
9 __global__ void dxp2dCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, mySign s)
10 {
11  const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
12  const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
13 
14  if (x >= sizeX || y >= sizeY)
15  return;
16 
17  const size_t tmpIndex = x + y * sizeX;
18 
19  if (x < sizeX - 1)
20  {
21  switch (s)
22  {
23  case PLUS:
24  {
25  output[tmpIndex] += input[tmpIndex + 1] - input[tmpIndex];
26  break;
27  }
28  case MINUS:
29  {
30  output[tmpIndex] -= input[tmpIndex + 1] - input[tmpIndex];
31  break;
32  }
33  case EQUALS:
34  {
35  output[tmpIndex] = input[tmpIndex + 1] - input[tmpIndex];
36  break;
37  }
38  }
39  }
40 }
41 
42 template<typename T>
43 __global__ void dxp2dTransposedCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, mySign s)
44 {
45  const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
46  const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
47 
48  if (x >= sizeX || y >= sizeY)
49  return;
50 
51  const size_t tmpIndex = x + y * sizeX;
52 
53  if (x < sizeX - 1 && x > 0)
54  {
55  switch (s)
56  {
57  case PLUS:
58  {
59  output[tmpIndex] += -(input[tmpIndex] - input[tmpIndex - 1]);
60  break;
61  }
62  case MINUS:
63  {
64  output[tmpIndex] -= -(input[tmpIndex] - input[tmpIndex - 1]);
65  break;
66  }
67  case EQUALS:
68  {
69  output[tmpIndex] = -(input[tmpIndex] - input[tmpIndex - 1]);
70  break;
71  }
72  }
73  }
74  else if (x == 0)
75  {
76  switch (s)
77  {
78  case PLUS:
79  {
80  output[tmpIndex] += -(input[tmpIndex]);
81  break;
82  }
83  case MINUS:
84  {
85  output[tmpIndex] -= -(input[tmpIndex]);
86  break;
87  }
88  case EQUALS:
89  {
90  output[tmpIndex] = -(input[tmpIndex]);
91  break;
92  }
93  }
94  }
95  else
96  {
97  switch (s)
98  {
99  case PLUS:
100  {
101  output[tmpIndex] += (input[tmpIndex - 1]);
102  break;
103  }
104  case MINUS:
105  {
106  output[tmpIndex] -= (input[tmpIndex - 1]);
107  break;
108  }
109  case EQUALS:
110  {
111  output[tmpIndex] = (input[tmpIndex - 1]);
112  break;
113  }
114  }
115  }
116 }
117 
118 template<typename T>
119 __global__ void dyp2dCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, mySign s)
120 {
121  const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
122  const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
123 
124  if (x >= sizeX || y >= sizeY)
125  return;
126 
127  const size_t tmpIndex = x + y * sizeX;
128 
129  if (y < sizeY - 1)
130  {
131  switch (s)
132  {
133  case PLUS:
134  {
135  output[tmpIndex] += input[tmpIndex + sizeX] - input[tmpIndex];
136  break;
137  }
138  case MINUS:
139  {
140  output[tmpIndex] -= input[tmpIndex + sizeX] - input[tmpIndex];
141  break;
142  }
143  case EQUALS:
144  {
145  output[tmpIndex] = input[tmpIndex + sizeX] - input[tmpIndex];
146  break;
147  }
148  }
149  }
150 }
151 
152 template<typename T>
153 __global__ void dyp2dTransposedCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, mySign s)
154 {
155  const size_t x = threadIdx.x + blockIdx.x * blockDim.x;
156  const size_t y = threadIdx.y + blockIdx.y * blockDim.y;
157 
158  if (x >= sizeX || y >= sizeY)
159  return;
160 
161  const size_t tmpIndex = x + y * sizeX;
162 
163  if (y < sizeY - 1 && y > 0)
164  {
165  switch (s)
166  {
167  case PLUS:
168  {
169  output[tmpIndex] += -(input[tmpIndex] - input[tmpIndex - sizeX]);
170  break;
171  }
172  case MINUS:
173  {
174  output[tmpIndex] -= -(input[tmpIndex] - input[tmpIndex - sizeX]);
175  break;
176  }
177  case EQUALS:
178  {
179  output[tmpIndex] = -(input[tmpIndex] - input[tmpIndex - sizeX]);
180  break;
181  }
182  }
183  }
184  else if (y == 0)
185  {
186  switch (s)
187  {
188  case PLUS:
189  {
190  output[tmpIndex] += -(input[tmpIndex]);
191  break;
192  }
193  case MINUS:
194  {
195  output[tmpIndex] -= -(input[tmpIndex]);
196  break;
197  }
198  case EQUALS:
199  {
200  output[tmpIndex] = -(input[tmpIndex]);
201  break;
202  }
203  }
204  }
205  else
206  {
207  switch (s)
208  {
209  case PLUS:
210  {
211  output[tmpIndex] += (input[tmpIndex - sizeX]);
212  break;
213  }
214  case MINUS:
215  {
216  output[tmpIndex] -= (input[tmpIndex - sizeX]);
217  break;
218  }
219  case EQUALS:
220  {
221  output[tmpIndex] = (input[tmpIndex - sizeX]);
222  break;
223  }
224  }
225  }
226 }
227 
228 
229 __device__
230 int getGlobalIdx_3D_3D(){
231  int blockId = blockIdx.x + blockIdx.y * gridDim.x
232  + gridDim.x * gridDim.y * blockIdx.z;
233  int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)
234  + (threadIdx.z * (blockDim.x * blockDim.y))
235  + (threadIdx.y * blockDim.x) + threadIdx.x;
236  return threadId;
237 }
238 
239 template<typename T>
240 __global__ void dxp3dCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, const size_t sizeZ, mySign s)
241 {
242  const size_t x = blockDim.x * blockIdx.x + threadIdx.x;
243  const size_t y = blockDim.y * blockIdx.y + threadIdx.y;
244  const size_t z = blockDim.z * blockIdx.z + threadIdx.z;
245 
246  const size_t tmpIndex = x + sizeX * y + sizeX * sizeY * z;
247 
248  if (x >= sizeX || y >= sizeY || z >= sizeZ)
249  return;
250 
251  if (x < sizeX - 1)
252  {
253  switch (s)
254  {
255  case PLUS:
256  {
257  output[tmpIndex] += input[tmpIndex + 1] - input[tmpIndex];
258  break;
259  }
260  case MINUS:
261  {
262  output[tmpIndex] -= input[tmpIndex + 1] - input[tmpIndex];
263  break;
264  }
265  case EQUALS:
266  {
267  output[tmpIndex] = input[tmpIndex + 1] - input[tmpIndex];
268  break;
269  }
270  }
271  }
272 }
273 
274 template<typename T>
275 __global__ void dxp3dTransposedCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, const size_t sizeZ, mySign s)
276 {
277  const size_t x = blockDim.x * blockIdx.x + threadIdx.x;
278  const size_t y = blockDim.y * blockIdx.y + threadIdx.y;
279  const size_t z = blockDim.z * blockIdx.z + threadIdx.z;
280 
281  const size_t tmpIndex = x + sizeX * y + sizeX * sizeY * z;
282 
283  if (x >= sizeX || y >= sizeY || z >= sizeZ)
284  return;
285 
286  if (x < sizeX - 1 && x > 0)
287  {
288  switch (s)
289  {
290  case PLUS:
291  {
292  output[tmpIndex] += -(input[tmpIndex] - input[tmpIndex - 1]);
293  break;
294  }
295  case MINUS:
296  {
297  output[tmpIndex] -= -(input[tmpIndex] - input[tmpIndex - 1]);
298  break;
299  }
300  case EQUALS:
301  {
302  output[tmpIndex] = -(input[tmpIndex] - input[tmpIndex - 1]);
303  break;
304  }
305  }
306  }
307  else if (x == 0)
308  {
309  switch (s)
310  {
311  case PLUS:
312  {
313  output[tmpIndex] += -(input[tmpIndex]);
314  break;
315  }
316  case MINUS:
317  {
318  output[tmpIndex] -= -(input[tmpIndex]);
319  break;
320  }
321  case EQUALS:
322  {
323  output[tmpIndex] = -(input[tmpIndex]);
324  break;
325  }
326  }
327  }
328  else
329  {
330  switch (s)
331  {
332  case PLUS:
333  {
334  output[tmpIndex] += (input[tmpIndex - 1]);
335  break;
336  }
337  case MINUS:
338  {
339  output[tmpIndex] -= (input[tmpIndex - 1]);
340  break;
341  }
342  case EQUALS:
343  {
344  output[tmpIndex] = (input[tmpIndex - 1]);
345  break;
346  }
347  }
348  }
349 }
350 
351 template<typename T>
352 __global__ void dyp3dCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, const size_t sizeZ, mySign s)
353 {
354  const size_t x = blockDim.x * blockIdx.x + threadIdx.x;
355  const size_t y = blockDim.y * blockIdx.y + threadIdx.y;
356  const size_t z = blockDim.z * blockIdx.z + threadIdx.z;
357 
358  const size_t tmpIndex = x + sizeX * y + sizeX * sizeY * z;
359 
360  if (x >= sizeX || y >= sizeY || z >= sizeZ)
361  return;
362 
363  if (y < sizeY - 1)
364  {
365  switch (s)
366  {
367  case PLUS:
368  {
369  output[tmpIndex] += input[tmpIndex + sizeX] - input[tmpIndex];
370  break;
371  }
372  case MINUS:
373  {
374  output[tmpIndex] -= input[tmpIndex + sizeX] - input[tmpIndex];
375  break;
376  }
377  case EQUALS:
378  {
379  output[tmpIndex] = input[tmpIndex + sizeX] - input[tmpIndex];
380  break;
381  }
382  }
383  }
384 }
385 
386 template<typename T>
387 __global__ void dyp3dTransposedCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, const size_t sizeZ, mySign s)
388 {
389  const size_t x = blockDim.x * blockIdx.x + threadIdx.x;
390  const size_t y = blockDim.y * blockIdx.y + threadIdx.y;
391  const size_t z = blockDim.z * blockIdx.z + threadIdx.z;
392 
393  const size_t tmpIndex = x + sizeX * y + sizeX * sizeY * z;
394 
395  if (x >= sizeX || y >= sizeY || z >= sizeZ)
396  return;
397 
398  if (y < sizeY - 1 && y > 0)
399  {
400  switch (s)
401  {
402  case PLUS:
403  {
404  output[tmpIndex] += -(input[tmpIndex] - input[tmpIndex - sizeX]);
405  break;
406  }
407  case MINUS:
408  {
409  output[tmpIndex] -= -(input[tmpIndex] - input[tmpIndex - sizeX]);
410  break;
411  }
412  case EQUALS:
413  {
414  output[tmpIndex] = -(input[tmpIndex] - input[tmpIndex - sizeX]);
415  break;
416  }
417  }
418  }
419  else if (y == 0)
420  {
421  switch (s)
422  {
423  case PLUS:
424  {
425  output[tmpIndex] += -(input[tmpIndex]);
426  break;
427  }
428  case MINUS:
429  {
430  output[tmpIndex] -= -(input[tmpIndex]);
431  break;
432  }
433  case EQUALS:
434  {
435  output[tmpIndex] = -(input[tmpIndex]);
436  break;
437  }
438  }
439  }
440  else
441  {
442  switch (s)
443  {
444  case PLUS:
445  {
446  output[tmpIndex] += (input[tmpIndex - sizeX]);
447  break;
448  }
449  case MINUS:
450  {
451  output[tmpIndex] -= (input[tmpIndex - sizeX]);
452  break;
453  }
454  case EQUALS:
455  {
456  output[tmpIndex] = (input[tmpIndex - sizeX]);
457  break;
458  }
459  }
460  }
461 }
462 
463 template<typename T>
464 __global__ void dzp3dCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, const size_t sizeZ, mySign s)
465 {
466  const size_t x = blockDim.x * blockIdx.x + threadIdx.x;
467  const size_t y = blockDim.y * blockIdx.y + threadIdx.y;
468  const size_t z = blockDim.z * blockIdx.z + threadIdx.z;
469 
470  const size_t tmpIndex = x + sizeX * y + sizeX * sizeY * z;
471 
472  if (x >= sizeX || y >= sizeY || z >= sizeZ)
473  return;
474 
475  if (z < sizeZ - 1)
476  {
477  switch (s)
478  {
479  case PLUS:
480  {
481  output[tmpIndex] += input[tmpIndex + sizeX * sizeY] - input[tmpIndex];
482  break;
483  }
484  case MINUS:
485  {
486  output[tmpIndex] -= input[tmpIndex + sizeX * sizeY] - input[tmpIndex];
487  break;
488  }
489  case EQUALS:
490  {
491  output[tmpIndex] = input[tmpIndex + sizeX * sizeY] - input[tmpIndex];
492  break;
493  }
494  }
495  }
496 }
497 
498 template<typename T>
499 __global__ void dzp3dTransposedCUDA(T* output, const T* input, const size_t sizeX, const size_t sizeY, const size_t sizeZ, mySign s)
500 {
501  const size_t x = blockDim.x * blockIdx.x + threadIdx.x;
502  const size_t y = blockDim.y * blockIdx.y + threadIdx.y;
503  const size_t z = blockDim.z * blockIdx.z + threadIdx.z;
504 
505  const size_t tmpIndex = x + sizeX * y + sizeX * sizeY * z;
506 
507  if (x >= sizeX || y >= sizeY || z >= sizeZ)
508  return;
509 
510  if (z < sizeZ - 1 && z > 0)
511  {
512  switch (s)
513  {
514  case PLUS:
515  {
516  output[tmpIndex] += -(input[tmpIndex] - input[tmpIndex - sizeX * sizeY]);
517  break;
518  }
519  case MINUS:
520  {
521  output[tmpIndex] -= -(input[tmpIndex] - input[tmpIndex - sizeX * sizeY]);
522  break;
523  }
524  case EQUALS:
525  {
526  output[tmpIndex] = -(input[tmpIndex] - input[tmpIndex - sizeX * sizeY]);
527  break;
528  }
529  }
530  }
531  else if (z == 0)
532  {
533  switch (s)
534  {
535  case PLUS:
536  {
537  output[tmpIndex] += -(input[tmpIndex]);
538  break;
539  }
540  case MINUS:
541  {
542  output[tmpIndex] -= -(input[tmpIndex]);
543  break;
544  }
545  case EQUALS:
546  {
547  output[tmpIndex] = -(input[tmpIndex]);
548  break;
549  }
550  }
551  }
552  else
553  {
554  switch (s)
555  {
556  case PLUS:
557  {
558  output[tmpIndex] += (input[tmpIndex - sizeX * sizeY]);
559  break;
560  }
561  case MINUS:
562  {
563  output[tmpIndex] -= (input[tmpIndex - sizeX * sizeY]);
564  break;
565  }
566  case EQUALS:
567  {
568  output[tmpIndex] = (input[tmpIndex - sizeX * sizeY]);
569  break;
570  }
571  }
572  }
573 }
574 
575 #endif
576 
578 template<typename T>
580 {
581 
582 #ifdef __CUDACC__
583  typedef thrust::device_vector<T> Tdata;
584 #else
585  typedef std::vector<T> Tdata;
586 #endif
587 
588 private:
589  std::vector<int> inputDimension;
590  int gradDirection;
591  gradientType type;
592  int numberDimensions;
593 
594 public:
595 
597 
603  flexGradientOperator(std::vector<int> AInputDimension, int aGradDirection, gradientType aType, bool aMinus) :
604  inputDimension(AInputDimension),
605  gradDirection(aGradDirection),
606  type(aType),
607  numberDimensions(static_cast<int>(AInputDimension.size())), flexLinearOperator<T>(vectorProduct(AInputDimension), vectorProduct(AInputDimension), gradientOp, aMinus)
608  {};
609 
611  {
612  std::vector<int> dimsCopy;
613  dimsCopy.resize(this->inputDimension.size());
614 
615  std::copy(this->inputDimension.begin(), this->inputDimension.end(), dimsCopy.begin());
616 
617  return new flexGradientOperator<T>(dimsCopy, this->gradDirection, this->type, this->isMinus);
618  }
619 
620  //
621  //
622  // 3d cases
623  //
624  //
625 
626  void updateValue(T* ptr, mySign s, T value)
627  {
628  switch (s)
629  {
630  case PLUS:
631  {
632  *ptr += value;
633  break;
634  }
635  case MINUS:
636  {
637  *ptr -= value;
638  break;
639  }
640  case EQUALS:
641  {
642  *ptr = value;
643  break;
644  }
645  }
646  }
647 
648  void dxp3d(const Tdata &input, Tdata &output, mySign s)
649  {
650  int sizeZ = this->inputDimension[2];
651  int sizeY = this->inputDimension[1];
652  int sizeX = this->inputDimension[0] - 1;
653 
654  #pragma omp parallel for
655  for (int k = 0; k < sizeZ; ++k)
656  {
657  for (int j = 0; j < sizeY; ++j)
658  {
659  for (int i = 0; i < sizeX; ++i)
660  {
661  const int tmpIndex = this->index3DtoLinear(i, j, k);
662 
663  this->updateValue(&output[tmpIndex], s, input[tmpIndex + 1] - input[tmpIndex]);
664  }
665  }
666  }
667  }
668 
669  void dyp3d(const Tdata &input, Tdata &output, mySign s)
670  {
671  int sizeZ = this->inputDimension[2];
672  int sizeY = this->inputDimension[1] - 1;
673  int sizeX = this->inputDimension[0];
674 
675  #pragma omp parallel for
676  for (int k = 0; k < sizeZ; ++k)
677  {
678  for (int j = 0; j < sizeY; ++j)
679  {
680  for (int i = 0; i < sizeX; ++i)
681  {
682  const int tmpIndex1 = this->index3DtoLinear(i, j, k);
683  const int tmpIndex2 = this->index3DtoLinear(i, j + 1, k);
684 
685  this->updateValue(&output[tmpIndex1], s, input[tmpIndex2] - input[tmpIndex1]);
686  }
687  }
688  }
689  }
690 
691  void dzp3d(const Tdata &input, Tdata &output, mySign s)
692  {
693  int sizeZ = this->inputDimension[2] - 1;
694  int sizeY = this->inputDimension[1];
695  int sizeX = this->inputDimension[0];
696 
697  #pragma omp parallel for
698  for (int k = 0; k < sizeZ; ++k)
699  {
700  for (int j = 0; j < sizeY; ++j)
701  {
702  for (int i = 0; i < sizeX; ++i)
703  {
704  const int tmpIndex1 = this->index3DtoLinear(i, j, k);
705  const int tmpIndex2 = this->index3DtoLinear(i, j, k + 1);
706 
707  this->updateValue(&output[tmpIndex1], s, input[tmpIndex2] - input[tmpIndex1]);
708  }
709  }
710  }
711  }
712 
713  void dxp3dTransposed(const Tdata &input, Tdata &output, mySign s)
714  {
715  const int sizeZ = this->inputDimension[2];
716  const int sizeY = this->inputDimension[1];
717  const int sizeX = this->inputDimension[0] - 1;
718 
719  #pragma omp parallel for
720  for (int k = 0; k < sizeZ; ++k)
721  {
722  for (int j = 0; j < sizeY; ++j)
723  {
724  for (int i = 1; i < sizeX; ++i)
725  {
726  int tmpIndex = this->index3DtoLinear(i, j, k);
727 
728  this->updateValue(&output[tmpIndex], s, -(input[tmpIndex] - input[tmpIndex - 1]));
729  }
730  }
731  }
732 
733  #pragma omp parallel for
734  for (int k = 0; k < this->inputDimension[2]; ++k)
735  {
736  for (int j = 0; j < this->inputDimension[1]; ++j)
737  {
738  const int index1 = this->index3DtoLinear(0, j, k);
739  const int index2 = this->index3DtoLinear(this->inputDimension[0] - 1, j, k);
740  const int index3 = this->index3DtoLinear(this->inputDimension[0] - 2, j, k);
741 
742  this->updateValue(&output[index1], s, -input[index1]);
743  this->updateValue(&output[index2], s, input[index3]);
744  }
745  }
746  }
747 
748  void dyp3dTransposed(const Tdata &input, Tdata &output, mySign s)
749  {
750  const int sizeZ = this->inputDimension[2];
751  const int sizeY = this->inputDimension[1] - 1;
752  const int sizeX = this->inputDimension[0];
753 
754  #pragma omp parallel for
755  for (int k = 0; k < sizeZ; ++k)
756  {
757  for (int j = 1; j < sizeY; ++j)
758  {
759  for (int i = 0; i < sizeX; ++i)
760  {
761  const int tmpIndex1 = this->index3DtoLinear(i, j, k);
762  const int tmpIndex2 = this->index3DtoLinear(i, j - 1, k);
763 
764  this->updateValue(&output[tmpIndex1], s, -(input[tmpIndex1] - input[tmpIndex2]));
765  }
766  }
767  }
768 
769  #pragma omp parallel for
770  for (int k = 0; k < this->inputDimension[2]; ++k)
771  {
772  for (int i = 0; i < this->inputDimension[0]; ++i)
773  {
774  const int index1 = this->index3DtoLinear(i, 0, k);
775  const int index2 = this->index3DtoLinear(i, this->inputDimension[1] - 1, k);
776  const int index3 = this->index3DtoLinear(i, this->inputDimension[1] - 2, k);
777 
778  this->updateValue(&output[index1], s, -input[index1]);
779  this->updateValue(&output[index2], s, input[index3]);
780  }
781  }
782  }
783 
784  void dzp3dTransposed(const Tdata &input, Tdata &output, mySign s)
785  {
786  const int sizeZ = this->inputDimension[2] - 1;
787  const int sizeY = this->inputDimension[1];
788  const int sizeX = this->inputDimension[0];
789 
790  #pragma omp parallel for
791  for (int k = 1; k < sizeZ; ++k)
792  {
793  for (int j = 0; j < sizeY; ++j)
794  {
795  for (int i = 0; i < sizeX; ++i)
796  {
797  const int tmpIndex1 = this->index3DtoLinear(i, j, k);
798  const int tmpIndex2 = this->index3DtoLinear(i, j, k - 1);
799 
800  this->updateValue(&output[tmpIndex1], s, -(input[tmpIndex1] - input[tmpIndex2]));
801  }
802  }
803  }
804 
805  #pragma omp parallel for
806  for (int j = 0; j < this->inputDimension[1]; ++j)
807  {
808  for (int i = 0; i < this->inputDimension[0]; ++i)
809  {
810  const int index1 = this->index3DtoLinear(i, j, 0);
811  const int index2 = this->index3DtoLinear(i, j, this->inputDimension[2] - 1);
812  const int index3 = this->index3DtoLinear(i, j, this->inputDimension[2] - 2);
813 
814  this->updateValue(&output[index1], s, -input[index1]);
815  this->updateValue(&output[index2], s, input[index3]);
816  }
817  }
818  }
819 
820  //2d cases
821  void dxp2d(const Tdata &input, Tdata &output, mySign s)
822  {
823  int sizeY = this->inputDimension[1];
824  int sizeX = this->inputDimension[0] - 1;
825 
826  #pragma omp parallel for
827  for (int j = 0; j < sizeY; ++j)
828  {
829  for (int i = 0; i < sizeX; ++i)
830  {
831  const int tmpIndex = this->index2DtoLinear(i, j);
832 
833  switch (s)
834  {
835  case PLUS:
836  {
837  output[tmpIndex] += input[tmpIndex + 1] - input[tmpIndex];
838  break;
839  }
840  case MINUS:
841  {
842  output[tmpIndex] -= input[tmpIndex + 1] - input[tmpIndex];
843  break;
844  }
845  case EQUALS:
846  {
847  output[tmpIndex] = input[tmpIndex + 1] - input[tmpIndex];
848  break;
849  }
850  }
851  }
852  }
853  }
854 
855  void dyp2d(const Tdata &input, Tdata &output, mySign s)
856  {
857  int sizeY = this->inputDimension[1] - 1;
858  int sizeX = this->inputDimension[0];
859 
860  #pragma omp parallel for
861  for (int j = 0; j < sizeY; ++j)
862  {
863  for (int i = 0; i < sizeX; ++i)
864  {
865  const int tmpIndex = this->index2DtoLinear(i, j);
866 
867  switch (s)
868  {
869  case PLUS:
870  {
871  output[tmpIndex] += input[tmpIndex + sizeX] - input[tmpIndex];
872  break;
873  }
874  case MINUS:
875  {
876  output[tmpIndex] -= input[tmpIndex + sizeX] - input[tmpIndex];
877  break;
878  }
879  case EQUALS:
880  {
881  output[tmpIndex] = input[tmpIndex + sizeX] - input[tmpIndex];
882  break;
883  }
884  }
885  }
886  }
887  }
888 
889  void dxp2dTransposed(const Tdata &input, Tdata &output, mySign s)
890  {
891  int sizeY = this->inputDimension[1];
892  int sizeX = this->inputDimension[0] - 1;
893 
894  #pragma omp parallel for
895  for (int j = 0; j < sizeY; ++j)
896  {
897  for (int i = 1; i < sizeX; ++i)
898  {
899  int tmpIndex = this->index2DtoLinear(i, j);
900 
901  switch (s)
902  {
903  case PLUS:
904  {
905  output[tmpIndex] += -(input[tmpIndex] - input[tmpIndex - 1]);
906  break;
907  }
908  case MINUS:
909  {
910  output[tmpIndex] -= -(input[tmpIndex] - input[tmpIndex - 1]);
911  break;
912  }
913  case EQUALS:
914  {
915  output[tmpIndex] = -(input[tmpIndex] - input[tmpIndex - 1]);
916  break;
917  }
918  }
919  }
920  }
921 
922  for (int j = 0; j < this->inputDimension[1]; ++j)
923  {
924  switch (s)
925  {
926  case PLUS:
927  {
928  output[this->index2DtoLinear(0, j)] += -input[this->index2DtoLinear(0, j)];
929  output[this->index2DtoLinear(this->inputDimension[0] - 1, j)] += input[this->index2DtoLinear(this->inputDimension[0] - 2, j)];
930  break;
931  }
932  case MINUS:
933  {
934  output[this->index2DtoLinear(0, j)] -= -input[this->index2DtoLinear(0, j)];
935  output[this->index2DtoLinear(this->inputDimension[0] - 1, j)] -= input[this->index2DtoLinear(this->inputDimension[0] - 2, j)];
936  break;
937  }
938  case EQUALS:
939  {
940  output[this->index2DtoLinear(0, j)] = -input[this->index2DtoLinear(0, j)];
941  output[this->index2DtoLinear(this->inputDimension[0] - 1, j)] = input[this->index2DtoLinear(this->inputDimension[0] - 2, j)];
942  break;
943  }
944  }
945  }
946  }
947 
948  void dyp2dTransposed(const Tdata &input, Tdata &output, mySign s)
949  {
950  int sizeY = this->inputDimension[1] - 1;
951  int sizeX = this->inputDimension[0];
952 
953  #pragma omp parallel for
954  for (int j = 1; j < sizeY; ++j)
955  {
956  for (int i = 0; i < sizeX; ++i)
957  {
958  int tmpIndex = this->index2DtoLinear(i, j);
959 
960  switch (s)
961  {
962  case PLUS:
963  {
964  output[tmpIndex] += -(input[tmpIndex] - input[tmpIndex - sizeX]);
965  break;
966  }
967  case MINUS:
968  {
969  output[tmpIndex] -= -(input[tmpIndex] - input[tmpIndex - sizeX]);
970  break;
971  }
972  case EQUALS:
973  {
974  output[tmpIndex] = -(input[tmpIndex] - input[tmpIndex - sizeX]);
975  break;
976  }
977  }
978  }
979  }
980 
981  for (int i = 0; i < this->inputDimension[0]; ++i)
982  {
983  switch (s)
984  {
985  case PLUS:
986  {
987  output[this->index2DtoLinear(i, 0)] += -input[this->index2DtoLinear(i, 0)];
988  output[this->index2DtoLinear(i, this->inputDimension[1] - 1)] += input[this->index2DtoLinear(i, this->inputDimension[1] - 2)];
989  break;
990  }
991  case MINUS:
992  {
993  output[this->index2DtoLinear(i, 0)] -= -input[this->index2DtoLinear(i, 0)];
994  output[this->index2DtoLinear(i, this->inputDimension[1] - 1)] -= input[this->index2DtoLinear(i, this->inputDimension[1] - 2)];
995  break;
996  }
997  case EQUALS:
998  {
999  output[this->index2DtoLinear(i, 0)] = -input[this->index2DtoLinear(i, 0)];
1000  output[this->index2DtoLinear(i, this->inputDimension[1] - 1)] = input[this->index2DtoLinear(i, this->inputDimension[1] - 2)];
1001  break;
1002  }
1003  }
1004  }
1005  }
1006 
1007  void doTimesCPU(bool transposed, const Tdata &input, Tdata &output, mySign s)
1008  {
1009  if (this->inputDimension.size() == 2)
1010  {
1011  if (this->gradDirection == 0)
1012  {
1013  if (transposed == false)
1014  {
1015  this->dxp2d(input, output, s);
1016  }
1017  else
1018  {
1019  this->dxp2dTransposed(input, output, s);
1020  }
1021  }
1022  else if (this->gradDirection == 1)
1023  {
1024  if (transposed == false)
1025  {
1026  this->dyp2d(input, output, s);
1027  }
1028  else
1029  {
1030  this->dyp2dTransposed(input, output, s);
1031  }
1032  }
1033  }
1034  else if (this->inputDimension.size() == 3)
1035  {
1036  if (this->gradDirection == 0)
1037  {
1038  if (transposed == false)
1039  {
1040  this->dxp3d(input, output, s);
1041  }
1042  else
1043  {
1044  this->dxp3dTransposed(input, output, s);
1045  }
1046  }
1047  else if (this->gradDirection == 1)
1048  {
1049  if (transposed == false)
1050  {
1051  this->dyp3d(input, output, s);
1052  }
1053  else
1054  {
1055  this->dyp3dTransposed(input, output, s);
1056  }
1057  }
1058  else if (this->gradDirection == 2)
1059  {
1060  if (transposed == false)
1061  {
1062  this->dzp3d(input, output, s);
1063  }
1064  else
1065  {
1066  this->dzp3dTransposed(input, output, s);
1067  }
1068  }
1069  }
1070  else
1071  {
1072  printf("Gradient not implemented for dim!={2,3}\n");
1073  //TODO: implement gradient for dim!={2,3} for CPU version
1074  }
1075  }
1076 
1077  #ifdef __CUDACC__
1078  void doTimesCUDA(bool transposed, const Tdata &input, Tdata &output, mySign s)
1079  {
1080  size_t sizeX = this->inputDimension[0];
1081  size_t sizeY = 1;
1082  size_t sizeZ = 1;
1083 
1084  if (this->inputDimension.size() > 1)
1085  {
1086  sizeY = this->inputDimension[1];
1087  }
1088  if (this->inputDimension.size() > 2)
1089  {
1090  sizeZ = this->inputDimension[2];
1091  }
1092 
1093  dim3 block = dim3(32,16,1);
1094  dim3 grid = dim3((sizeX + block.x - 1) / block.x, (sizeY + block.y - 1) / block.y, (sizeZ + block.z - 1) / block.z);
1095 
1096  T* ptrOutput = thrust::raw_pointer_cast(output.data());
1097  const T* ptrInput = thrust::raw_pointer_cast(input.data());
1098 
1099  if (this->inputDimension.size() == 2)
1100  {
1101  if (this->gradDirection == 0)
1102  {
1103  if (transposed == false)
1104  {
1105  dxp2dCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], s);
1106  }
1107  else
1108  {
1109  dxp2dTransposedCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], s);
1110  }
1111  }
1112  else if (this->gradDirection == 1)
1113  {
1114  if (transposed == false)
1115  {
1116  dyp2dCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], s);
1117  }
1118  else
1119  {
1120  dyp2dTransposedCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], s);
1121  }
1122  }
1123  }
1124  else if (this->inputDimension.size() == 3)
1125  {
1126  if (this->gradDirection == 0)
1127  {
1128  if (transposed == false)
1129  {
1130  dxp3dCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], this->inputDimension[2], s);
1131  }
1132  else
1133  {
1134  dxp3dTransposedCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], this->inputDimension[2], s);
1135  }
1136  }
1137  else if (this->gradDirection == 1)
1138  {
1139  if (transposed == false)
1140  {
1141  dyp3dCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], this->inputDimension[2], s);
1142  }
1143  else
1144  {
1145  dyp3dTransposedCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], this->inputDimension[2], s);
1146  }
1147  }
1148  else if (this->gradDirection == 2)
1149  {
1150  if (transposed == false)
1151  {
1152  dzp3dCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], this->inputDimension[2], s);
1153  }
1154  else
1155  {
1156  dzp3dTransposedCUDA << <grid, block >> >(ptrOutput, ptrInput, this->inputDimension[0], this->inputDimension[1], this->inputDimension[2], s);
1157  }
1158  }
1159  }
1160  else
1161  {
1162  printf("Gradient not implemented for dim!={2,3}\n");
1163  //TODO: implement gradient for dim!={2,3} for GPU version
1164  }
1165  }
1166  #endif
1167 
1168  void doTimes(bool transposed, const Tdata &input, Tdata &output, mySign s)
1169  {
1170  if (this->isMinus && s == PLUS)
1171  {
1172  s = MINUS;
1173  }
1174  else if (this->isMinus && s == MINUS)
1175  {
1176  s = PLUS;
1177  }
1178 
1179  // flip sign and transposed
1180  if (this->type == backward)
1181  {
1182  transposed = !transposed;
1183  if (s == MINUS)
1184  {
1185  s = PLUS;
1186  }
1187  else
1188  {
1189  s = MINUS;
1190  }
1191  }
1192 
1193  #ifdef __CUDACC__
1194  this->doTimesCUDA(transposed, input, output, s);
1195  #else
1196  this->doTimesCPU(transposed, input, output, s);
1197  #endif
1198  }
1199 
1200  void timesPlus(bool transposed, const Tdata &input, Tdata &output)
1201  {
1202  this->doTimes(transposed, input, output, PLUS);
1203  }
1204 
1205  void timesMinus(bool transposed, const Tdata &input, Tdata &output)
1206  {
1207  this->doTimes(transposed, input, output, MINUS);
1208  }
1209 
1210  void times(bool transposed, const Tdata &input, Tdata &output)
1211  {
1212  this->doTimes(transposed, input, output, EQUALS);
1213  }
1214 
1215  T getMaxRowSumAbs(bool transposed)
1216  {
1217  //row sum of absolute values is at maximum 2
1218  return static_cast<T>(2);
1219  }
1220 
1221  std::vector<T> getAbsRowSum(bool transposed)
1222  {
1223  std::vector<T> result(this->getNumRows(),(T)2);
1224 
1225  return result;
1226  }
1227 
1228  int index3DtoLinear(int i, int j, int k)
1229  {
1230  return (i + j*this->inputDimension[0] + k*this->inputDimension[0] * this->inputDimension[1]);
1231  }
1232 
1233  int index2DtoLinear(int i, int j)
1234  {
1235  return (i + j*this->inputDimension[0]);
1236  }
1237 
1238  #ifdef __CUDACC__
1239  thrust::device_vector<T> getAbsRowSumCUDA(bool transposed)
1240  {
1241  thrust::device_vector<T> result(this->getNumRows(), (T)2);
1242 
1243  return result;
1244  }
1245  #endif
1246 };
1247 
1248 #endif
int getNumRows() const
returns number of rows of the linear operator
Definition: flexLinearOperator.h:57
flexGradientOperator(std::vector< int > AInputDimension, int aGradDirection, gradientType aType, bool aMinus)
initializes the gradient operator
Definition: flexGradientOperator.h:603
represents a gradient operator
Definition: flexGradientOperator.h:579
bool isMinus
determines if operator is negated
Definition: flexLinearOperator.h:25
void times(bool transposed, const Tdata &input, Tdata &output)
applies linear operator on vector
Definition: flexGradientOperator.h:1210
flexGradientOperator< T > * copy()
copies the linear operator
Definition: flexGradientOperator.h:610
T getMaxRowSumAbs(bool transposed)
returns the maximum sum of absolute values per row used for preconditioning
Definition: flexGradientOperator.h:1215
mySign
enum representing the type of concatenation
Definition: tools.h:56
gradientType
enum representing the type of gradient
Definition: tools.h:97
std::vector< T > getAbsRowSum(bool transposed)
returns a vector of sum of absolute values per row used for preconditioning
Definition: flexGradientOperator.h:1221
void timesMinus(bool transposed, const Tdata &input, Tdata &output)
applies linear operator on vector and substracts its result from y
Definition: flexGradientOperator.h:1205
abstract base class for linear operators
Definition: flexLinearOperator.h:12
thrust::device_vector< T > getAbsRowSumCUDA(bool transposed)
same function as getAbsRowSum() but implemented in CUDA
Definition: flexGradientOperator.h:1239
void timesPlus(bool transposed, const Tdata &input, Tdata &output)
applies linear operator on vector and adds its result to y
Definition: flexGradientOperator.h:1200