@@ -1727,44 +1727,64 @@ int rawspec_start_processing(rawspec_context * ctx, int fft_dir)
1727
1727
height = ctx->Nbc ;
1728
1728
1729
1729
for (d=0 ; d < ctx->Nds [i]; d++) {
1730
-
1731
- // Lo to hi
1732
- width = ((ctx->Nts [i]+1 ) / 2 ) * sizeof (float ) * complexity_factor;
1733
- cuda_rc = cudaMemcpy2DAsync (dst + ctx->Nts [i]/2 ,
1734
- dpitch,
1735
- src,
1736
- spitch,
1737
- width,
1738
- height,
1739
- cudaMemcpyDeviceToHost,
1740
- gpu_ctx->compute_stream );
1741
-
1742
- if (cuda_rc != cudaSuccess) {
1743
- PRINT_CUDA_ERRMSG (cuda_rc);
1744
- rawspec_cleanup (ctx);
1745
- return 1 ;
1730
+
1731
+ if (ctx->complex_output ) {
1732
+ // GUPPI RAW output, don't translate channels
1733
+ width = dpitch;
1734
+ cuda_rc = cudaMemcpy2DAsync (dst,
1735
+ dpitch,
1736
+ src,
1737
+ spitch,
1738
+ width,
1739
+ height,
1740
+ cudaMemcpyDeviceToHost,
1741
+ gpu_ctx->compute_stream );
1742
+
1743
+ if (cuda_rc != cudaSuccess) {
1744
+ PRINT_CUDA_ERRMSG (cuda_rc);
1745
+ rawspec_cleanup (ctx);
1746
+ return 1 ;
1747
+ }
1746
1748
}
1747
-
1748
- // Hi to lo
1749
- width = (ctx->Nts [i] / 2 ) * sizeof (float ) * complexity_factor;
1750
- cuda_rc = cudaMemcpy2DAsync (dst,
1751
- dpitch,
1752
- src + (ctx->Nts [i]+1 ) / 2 ,
1753
- spitch,
1754
- width,
1755
- height,
1756
- cudaMemcpyDeviceToHost,
1757
- gpu_ctx->compute_stream );
1758
-
1759
- if (cuda_rc != cudaSuccess) {
1760
- PRINT_CUDA_ERRMSG (cuda_rc);
1761
- rawspec_cleanup (ctx);
1762
- return 1 ;
1749
+ else {
1750
+ // Lo to hi
1751
+ width = ((ctx->Nts [i]+1 ) / 2 ) * sizeof (float );
1752
+ cuda_rc = cudaMemcpy2DAsync (dst + ctx->Nts [i]/2 ,
1753
+ dpitch,
1754
+ src,
1755
+ spitch,
1756
+ width,
1757
+ height,
1758
+ cudaMemcpyDeviceToHost,
1759
+ gpu_ctx->compute_stream );
1760
+
1761
+ if (cuda_rc != cudaSuccess) {
1762
+ PRINT_CUDA_ERRMSG (cuda_rc);
1763
+ rawspec_cleanup (ctx);
1764
+ return 1 ;
1765
+ }
1766
+
1767
+ // Hi to lo
1768
+ width = (ctx->Nts [i] / 2 ) * sizeof (float );
1769
+ cuda_rc = cudaMemcpy2DAsync (dst,
1770
+ dpitch,
1771
+ src + (ctx->Nts [i]+1 ) / 2 ,
1772
+ spitch,
1773
+ width,
1774
+ height,
1775
+ cudaMemcpyDeviceToHost,
1776
+ gpu_ctx->compute_stream );
1777
+
1778
+ if (cuda_rc != cudaSuccess) {
1779
+ PRINT_CUDA_ERRMSG (cuda_rc);
1780
+ rawspec_cleanup (ctx);
1781
+ return 1 ;
1782
+ }
1763
1783
}
1764
1784
1765
1785
// Increment src and dst pointers
1766
- src += ctx->Nts [i] * ctx->Nas [i];
1767
- dst += abs (ctx->Npolout [i]) * ctx->Nts [i] * ctx->Nc ;
1786
+ src += ctx->Nts [i] * ctx->Nas [i] * complexity_factor ;
1787
+ dst += abs (ctx->Npolout [i]) * ctx->Nts [i] * ctx->Nc * complexity_factor ;
1768
1788
}
1769
1789
}
1770
1790
0 commit comments