1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
// Perform Coulomb collisions and calculate the fusion rate for each cell
// in the simulation.
// Input:  - velocity vectors of all particles in the cell
//         - density of particles in cell
//         - cell volume
//         - max number of collision particles allocated per cell
// Result: - New post-collision velocities of particles
//         - Fusion rate (in fusion events-per-second)
// Author: Andrew M. Chap
// Last edited September 2017

__global__ void performCollisions(
        float *vx, float *vr, float *vt,
        int *Ic,
        float *rho, const float *bmax, const float *CellVolumesPM,
        const int maxParticlesPerCell, int *cellResidents,
        const int *cellOccupants, int *cellOccupantsMax,
        float *fusionRate,
        float *partNs, float *partAs,
        curandState *myCurandstate, 
        const float PIdt, const float a_coeff,
        const int nPM, const float L){
    
    // Parallelized by cell.  Each process performs all the collisions in
    // a cell and then moves on to the next cell
    int c = threadIdx.x  + blockDim.x * blockIdx.x;
    
    // All Equation numbers and Figure numbers refer to:
    // Chap, Andrew M., Sedwick, Raymond J., “Coulomb Collision Model for Use in
    // Nonthermal Plasma Simulation”, Phys. Rev. E 95:6 063209 (2017)

    while(c < nPM){ // nPM is number of cells
        int np = cellOccupants[c];
	// Just recording the max number of occupants that have been seen in this cell
        if(np > cellOccupantsMax[c])       
            cellOccupantsMax[c] = np;
        // If we have more particles than the max allocated for, 
        // we need to truncate our number of particles there
        int npTrunc = np <= maxParticlesPerCell ? np : maxParticlesPerCell; 

        // Generate random permutation of particles in the cell
        // (Fisher Yates algorithm)
        for (int p = npTrunc-1; p >= 0; --p){
            // Generate a random number [0, p-1]
	    // curand generates unsigned int random numbers, so we need to convert to int
            int r = (int)(curand(&myCurandstate[c]) % (((unsigned int)(p))+1)); 
            // Swap the last element with element at random index
            int temp = cellResidents[c*maxParticlesPerCell + p];
            cellResidents[c*maxParticlesPerCell + p] = cellResidents[c*maxParticlesPerCell + r];
            cellResidents[c*maxParticlesPerCell + r] = temp;
        }

        float fusionRateHere = 0.0f; // initialize total fusion rate to zero
	// right now we don't collide the last particle if we have an odd number
        for (int p = 0; p < npTrunc-1; p+=2){ // loop through particle pairs
            // -------------------------------------------------
            // Calculate relative velocity
            // -------------------------------------------------
            // select particles to be collided
            int p1 = cellResidents[c*maxParticlesPerCell + p];
            int p2 = cellResidents[c*maxParticlesPerCell + p+1];
            // COM velocity
            float VCx = 0.5f*(vx[p1]+vx[p2]);
            float VCr = 0.5f*(vr[p1]+vr[p2]);
            float VCt = 0.5f*(vt[p1]+vt[p2]);
            // Relative velocity of p1 in the COM frame
            float VRx = vx[p1] - VCx;
            float VRr = vr[p1] - VCr;
            float VRt = vt[p1] - VCt;
            // Rel velocity squared and magnitude
            float VR2 = VRx*VRx + VRr*VRr + VRt*VRt;
            float VRm = sqrtf(VR2);
            // The velocity difference
            float VDx = vx[p1] - vx[p2];
            float VDr = vr[p1] - vr[p2];
            float VDt = vt[p1] - vt[p2];
            // The velocity difference squared and magnitude
            float VD2 = VDx*VDx + VDr*VDr + VDt*VDt;
            float VDm = sqrtf(VD2);
            float VD2i = 1.0f/VD2;

            // ------------------------------------
            // Calculate fusion rate
            // ------------------------------------
            float v = VDm*SECONDSCONVERSION;
            float vInv = 1.0f/v;
            float v2 = v*v;
            float v4 = v2*v2;
            float SV;
            // fusion cross section calculation from fit functions to experimental data
            if (v < V400){
                SV = CV0 + CV1*v2 + CV2*v4 + AVL/((v2-VS148)*(v2-VS148) + VQ235);
            }else if(v < V642){
                float peak400 = (v2 - VS400);
                float peak400squared = peak400*peak400;
                SV = DV0 + 
                     DV1*peak400 - 
                     DV2*peak400*peak400 - 
                     DV3*peak400squared*peak400squared*peak400;
            }else{
                SV = BV0 + 
                     AV0/((v2 - VS5813)*(v2 - VS5813) + VQ857) +
                     AV1/((v2 - VS1083)*(v2 - VS1083) + VQ234) +
                     AV2/((v2 - VS2405)*(v2 - VS2405) + VQ138) +
                     AV3/((v2 - VS3344)*(v2 - VS3344) + VQ309);
            }
            // Density squared multiplied by volume (Have to divide rho by volume to get
            // Density, so in this case we just divide once by volume)
            float fusionContribution = rho[c]*rho[c]/(CellVolumesPM[c]*L*L*L) * 
                    vInv*expf(-VG*vInv)*(1.0f/SECONDSCONVERSION)*SV/4.0f/(float(npTrunc));
            // Above is Eq. (4.18) from my PhD thesis (not in Coulomb paper)
            // Later (in MATLAB plotting) it gets multiplied by dE (8.7e6 MeV)
            // i.e. Eq. (4.19) in PhD thesis
           
            if (fusionContribution != fusionContribution){ //if nan
                fusionContribution = 0.0f;
            }
	    // add fusion rate contribution of these two particles total
            fusionRateHere += fusionContribution;

            // -------------------------------------------------
            // Calculate Coulomb collision scatttering angle
            // -------------------------------------------------
            // The '(float(np)/float(npTrunc))' is there to offset any 'under-colliding'
            // that would be done if we aren't operating on all the particles
            float N = (((PIdt*VDm)*bmax[c]*bmax[c])*rho[c]) / 
                         (CellVolumesPM[c]*(L*L*L))*(float(np)/float(npTrunc));  // Eq. (2)
            // Above eqution divides by cell volume because our "rho" is 
            // actually particles per cell, rather than particles per m^3
            
            // inverse of the magnitude of the relative velocity
            float VRi = VRm>0 ? 1.0f/VRm : 0.0f;
            
            if (N > 0){
                float theta;
                float a = a_coeff*VD2i/bmax[c]; // Eq. (6)
                float sTildeAsqoN = (a*sqrtf(N))*(KS4 - KS1*expf(-KS2*powf(N,KS3))); // Eq. (31a)
                float s = sTildeAsqoN*pow((1.0f + pow((TWOOVERPI*sTildeAsqoN),KS5)),(1.0f/KS5)); // Eq. (32a)
                // Create random variables U (for scattering angle theta) 
                // and V (for azimuthal angle phi)
                float U = curand_uniform(&myCurandstate[c]);
                float V = curand_uniform(&myCurandstate[c]);
                
                if(s > PIOVERTWO-0.2f){ 
                // If s is close to pi/2, we are approximately isotropic.  this happens
                // when two particles are traveling close to the same speed
                    theta = acosf(1.9999f*U-0.9998f);  
                    // use 1.9999 and 0.9998 instead of 2 and 1 to avoid the remote
                    // possibility of an argument equal to -1 or 1
                }else{
                    // Eqs. (31b-31d)
                    float kTilde = 1.0f - KK1*expf(-KK2*powf(N,KK3));
                    float lTilde = KL1*expf(-KL2*powf(N,KL3));
                    float hTilde = KH1*expf(-KH2*powf(N,KH3));
	            // Eqs. (32b-32d)
                    float k = kTilde*expf(KK4*powf(s,KK5));
                    float l = lTilde*expf(-KL4*powf(s,KL5));
                    float h = hTilde*expf(-KH4*powf(s,KH5));
                    
                    // if any of k, l, or h are greater than 1 or less than 0, set within these bounds
                    k = k > 1.0f ? 1.0f : k;
                    k = k < 0.0f ? 0.0f : k;
                    l = l > 1.0f ? 1.0f : l;
                    l = l < 0.0f ? 0.0f : l;
                    h = h > 1.0f ? 1.0f : h;
                    h = h < 0.0f ? 0.0f : h;
                    
                    // Eq. (27) branching (see Fig. 3 in my Coulomb collision paper a graphical representation)
                    if (U >= h){ // theta_transition or theta_low
			// Called "varSigma" because that is the name of the LaTeX character I used to represent it
                        float varSigma = cosf(s)/(sinf(s)*sinf(s)); // Eq. (22)
                        float invVarSigma = 1.0f/varSigma;
                        if (U >= l){ 
                            // Most common route: theta_low
                            // (with extra "abs" to prevent negative number in logarithm,
                            // though U should never be low enough to reach that point)
                            // (u_low-1)/kappa + 1 > 0, so: u_low > 1-kappa 
                            theta = // three different expressions to prevent 
                                    (varSigma>1.0e6f)
                                    ?
                                        // Taylor expansion of acos(1-x) at x=0 is sqrt(2*x).  we need this taylor 
                                        // expansion when varSigma > 1e6 or so, because evaluating 1+1e-8
                                        // in single results in 1
                                        sqrtf(-2*invVarSigma*logf(fabsf((U-1.0f)/k+1.0f))) 
                                        :
                                        (
                                            (varSigma>80.0f)
                                            ?
                                            (acosf(1.0f + invVarSigma*logf(fabsf((U-1.0f)/k+1.0f)))) // eq. (25)
                                            :
                                            (acosf(invVarSigma*logf(fabsf(expf(-varSigma)
                                                + 2.0f*sinhf(varSigma)*((U-1.0f)/k+1.0f))))) // eq. (24)
                                         )
                                    ;
                        }else{ // second-most common route: theta_mid
                            // to evaluate, we need expressions similar to theta_low (above) and theta_high (below)
                            float thetaHighUhigh = 2.0f*atanf(a*sqrtf(N/h));
                     
                            float thetaLowUlow = 
                                    (varSigma>1.0e6f)
                                    ?
                                        sqrtf(-2*invVarSigma*logf(fabsf((l-1.0f)/k+1.0f)))
                                        :
                                        (
                                            (varSigma>80.0f)
                                            ?
                                            (acosf(1.0f + invVarSigma*logf(fabsf((l-1.0f)/k+1.0f))))  // eq. (25)
                                            :
                                            (acosf(invVarSigma*logf(fabsf(expf(-varSigma)
                                                + 2.0f*sinhf(varSigma)*((l-1.0f)/k+1.0f))))) // eq. (24)
                                         )
                                    ;

                            theta = thetaLowUlow*powf((U/l), (logf(thetaLowUlow/thetaHighUhigh)/logf(l/h)));
                            // Eq. (26)
                        }
                    }else{ // least common route: theta_high
                        theta = 2.0f*atanf(a*sqrtf(N/U)); // eq. (21)
                    }
                }

                // -------------------------------------------------
                // Perform Collision
                // -------------------------------------------------

                // rel velocity normalized to a unit vector
                float VRNx = VRx*VRi;
                float VRNr = VRr*VRi;
                float VRNt = VRt*VRi;
                // collision scattering angle
                float sinTheta = sinf(theta);
                float cosTheta = cosf(theta);
                // azimuthal angle of collision
                float phi = TWOPI*V; // random between 0 and 2*pi
                float sinPhi = sinf(phi);
                float cosPhi = cosf(phi);
                // create vector "R" perpendicular to v_rel_com_norm
                float Rx, Rr, Rt;
                if (VRNr!=1.0f){
                    Rx = -VRNt;
                    Rr = 0.0f;
                    Rt = VRNx;
                }else{ // avoids the very very unlikely possibility of getting the zero vector
                    Rx = 0.0f;
                    Rr = VRNr;
                    Rt = -VRNt;
                }
                // vector "U" = VRN cross R
                float Ux = VRNr*Rt - VRNt*Rr;
                float Ur = VRNt*Rx - VRNx*Rt;
                float Ut = VRNx*Rr - VRNr*Rx;
                // vector "P" = U*sin(phi) + R*cos(phi)
                float Px = Ux*sinPhi + Rx*cosPhi;
                float Pr = Ur*sinPhi + Rr*cosPhi;
                float Pt = Ut*sinPhi + Rt*cosPhi;
                // Normalize P
                float P2 = Px*Px + Pr*Pr + Pt*Pt;
                float Pi = P2>0.0f ? rsqrtf(P2) : 0.0f; // check for zero
                Px *= Pi;
                Pr *= Pi;
                Pt *= Pi;
                // Vector "T" is the unnormalized rel velocity rotated towards "P" by theta,
                // this is the new velocity of p1 in the COM frame
                float Tx = VRx*cosTheta + (Pr*VRt - Pt*VRr)*sinTheta;
                float Tr = VRr*cosTheta + (Pt*VRx - Px*VRt)*sinTheta;
                float Tt = VRt*cosTheta + (Px*VRr - Pr*VRx)*sinTheta;
                // Vector "C" is the change in velocity in the lab frame
                float Cx = Tx - VRx;
                float Cr = Tr - VRr;
                float Ct = Tt - VRt;
                // Check for NaN's and zero them out if necessary
                if (Cx!=Cx)
                    Cx = 0.0f;
                if (Cr!=Cr)
                    Cr = 0.0f;
                if (Ct!=Ct)
                    Ct = 0.0f;
                // apply collision velocity change to particles
                vx[p1] += Cx;
                vr[p1] += Cr;
                vt[p1] += Ct;
                vx[p2] -= Cx;
                vr[p2] -= Cr;
                vt[p2] -= Ct;
            }
        }
        fusionRate[c] = fusionRateHere; // record fusion rate in this cell
        c += blockDim.x*gridDim.x;      // go to next cell assigned to this process
    }
}