Skip to content
Snippets Groups Projects
particles_bonds.cu 7.52 KiB
Newer Older
  • Learn to ignore specific revisions
  • //
    // particles_bonds.cu
    //    input particles, output bonds
    //    (c) MIT CBA Neil Gershenfeld 6/17/20
    //
    // includes
    //
    #include <iostream>
    #include <string>
    
    Amira Abdel-Rahman (admin)'s avatar
    Amira Abdel-Rahman (admin) committed
    #include <cstring>
    
    using namespace std;
    //
    // variables
    //
    class vars {
       public:
          float *px,*py,*pz; // CPU particles
          float *dpx,*dpy,*dpz; // GPU particles
          float d,ds; // particle pitch, sort pitch
          float xmin,xmax,ymin,ymax,zmin,zmax; // limits
          int grid,block; // GPU
          int lpp; // links per particle
          int ms = 10; // maxsort
          int np,nx,ny,nz; // number of particles, sort
          int *dsort,*dnsort; // sort
          int *links,*nlinks,*dlinks,*dnlinks; // links
       };
    vars v;
    //
    // sort particles
    //
    __global__ void sort_particles(vars v) {
       int index = blockIdx.x*blockDim.x+threadIdx.x;
       int nindex = gridDim.x*blockDim.x;
       int start = (index/float(nindex))*v.np;
       int stop = ((index+1)/float(nindex))*v.np;
       for (int i = start; i < stop; ++i) {
          int ix = (v.nx-1)*(v.dpx[i]-v.xmin)/(v.xmax-v.xmin);
          int iy = (v.ny-1)*(v.dpy[i]-v.ymin)/(v.ymax-v.ymin);
          int iz = (v.nz-1)*(v.dpz[i]-v.zmin)/(v.zmax-v.zmin);
          int isort = v.nz*v.ny*ix+v.nz*iy+iz;
          v.dsort[v.ms*isort
             +atomicAdd(&v.dnsort[isort],1)]
             = i;
          }
       }
    //
    // link particles
    //
    __global__ void link_particles(vars v) {
       int index = blockIdx.x*blockDim.x+threadIdx.x;
       int nindex = gridDim.x*blockDim.x;
       int start = (index/float(nindex))*v.np;
       int stop = ((index+1)/float(nindex))*v.np;
       for (int i = start; i < stop; ++i) {
          int ix = (v.nx-1)*(v.dpx[i]-v.xmin)/(v.xmax-v.xmin);
          int iy = (v.ny-1)*(v.dpy[i]-v.ymin)/(v.ymax-v.ymin);
          int iz = (v.nz-1)*(v.dpz[i]-v.zmin)/(v.zmax-v.zmin);
          for (int dx = -1; dx <= 1; ++dx) {
             if (((ix+dx) < 0) || ((ix+dx) > (v.nx-1))) continue;
             for (int dy = -1; dy <= 1; ++dy) {
                if (((iy+dy) < 0) || ((iy+dy) > (v.ny-1))) continue;
                for (int dz = -1; dz <= 1; ++dz) {
                   if (((iz+dz) < 0) || ((iz+dz) > (v.nz-1))) continue;
                   int isort = v.nz*v.ny*(ix+dx)+v.nz*(iy+dy)+(iz+dz);
                   for (int s = 0; s < v.dnsort[isort]; ++s) {
                      int j = v.dsort[v.ms*isort+s];
                      if (i != j) {
                         float d = sqrtf(
                             pow((v.dpx[i]-v.dpx[j]),2)
                            +pow((v.dpy[i]-v.dpy[j]),2)
                            +pow((v.dpz[i]-v.dpz[j]),2));
                         if (d < 1.1*v.d) {
                            v.dlinks[v.lpp*i+v.dnlinks[i]] = j;
                            v.dnlinks[i] += 1;
                            }
                         }
                      }
                   }
                }
             }
          }
       }
    //
    // CUDA error check
    //
    void cudaCheck(string msg) {
       cudaError err;
       err = cudaGetLastError();
       if (cudaSuccess != err)
       cerr << msg << ": " << cudaGetErrorString(err) << endl;
       }
    //
    // main
    //
    int main(int argc, char** argv) {
    
    Amira Abdel-Rahman (admin)'s avatar
    Amira Abdel-Rahman (admin) committed
       if (argc == 1) {
          cerr << "command line: particles source | particles_bonds [arguments] | bonds sink" << endl;
          cerr << "   grid=(grid size)" << endl;
          cerr << "   block=(block size)" << endl;
    
    Amira Abdel-Rahman (admin)'s avatar
    Amira Abdel-Rahman (admin) committed
       for (int i = 1; i < argc; ++i) {
          if (strstr(argv[i],"grid=") != nullptr)
             v.grid= stoi(argv[i]+5);
          else if (strstr(argv[i],"block=") != nullptr)
             v.block = stoi(argv[i]+6);
          else {
             cerr << "particles_bonds argument not recognized" << endl;
             return 1;
             }
          }
    
       //
       // read and parse inputs
       //
       string s;
       while (1) {
          getline(cin,s);
          if (s.compare("xmin:") == 0) {
             getline(cin,s);
             v.xmin = stof(s);
             }
          else if (s.compare("xmax:") == 0) {
             getline(cin,s);
             v.xmax = stof(s);
             }
          else if (s.compare("ymin:") == 0) {
             getline(cin,s);
             v.ymin = stof(s);
             }
          else if (s.compare("ymax:") == 0) {
             getline(cin,s);
             v.ymax = stof(s);
             }
          else if (s.compare("zmin:") == 0) {
             getline(cin,s);
             v.zmin = stof(s);
             }
          else if (s.compare("zmax:") == 0) {
             getline(cin,s);
             v.zmax = stof(s);
             }
          else if (s.compare("links per particle:") == 0) {
             getline(cin,s);
             v.lpp = stof(s);
             }
          else if (s.compare("number:") == 0) {
             getline(cin,s);
             v.np = stoi(s); // number of particles
             v.px = new float[v.np]; // CPU
             v.py = new float[v.np];
             v.pz = new float[v.np];
             v.ms = 10; // max particles per sort bin
             cudaMalloc(&v.dpx,v.np*sizeof(float)); // GPU
             cudaMalloc(&v.dpy,v.np*sizeof(float));
             cudaMalloc(&v.dpz,v.np*sizeof(float));
             v.links = new int[v.lpp*v.np];
             v.nlinks = new int[v.np];
             cudaMalloc(&v.dlinks,v.lpp*v.np*sizeof(int)); // links
             cudaMalloc(&v.dnlinks,v.np*sizeof(int));
             cudaMemset(v.dnlinks,0,v.np*sizeof(int));
             }
          else if (s.compare("pitch:") == 0) {
             getline(cin,s);
             v.d = stof(s);
             v.ds = 1.1*v.d; // bigger to avoid roundoff at edges
             v.nx = (v.xmax-v.xmin)/v.ds;
             v.ny = (v.ymax-v.ymin)/v.ds;
             v.nz = (v.zmax-v.zmin)/v.ds;
             cudaMalloc(&v.dsort,v.nz*v.ny*v.nx*v.ms*sizeof(int));
             cudaMalloc(&v.dnsort,v.nz*v.ny*v.nx*sizeof(int));
             cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int));
             }
          else if (s.compare("particles:") == 0) {
             for (int i = 0; i < v.np ; ++i) {
                cin.read((char *)&v.px[i],4);
                cin.read((char *)&v.py[i],4);
                cin.read((char *)&v.pz[i],4);
                }
             }
          else if (s.compare("end:") == 0)
             break;
          }
       cerr << "particles_bonds:" << endl;
       cerr << "   input " << v.np << " particles" << endl;
       //
       // copy to GPU
       //
       cudaMemcpy(v.dpx,v.px,v.np*sizeof(float),
          cudaMemcpyHostToDevice);
       cudaMemcpy(v.dpy,v.py,v.np*sizeof(float),
          cudaMemcpyHostToDevice);
       cudaMemcpy(v.dpz,v.pz,v.np*sizeof(float),
          cudaMemcpyHostToDevice);
       //
       // sort
       //
       sort_particles<<<v.grid,v.block>>>(v);
       cudaCheck("sort");
       //
       // link
       //
       link_particles<<<v.grid,v.block>>>(v);
       cudaCheck("link");
       //
       // synchronize
       //
       cudaDeviceSynchronize();
       //
       // copy to CPU
       //
       cudaMemcpy(v.nlinks,v.dnlinks,v.np*sizeof(int),
          cudaMemcpyDeviceToHost);
       cudaMemcpy(v.links,v.dlinks,v.lpp*v.np*sizeof(int),
          cudaMemcpyDeviceToHost);
       //
       // write bonds
       //
       cout << "type:" << endl;
       cout << "bonds" << endl;
       cout << "format:" << endl;
       cout << "xyz float32" << endl;
       cout << "xmin:" << endl;
       cout << v.xmin << endl;
       cout << "xmax:" << endl;
       cout << v.xmax << endl;
       cout << "ymin:" << endl;
       cout << v.ymin << endl;
       cout << "ymax:" << endl;
       cout << v.ymax << endl;
       cout << "zmin:" << endl;
       cout << v.zmin << endl;
       cout << "zmax:" << endl;
       cout << v.zmax << endl;
       cout << "pitch:" << endl;
       cout << v.d << endl;
       cout << "links per particle:" << endl;
       cout << v.lpp << endl;
       cout << "number:" << endl;
       cout << v.np << endl;
       cout << "particles:" << endl;
       for (int i = 0; i < v.np; ++i) {
          cout.write((char*)&v.px[i],4);
          cout.write((char*)&v.py[i],4);
          cout.write((char*)&v.pz[i],4);
          }
       cout << endl << "nlinks:" << endl;
       for (int i = 0; i < v.np; ++i)
          cout.write((char*)&v.nlinks[i],4);
       cout << endl << "links:" << endl;
       for (int i = 0; i < v.lpp*v.np; ++i)
          cout.write((char*)&v.links[i],4);
       cout << endl << "end:" << endl;
       }