// // particles_bonds.cu // input particles, output bonds // (c) MIT CBA Neil Gershenfeld 6/17/20 // // includes // #include <iostream> #include <string> #include <cstring> using namespace std; // // variables // class vars { public: float *px,*py,*pz; // CPU particles float *dpx,*dpy,*dpz; // GPU particles float d,ds; // particle pitch, sort pitch float xmin,xmax,ymin,ymax,zmin,zmax; // limits int grid,block; // GPU int lpp; // links per particle int ms = 10; // maxsort int np,nx,ny,nz; // number of particles, sort int *dsort,*dnsort; // sort int *links,*nlinks,*dlinks,*dnlinks; // links }; vars v; // // sort particles // __global__ void sort_particles(vars v) { int index = blockIdx.x*blockDim.x+threadIdx.x; int nindex = gridDim.x*blockDim.x; int start = (index/float(nindex))*v.np; int stop = ((index+1)/float(nindex))*v.np; for (int i = start; i < stop; ++i) { int ix = (v.nx-1)*(v.dpx[i]-v.xmin)/(v.xmax-v.xmin); int iy = (v.ny-1)*(v.dpy[i]-v.ymin)/(v.ymax-v.ymin); int iz = (v.nz-1)*(v.dpz[i]-v.zmin)/(v.zmax-v.zmin); int isort = v.nz*v.ny*ix+v.nz*iy+iz; v.dsort[v.ms*isort +atomicAdd(&v.dnsort[isort],1)] = i; } } // // link particles // __global__ void link_particles(vars v) { int index = blockIdx.x*blockDim.x+threadIdx.x; int nindex = gridDim.x*blockDim.x; int start = (index/float(nindex))*v.np; int stop = ((index+1)/float(nindex))*v.np; for (int i = start; i < stop; ++i) { int ix = (v.nx-1)*(v.dpx[i]-v.xmin)/(v.xmax-v.xmin); int iy = (v.ny-1)*(v.dpy[i]-v.ymin)/(v.ymax-v.ymin); int iz = (v.nz-1)*(v.dpz[i]-v.zmin)/(v.zmax-v.zmin); for (int dx = -1; dx <= 1; ++dx) { if (((ix+dx) < 0) || ((ix+dx) > (v.nx-1))) continue; for (int dy = -1; dy <= 1; ++dy) { if (((iy+dy) < 0) || ((iy+dy) > (v.ny-1))) continue; for (int dz = -1; dz <= 1; ++dz) { if (((iz+dz) < 0) || ((iz+dz) > (v.nz-1))) continue; int isort = v.nz*v.ny*(ix+dx)+v.nz*(iy+dy)+(iz+dz); for (int s = 0; s < v.dnsort[isort]; ++s) { int j = v.dsort[v.ms*isort+s]; if (i != j) { float d = sqrtf( pow((v.dpx[i]-v.dpx[j]),2) +pow((v.dpy[i]-v.dpy[j]),2) +pow((v.dpz[i]-v.dpz[j]),2)); if (d < 1.1*v.d) { v.dlinks[v.lpp*i+v.dnlinks[i]] = j; v.dnlinks[i] += 1; } } } } } } } } // // CUDA error check // void cudaCheck(string msg) { cudaError err; err = cudaGetLastError(); if (cudaSuccess != err) cerr << msg << ": " << cudaGetErrorString(err) << endl; } // // main // int main(int argc, char** argv) { if (argc == 1) { cerr << "command line: particles source | particles_bonds [arguments] | bonds sink" << endl; cerr << " grid=(grid size)" << endl; cerr << " block=(block size)" << endl; return 1; } for (int i = 1; i < argc; ++i) { if (strstr(argv[i],"grid=") != nullptr) v.grid= stoi(argv[i]+5); else if (strstr(argv[i],"block=") != nullptr) v.block = stoi(argv[i]+6); else { cerr << "particles_bonds argument not recognized" << endl; return 1; } } // // read and parse inputs // string s; while (1) { getline(cin,s); if (s.compare("xmin:") == 0) { getline(cin,s); v.xmin = stof(s); } else if (s.compare("xmax:") == 0) { getline(cin,s); v.xmax = stof(s); } else if (s.compare("ymin:") == 0) { getline(cin,s); v.ymin = stof(s); } else if (s.compare("ymax:") == 0) { getline(cin,s); v.ymax = stof(s); } else if (s.compare("zmin:") == 0) { getline(cin,s); v.zmin = stof(s); } else if (s.compare("zmax:") == 0) { getline(cin,s); v.zmax = stof(s); } else if (s.compare("links per particle:") == 0) { getline(cin,s); v.lpp = stof(s); } else if (s.compare("number:") == 0) { getline(cin,s); v.np = stoi(s); // number of particles v.px = new float[v.np]; // CPU v.py = new float[v.np]; v.pz = new float[v.np]; v.ms = 10; // max particles per sort bin cudaMalloc(&v.dpx,v.np*sizeof(float)); // GPU cudaMalloc(&v.dpy,v.np*sizeof(float)); cudaMalloc(&v.dpz,v.np*sizeof(float)); v.links = new int[v.lpp*v.np]; v.nlinks = new int[v.np]; cudaMalloc(&v.dlinks,v.lpp*v.np*sizeof(int)); // links cudaMalloc(&v.dnlinks,v.np*sizeof(int)); cudaMemset(v.dnlinks,0,v.np*sizeof(int)); } else if (s.compare("pitch:") == 0) { getline(cin,s); v.d = stof(s); v.ds = 1.1*v.d; // bigger to avoid roundoff at edges v.nx = (v.xmax-v.xmin)/v.ds; v.ny = (v.ymax-v.ymin)/v.ds; v.nz = (v.zmax-v.zmin)/v.ds; cudaMalloc(&v.dsort,v.nz*v.ny*v.nx*v.ms*sizeof(int)); cudaMalloc(&v.dnsort,v.nz*v.ny*v.nx*sizeof(int)); cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int)); } else if (s.compare("particles:") == 0) { for (int i = 0; i < v.np ; ++i) { cin.read((char *)&v.px[i],4); cin.read((char *)&v.py[i],4); cin.read((char *)&v.pz[i],4); } } else if (s.compare("end:") == 0) break; } cerr << "particles_bonds:" << endl; cerr << " input " << v.np << " particles" << endl; // // copy to GPU // cudaMemcpy(v.dpx,v.px,v.np*sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(v.dpy,v.py,v.np*sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(v.dpz,v.pz,v.np*sizeof(float), cudaMemcpyHostToDevice); // // sort // sort_particles<<<v.grid,v.block>>>(v); cudaCheck("sort"); // // link // link_particles<<<v.grid,v.block>>>(v); cudaCheck("link"); // // synchronize // cudaDeviceSynchronize(); // // copy to CPU // cudaMemcpy(v.nlinks,v.dnlinks,v.np*sizeof(int), cudaMemcpyDeviceToHost); cudaMemcpy(v.links,v.dlinks,v.lpp*v.np*sizeof(int), cudaMemcpyDeviceToHost); // // write bonds // cout << "type:" << endl; cout << "bonds" << endl; cout << "format:" << endl; cout << "xyz float32" << endl; cout << "xmin:" << endl; cout << v.xmin << endl; cout << "xmax:" << endl; cout << v.xmax << endl; cout << "ymin:" << endl; cout << v.ymin << endl; cout << "ymax:" << endl; cout << v.ymax << endl; cout << "zmin:" << endl; cout << v.zmin << endl; cout << "zmax:" << endl; cout << v.zmax << endl; cout << "pitch:" << endl; cout << v.d << endl; cout << "links per particle:" << endl; cout << v.lpp << endl; cout << "number:" << endl; cout << v.np << endl; cout << "particles:" << endl; for (int i = 0; i < v.np; ++i) { cout.write((char*)&v.px[i],4); cout.write((char*)&v.py[i],4); cout.write((char*)&v.pz[i],4); } cout << endl << "nlinks:" << endl; for (int i = 0; i < v.np; ++i) cout.write((char*)&v.nlinks[i],4); cout << endl << "links:" << endl; for (int i = 0; i < v.lpp*v.np; ++i) cout.write((char*)&v.links[i],4); cout << endl << "end:" << endl; }