// // bonds_stress_strain.cu // input bonds, apply stress, output strain // (c) MIT CBA Neil Gershenfeld 6/30/20 // // includes // #include <iostream> #include <string> #include <cstring> #include <chrono> using namespace std; // // variables // class vars { public: float3 *hp,*hf; // host position, force float3 *dp,*dv,*df; // device position, velocity, force float *s,*ds; // strain float d,dmax,sd; // particle pitch, max, sort float step; // strain step size float bond; // bond break multiplier char load[100]; // load type float percent; // zlimit float cxmin,czmin,crmin,cxmax,czmax,crmax; // ycylinder float lxmin,lzmin,lymin,lxmax,lzmax,lymax; // loadbox float sxmin,szmin,symin,sxmax,szmax,symax; // supportbox float stress,strain; // stress, strain accumulators int lpp; // links per particle float pxmin,pxmax,pymin,pymax,pzmin,pzmax; // particle limits float xmin,xmax,ymin,ymax,zmin,zmax; // sort limits float spring,mass,diss,dt; // integration int nloop,grid,block; // update loop, GPU threads int ms = 10; // maxsort int np,nx,ny,nz; // number of particles, sort int *dsort,*dnsort; // sort int *links,*nlinks,*dlinks,*dnlinks; // links char *type,*dtype; // particle type // p:particle b:bottom t:top }; vars v; // // assign particles // void assign_particles(vars v) { if (strcmp(v.load,"zlimit") == 0) { float zbot = v.pzmin+(v.pzmax-v.pzmin)*v.percent/100.0; float ztop = v.pzmax-(v.pzmax-v.pzmin)*v.percent/100.0; for (int i = 0; i < v.np; ++i){ if (v.hp[i].z < zbot){ v.type[i] = 'b'; } else if (v.hp[i].z > ztop){ v.type[i] = 't'; } else v.type[i] = 'p'; } } else if (strcmp(v.load,"ycylinder") == 0) { for (int i = 0; i < v.np; ++i){ float rmin = sqrt(pow(v.hp[i].z-v.czmin,2) +pow(v.hp[i].x-v.cxmin,2)); float rmax = sqrt(pow(v.hp[i].z-v.czmax,2) +pow(v.hp[i].x-v.cxmax,2)); if (rmin < v.crmin){ v.type[i] = 'b'; } else if (rmax < v.crmax){ v.type[i] = 't'; } else v.type[i] = 'p'; } } else if (strcmp(v.load,"loadbox") == 0) { // float zbot = v.pzmin+(v.pzmax-v.pzmin)*v.percent/100.0; // float ztop = v.pzmax-(v.pzmax-v.pzmin)*v.percent/100.0; cout << "v.pzmin:" <<v.pzmin<< endl; cout << "v.pzmax:" <<v.pzmax<< endl; for (int i = 0; i < v.np; ++i){ if (v.hp[i].z <= v.szmax &&v.hp[i].z >= v.szmin && v.hp[i].y <= v.symax &&v.hp[i].y >= v.symin && v.hp[i].x <= v.sxmax &&v.hp[i].x >= v.sxmin){ v.type[i] = 'b'; } else if (v.hp[i].z <= v.lzmax &&v.hp[i].z >= v.lzmin && v.hp[i].y <= v.lymax &&v.hp[i].y >= v.lymin && v.hp[i].x <= v.lxmax &&v.hp[i].x >= v.lxmin){ v.type[i] = 't'; } else v.type[i] = 'p'; } } else { cerr << "bonds_stress_strain: load type not defined" << endl; exit(1); } } // // sort particles // __global__ void sort_particles(vars v) { int index = blockIdx.x*blockDim.x+threadIdx.x; int nindex = gridDim.x*blockDim.x; int start = (index/float(nindex))*v.np; int stop = ((index+1)/float(nindex))*v.np; for (int i = start; i < stop; ++i) { int ix = (v.nx-1)*(v.dp[i].x-v.xmin)/(v.xmax-v.xmin); if ((ix < 0) || (ix > v.nx-1)) continue; int iy = (v.ny-1)*(v.dp[i].y-v.ymin)/(v.ymax-v.ymin); if ((iy < 0) || (iy > v.ny-1)) continue; int iz = (v.nz-1)*(v.dp[i].z-v.zmin)/(v.zmax-v.zmin); if ((iz < 0) || (iz > v.nz-1)) continue; int isort = v.nz*v.ny*ix+v.nz*iy+iz; v.dsort[v.ms*isort+atomicAdd(&v.dnsort[isort],1)] = i; } } // // test for link // __device__ bool linked(int i,int j,vars v) { for (int l = 0; l < v.dnlinks[i]; ++l) { if (j == v.dlinks[v.lpp*i+l]) return true; } return false; } // // find force // __device__ void find_force(int i,int j,vars v) { float dx = v.dp[i].x-v.dp[j].x; float dy = v.dp[i].y-v.dp[j].y; float dz = v.dp[i].z-v.dp[j].z; if (linked(i,j,v)) { float d = sqrt(dx*dx+dy*dy+dz*dz); float nx = dx/d; float ny = dy/d; float nz = dz/d; float f = -v.spring*(d-v.d)/v.d; float dvx = v.dv[i].x-v.dv[j].x; float dvy = v.dv[i].y-v.dv[j].y; float dvz = v.dv[i].z-v.dv[j].z; f -= v.diss*(dvx*dx+dvy*dy+dvz*dz)/d; atomicAdd(&(v.df[i].x),f*nx); atomicAdd(&(v.df[i].y),f*ny); atomicAdd(&(v.df[i].z),f*nz); atomicAdd(&(v.df[j].x),-f*nx); atomicAdd(&(v.df[j].y),-f*ny); atomicAdd(&(v.df[j].z),-f*nz); } else { float d = sqrt(dx*dx+dy*dy+dz*dz); if (d < v.d) { float nx = dx/d; float ny = dy/d; float nz = dz/d; float f = -v.spring*(d-v.d)/v.d; float dvx = v.dv[i].x-v.dv[j].x; float dvy = v.dv[i].y-v.dv[j].y; float dvz = v.dv[i].z-v.dv[j].z; f -= v.diss*(dvx*dx+dvy*dy+dvz*dz)/d; atomicAdd(&(v.df[i].x),f*nx); atomicAdd(&(v.df[i].y),f*ny); atomicAdd(&(v.df[i].z),f*nz); atomicAdd(&(v.df[j].x),-f*nx); atomicAdd(&(v.df[j].y),-f*ny); atomicAdd(&(v.df[j].z),-f*nz); } } } // // calculate forces // __global__ void calculate_forces(vars v) { int index = blockIdx.x*blockDim.x+threadIdx.x; int nindex = gridDim.x*blockDim.x; int start = (index/float(nindex))*v.np; int stop = ((index+1)/float(nindex))*v.np; for (int i = start; i < stop; ++i) { float x = v.dp[i].x; float y = v.dp[i].y; float z = v.dp[i].z; int ix = (v.nx-1)*(v.dp[i].x-v.xmin)/(v.xmax-v.xmin); int iy = (v.ny-1)*(v.dp[i].y-v.ymin)/(v.ymax-v.ymin); int iz = (v.nz-1)*(v.dp[i].z-v.zmin)/(v.zmax-v.zmin); // // loop over links to find strain // v.ds[i] = 0; int norm = 0; for (int l = 0; l < v.dnlinks[i]; ++l) { int j = v.dlinks[v.lpp*i+l]; if (j == -1) // // ignore deleted link // continue; float dx = x-v.dp[j].x; float dy = y-v.dp[j].y; float dz = z-v.dp[j].z; float d = sqrtf(dx*dx+dy*dy+dz*dz); // // accumulate // v.ds[i] += abs(d-v.d)/v.d; norm += 1; // // check for broken links // if (d > v.dmax) // // yes, mark for deletion // v.dlinks[v.lpp*i+l] = -1; } v.ds[i] = v.ds[i]/norm; // // loop over sort to find forces // for (int dx = -1; dx <= 1; ++dx) { if (((ix+dx) < 0) || ((ix+dx) > (v.nx-1))) continue; for (int dy = -1; dy <= 1; ++dy) { if (((iy+dy) < 0) || ((iy+dy) > (v.ny-1))) continue; for (int dz = -1; dz <= 1; ++dz) { if (((iz+dz) < 0) || ((iz+dz) > (v.nz-1))) continue; int isort = v.nz*v.ny*(ix+dx)+v.nz*(iy+dy)+(iz+dz); for (int s = 0; s < v.dnsort[isort]; ++s) { int j = v.dsort[v.ms*isort+s]; if (i < j) find_force(i,j,v); } } } } } } // // integrate positions // __global__ void integrate_positions(vars v) { int index = blockIdx.x*blockDim.x+threadIdx.x; int nindex = gridDim.x*blockDim.x; int start = (index/float(nindex))*v.np; int stop = ((index+1)/float(nindex))*v.np; float ax,ay,az; for (int i = start; i < stop; ++i) { if (v.dtype[i] == 'b') v.dp[i].z -= v.step; else if (v.dtype[i] == 't') v.dp[i].z += v.step; else { ax = v.df[i].x/v.mass; ay = v.df[i].y/v.mass; az = v.df[i].z/v.mass; v.dv[i].x += v.dt*ax; v.dv[i].y += v.dt*ay; v.dv[i].z += v.dt*az; v.dp[i].x += v.dt*v.dv[i].x; v.dp[i].y += v.dt*v.dv[i].y; v.dp[i].z += v.dt*v.dv[i].z; } } } // // CUDA error check // void cudaCheck(string msg) { cudaError err; err = cudaGetLastError(); if (cudaSuccess != err) cerr << msg << ": " << cudaGetErrorString(err) << endl; } // // main // int main(int argc, char** argv) { if (argc == 1) { cerr << "command line: bonds source | bonds_stress_strain [arguments] | strain sink" << endl; cerr << " grid=(grid size)" << endl; cerr << " block=(block size)" << endl; cerr << " spring=(spring constant)" << endl; cerr << " mass=(particle mass)" << endl; cerr << " dissipation=(damping)" << endl; cerr << " dt=(time step)" << endl; cerr << " loop=(steps between display)" << endl; cerr << " step=(strain step)" << endl; cerr << " bond=(bond breaking distance, lattice units)" << endl; cerr << " load=(type)" << endl; cerr << " zlimit" << endl; cerr << " percent=(top and bottom z percent)" << endl; cerr << " ycylinder" << endl; cerr << " xmin=(bottom x)" << endl; cerr << " zmin=(bottom z)" << endl; cerr << " rmin=(bottom r)" << endl; cerr << " xmax=(top x)" << endl; cerr << " zmax=(top z)" << endl; cerr << " rmax=(top r)" << endl; return 1; } for (int i = 1; i < argc; ++i) { if (strstr(argv[i],"grid=") != nullptr) v.grid = stoi(argv[i]+5); else if (strstr(argv[i],"block=") != nullptr) v.block = stoi(argv[i]+6); else if (strstr(argv[i],"spring=") != nullptr) v.spring = stof(argv[i]+7); else if (strstr(argv[i],"mass=") != nullptr) v.mass = stof(argv[i]+5); else if (strstr(argv[i],"dissipation=") != nullptr) v.diss = stof(argv[i]+12); else if (strstr(argv[i],"dt=") != nullptr) v.dt = stof(argv[i]+3); else if (strstr(argv[i],"loop=") != nullptr) v.nloop = stoi(argv[i]+5); else if (strstr(argv[i],"step=") != nullptr) v.step = stof(argv[i]+5); else if (strstr(argv[i],"bond=") != nullptr) v.bond = stof(argv[i]+5); else if (strstr(argv[i],"load=") != nullptr) strcpy(v.load,argv[i]+5); else if (strstr(argv[i],"lxmin=") != nullptr) v.lxmin = stof(argv[i]+6); else if (strstr(argv[i],"lymin=") != nullptr) v.lymin = stof(argv[i]+6); else if (strstr(argv[i],"lzmin=") != nullptr) v.lzmin = stof(argv[i]+6); else if (strstr(argv[i],"lxmax=") != nullptr) v.lxmax = stof(argv[i]+6); else if (strstr(argv[i],"lymax=") != nullptr) v.lymax = stof(argv[i]+6); else if (strstr(argv[i],"lzmax=") != nullptr) v.lzmax = stof(argv[i]+6); else if (strstr(argv[i],"sxmin=") != nullptr) v.sxmin = stof(argv[i]+6); else if (strstr(argv[i],"symin=") != nullptr) v.symin = stof(argv[i]+6); else if (strstr(argv[i],"szmin=") != nullptr) v.szmin = stof(argv[i]+6); else if (strstr(argv[i],"sxmax=") != nullptr) v.sxmax = stof(argv[i]+6); else if (strstr(argv[i],"symax=") != nullptr) v.symax = stof(argv[i]+6); else if (strstr(argv[i],"szmax=") != nullptr) v.szmax = stof(argv[i]+6); else if (strstr(argv[i],"percent=") != nullptr) v.percent = stof(argv[i]+8); else if (strstr(argv[i],"xmin=") != nullptr) v.cxmin = stof(argv[i]+5); else if (strstr(argv[i],"zmin=") != nullptr) v.czmin = stof(argv[i]+5); else if (strstr(argv[i],"rmin=") != nullptr) v.crmin = stof(argv[i]+5); else if (strstr(argv[i],"xmax=") != nullptr) v.cxmax = stof(argv[i]+5); else if (strstr(argv[i],"zmax=") != nullptr) v.czmax = stof(argv[i]+5); else if (strstr(argv[i],"rmax=") != nullptr) v.crmax = stof(argv[i]+5); else { cerr << "bonds_stress_strain argument not recognized" << endl; return 1; } } // // read and parse inputs // string s; while (1) { getline(cin,s); if (s.compare("xmin:") == 0) { getline(cin,s); v.pxmin = stof(s); } else if (s.compare("xmax:") == 0) { getline(cin,s); v.pxmax = stof(s); } else if (s.compare("ymin:") == 0) { getline(cin,s); v.pymin = stof(s); } else if (s.compare("ymax:") == 0) { getline(cin,s); v.pymax = stof(s); } else if (s.compare("zmin:") == 0) { getline(cin,s); v.pzmin = stof(s); } else if (s.compare("zmax:") == 0) { getline(cin,s); v.pzmax = stof(s); } else if (s.compare("pitch:") == 0) { getline(cin,s); v.d = stof(s); v.dmax = v.bond*v.d; // bond breaking v.sd = 1.01*v.d; // sort bins bigger to avoid roundoff at edges float xmid = (v.pxmax+v.pxmin)/2.0; float ymid = (v.pymax+v.pymin)/2.0; float zmid = (v.pzmax+v.pzmin)/2.0; v.xmin = xmid+1.25*(v.pxmin-xmid); // enlarge sort volume v.xmax = xmid+1.25*(v.pxmax-xmid); v.ymin = ymid+1.25*(v.pymin-ymid); v.ymax = ymid+1.25*(v.pymax-ymid); v.zmin = zmid+1.25*(v.pzmin-zmid); v.zmax = zmid+1.25*(v.pzmax-zmid); v.nx = (v.xmax-v.xmin)/v.sd; v.ny = (v.ymax-v.ymin)/v.sd; v.nz = (v.zmax-v.zmin)/v.sd; cudaMalloc(&v.dsort,v.nz*v.ny*v.nx*v.ms*sizeof(int)); cudaMalloc(&v.dnsort,v.nz*v.ny*v.nx*sizeof(int)); cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int)); cudaCheck("malloc pitch"); } else if (s.compare("links per particle:") == 0) { getline(cin,s); v.lpp = stoi(s); } else if (s.compare("number:") == 0) { getline(cin,s); v.np = stoi(s); // number of particles cudaMallocHost(&v.hp,v.np*sizeof(float3)); // host arrays cudaMallocHost(&v.hf,v.np*sizeof(float3)); cudaMallocHost(&v.s,v.np*sizeof(float)); v.links = new int[v.lpp*v.np]; v.nlinks = new int[v.np]; v.type = new char[v.np]; cudaMalloc(&v.dp,v.np*sizeof(float3)); // device arrays cudaMalloc(&v.dv,v.np*sizeof(float3)); cudaMalloc(&v.df,v.np*sizeof(float3)); cudaMalloc(&v.ds,v.np*sizeof(float)); cudaMalloc(&v.dlinks,v.lpp*v.np*sizeof(int)); cudaMalloc(&v.dnlinks,v.np*sizeof(int)); cudaMemset(v.dnlinks,0,v.np*sizeof(int)); cudaMalloc(&v.dtype,v.np*sizeof(char)); cudaCheck("malloc number"); } else if (s.compare("particles:") == 0) { for (int i = 0; i < v.np ; ++i) { cin.read((char *)&v.hp[i].x,4); cin.read((char *)&v.hp[i].y,4); cin.read((char *)&v.hp[i].z,4); } } else if (s.compare("nlinks:") == 0) { for (int i = 0; i < v.np ; ++i) cin.read((char *)&v.nlinks[i],4); } else if (s.compare("links:") == 0) { for (int i = 0; i < v.lpp*v.np ; ++i) cin.read((char *)&v.links[i],4); } else if (s.compare("end:") == 0) break; } cerr << "bonds_stress_strain:" << endl; cerr << " input " << v.np << " particles" << endl; // // assign particle types // assign_particles(v); // // copy data to GPU // cudaMemcpy(v.dp,v.hp,v.np*sizeof(float3), cudaMemcpyHostToDevice); cudaMemcpy(v.dnlinks,v.nlinks,v.np*sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(v.dlinks,v.links,v.lpp*v.np*sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(v.dtype,v.type,v.np*sizeof(char), cudaMemcpyHostToDevice); cudaCheck("copy to GPU"); // // start output // cout << "type:" << endl; cout << "bonds" << endl; cout << "format:" << endl; cout << "xyz float32" << endl; cout << "pitch:" << endl; cout << v.d << endl; cout << "links per particle:" << endl; cout << v.lpp << endl; cout << "number:" << endl; cout << v.np << endl; cout << "xmin:" << endl; cout << v.xmin << endl; cout << "xmax:" << endl; cout << v.xmax << endl; cout << "ymin:" << endl; cout << v.ymin << endl; cout << "ymax:" << endl; cout << v.ymax << endl; cout << "zmin:" << endl; cout << v.zmin << endl; cout << "zmax:" << endl; cout << v.zmax << endl; cout << "particles:" << endl; // for (int i = 0; i < v.np; ++i) { // cout.write((char*)&v.hp[i].y,4); // cout.write((char*)&v.hp[i].y,4); // cout.write((char*)&v.hp[i].y,4); // } // cout << endl << "type:" << endl; // for (int i = 0; i < v.np; ++i) // cout.write(&v.type[i],1); // cout << endl << "continue:" << endl; // // start main loop // while (1) { // // update loop // cudaDeviceSynchronize(); auto t0 = chrono::high_resolution_clock::now(); // cout << "outer loop" << endl; for (int i = 0; i < v.nloop; ++i) { // cout << "inner loop" << endl; // // sort // cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int)); sort_particles<<<v.grid,v.block>>>(v); cudaCheck("sort"); // // forces // cudaMemset(v.df,0,v.np*sizeof(float3)); calculate_forces<<<v.grid,v.block>>>(v); cudaCheck("forces"); // // integrate // integrate_positions<<<v.grid,v.block>>>(v); cudaCheck("integrate"); } cudaDeviceSynchronize(); auto t1 = chrono::high_resolution_clock::now(); float dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count(); // fprintf(stderr,"\r mps: %.0f",(v.nloop*v.np)/dt); // // measure timing // t0 = chrono::high_resolution_clock::now(); cudaMemset(v.dnsort,0,v.nz*v.ny*v.nx*sizeof(int)); sort_particles<<<v.grid,v.block>>>(v); cudaCheck("time sort"); cudaDeviceSynchronize(); t1 = chrono::high_resolution_clock::now(); dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count(); cerr << " srt: " << dt; // t0 = chrono::high_resolution_clock::now(); cudaMemset(v.df,0,v.np*sizeof(float3)); calculate_forces<<<v.grid,v.block>>>(v); cudaCheck("time forces"); cudaDeviceSynchronize(); t1 = chrono::high_resolution_clock::now(); dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count(); cerr << " frc: " << dt; // t0 = chrono::high_resolution_clock::now(); integrate_positions<<<v.grid,v.block>>>(v); cudaCheck("time integrate"); cudaDeviceSynchronize(); t1 = chrono::high_resolution_clock::now(); dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count(); cerr << " int: " << dt; // // copy data to CPU // t0 = chrono::high_resolution_clock::now(); cudaMemcpy(v.hp,v.dp,v.np*sizeof(float3),cudaMemcpyDeviceToHost); cudaMemcpy(v.hf,v.df,v.np*sizeof(float3),cudaMemcpyDeviceToHost); cudaMemcpy(v.s,v.ds,v.np*sizeof(int),cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaCheck("copy to CPU"); t1 = chrono::high_resolution_clock::now(); dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count(); cerr << " cpy: " << dt; // // stress // v.stress = 0; for (int i = 0; i < v.np; ++i) { if (v.type[i] == 'b') v.stress -= v.hf[i].z; else if (v.type[i] == 't') v.stress += v.hf[i].z; } //cerr << endl << v.stress << endl; // // output // t0 = chrono::high_resolution_clock::now(); // cout << "particles:" << endl; // for (int i = 0; i < v.np; ++i) { // cout.write((char*)&v.hp[i].x,4); // cout.write((char*)&v.hp[i].y,4); // cout.write((char*)&v.hp[i].z,4); // } // cout << endl << "strain:" << endl; // for (int i = 0; i < v.np; ++i) // cout.write((char*)&v.s[i],4); // cout << endl << "continue:" << endl; t1 = chrono::high_resolution_clock::now(); dt = chrono::duration_cast<std::chrono::microseconds>(t1-t0).count(); // fprintf(stderr," out: %.3g ",dt); // cerr << "(us)" << flush; } }