在B站上找到的视频,学习之后,自己总结了一下,归档留存以后使用
有人下载了CUDA Toolkit和cuDNN,但是我这边只安装了CUDA Toolkit就能在vs中运行代码了
CUDA Toolkit下载链接:https://developer.nvidia.com/cuda-toolkit-archive(我下载的是12.3,当时的最新版)
cuDNN Archive | NVIDIA Developer下载链接:https://developer.nvidia.com/rdp/cudnn-archive(我下载的是v8.9.5,能支持CUDA 12.x)
选择安装部件时,我是全选的,怕不全选安装不全,至少能用,不差这些空间
找到一个固定能找到的安装位置,此处是我安装的位置
安装完成之后就可以在vs中看到sample代码了
后面使用vs写代码:
3.配置库(记住!!配置时先改为debug和x64,release版本应该也是同理)
4.配置静态链接库路径
至此配置完成,与我参考的帖子有些区别,但至少能跑代码了
下面在源文件处创建test.cu文件,这个代码是自带的测试程序,这个能跑了,cuda就差不多了
// NOTE: the pasted source used typographic quotes (“ ”) in these includes,
// which the preprocessor rejects; they must be plain double quotes.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>

// Host-side helper (defined below) that owns all device allocation,
// transfer, launch and cleanup for the vector addition.
cudaError_t addWithCuda(int* c, const int* a, const int* b, unsigned int size);

// Element-wise vector addition: c[i] = a[i] + b[i].
// The pasted source had lost the leading underscores of __global__.
// Launched as <<<1, size>>> (one block, one thread per element), so
// threadIdx.x indexes the arrays directly; the caller must keep size
// within the device's max-threads-per-block limit (5 here, so it is safe).
__global__ void addKernel(int* c, const int* a, const int* b)
{
    int i = threadIdx.x;
    c[i] = a[i] + b[i];
}
// Entry point of the bundled SDK sample: adds two 5-element vectors on the
// GPU and prints the result. Returns 0 on success, 1 on any CUDA failure.
int main()
{
    const int arraySize = 5;
    const int a[arraySize] = { 1, 2, 3, 4, 5 };
    const int b[arraySize] = { 10, 20, 30, 40, 50 };
    int c[arraySize] = { 0 };

    // Add vectors in parallel (helper handles all device-side work).
    cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!\n");
        return 1;
    }

    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
        c[0], c[1], c[2], c[3], c[4]);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!\n");
        return 1;
    }

    return 0;
}
// Helper function for using CUDA to add vectors in parallel.
// Allocates three device buffers, copies the inputs to the GPU, launches
// addKernel with one thread per element, copies the sum back into c, and
// frees all device memory on every exit path (via the Error label).
// Returns cudaSuccess, or the first CUDA error encountered.
// NOTE: the pasted source had lost one '*' from each (void**) cast and the
// straight quotes / \n escapes in the error messages; both are restored here.
cudaError_t addWithCuda(int* c, const int* a, const int* b, unsigned int size)
{
    int* dev_a = 0;
    int* dev_b = 0;
    int* dev_c = 0;
    cudaError_t cudaStatus;

    // Choose which GPU to run on; change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?\n");
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output).
    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!\n");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!\n");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!\n");
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!\n");
        goto Error;
    }

    cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!\n");
        goto Error;
    }

    // Launch a kernel on the GPU with one thread for each element.
    addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

    // Kernel launches return no status directly; launch-configuration
    // errors must be fetched with cudaGetLastError().
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!\n");
        goto Error;
    }

Error:
    // cudaFree(NULL) is a no-op, so unconditional cleanup is safe here.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}
下面再给个代码,这个是查看显卡设备信息的代码
// Second, stand-alone sample program (separate .cu file): enumerates all
// CUDA devices and prints their properties. The pasted source was mangled:
// typographic quotes, stripped \n escapes, mojibake "齝es" where "%zu bytes"
// format specifiers were, and "minor = 9999" (assignment) where "minor == 9999"
// (comparison) was intended — all restored here.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>

int main() {
    int deviceCount;
    cudaGetDeviceCount(&deviceCount);
    int dev;
    for (dev = 0; dev < deviceCount; dev++)
    {
        int driver_version = 0, runtime_version = 0;
        cudaDeviceProp deviceProp;
        cudaGetDeviceProperties(&deviceProp, dev);
        // compute capability 9999.9999 is the runtime's sentinel for
        // "no real CUDA-capable device" — only worth reporting once.
        if (dev == 0)
            if (deviceProp.minor == 9999 && deviceProp.major == 9999)
                printf("\n");
        printf("Device %d: \"%s\"\n", dev, deviceProp.name);
        cudaDriverGetVersion(&driver_version);
        printf("CUDA驱动版本: %d.%d\n", driver_version / 1000, (driver_version % 1000) / 10);
        cudaRuntimeGetVersion(&runtime_version);
        printf("CUDA运行时版本: %d.%d\n", runtime_version / 1000, (runtime_version % 1000) / 10);
        printf("设备计算能力: %d.%d\n", deviceProp.major, deviceProp.minor);
        // totalGlobalMem, totalConstMem, sharedMemPerBlock, memPitch and
        // texturePitchAlignment are size_t fields, hence %zu.
        printf("Total amount of Global Memory: %zu bytes\n", deviceProp.totalGlobalMem);
        printf("Number of SMs: %d\n", deviceProp.multiProcessorCount);
        printf("Total amount of Constant Memory: %zu bytes\n", deviceProp.totalConstMem);
        printf("Total amount of Shared Memory per block: %zu bytes\n", deviceProp.sharedMemPerBlock);
        printf("Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
        printf("Warp size: %d\n", deviceProp.warpSize);
        printf("Maximum number of threads per SM: %d\n", deviceProp.maxThreadsPerMultiProcessor);
        printf("Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
        printf("Maximum size of each dimension of a block: %d x %d x %d\n", deviceProp.maxThreadsDim[0],
            deviceProp.maxThreadsDim[1],
            deviceProp.maxThreadsDim[2]);
        printf("Maximum size of each dimension of a grid: %d x %d x %d\n", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]);
        printf("Maximum memory pitch: %zu bytes\n", deviceProp.memPitch);
        printf("Texture alignment: %zu bytes\n", deviceProp.texturePitchAlignment);
        // clockRate / memoryClockRate are reported in kHz, hence the scaling.
        printf("Clock rate: %.2f GHz\n", deviceProp.clockRate * 1e-6f);
        printf("Memory Clock rate: %.0f MHz\n", deviceProp.memoryClockRate * 1e-3f);
        printf("Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
    }
    return 0;
}
具体应该怎么写cuda代码还需要在C的基础上单独学习,主要是内存和并行核的控制
参考:
https://blog.csdn.net/ashiners/article/details/

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/170282.html