// cppamp1.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <amp.h>
#include <iostream>
using namespace concurrency;
// Number of elements in the 1-D host arrays and array_views used by CppAmpMethod.
const int size = 5;
// Returns the base-2 logarithm of num, rounded up to the next integer
// (i.e. the number of round-up halvings needed to bring num down to 1).
//
// num: value to measure. Both 0 and 1 yield 0.
// Returns: ceil(log2(num)) for num >= 1, and 0 for num == 0.
inline unsigned long Log2 (unsigned long num) restrict(amp)
{
    unsigned long index = 0;
    while (num > 1) {
        index++;
        // Halve, rounding up. Written as (num >> 1) + (num & 1) instead of
        // (num + 1) >> 1 because the latter wraps around to 0 when num is
        // the largest representable value, which ended the loop after a
        // single step with a wrong result.
        num = (num >> 1) + (num & 1);
    }
    return index;
}
void CppAmpMethod() {
int aCPP[] = {1, 2, 3, 4, 5};
int bCPP[] = {6, 7, 8, 9, 10};
int sumCPP[size];
// Create C++ AMP objects.
array_view<const int, 1> a(size, aCPP);
array_view<const int, 1> b(size, bCPP);
array_view<int, 1> sum(size, sumCPP);
sum.discard_data();
parallel_for_each(
// Define the compute domain, which is the set of threads that are created.
sum.extent,
// Define the code to run on each thread on the accelerator.
[=](index<1> idx) restrict(amp)
{
sum[idx] = a[idx] + b[idx];
}
);
// Print the results. The expected output is "7, 9, 11, 13, 15".
for (int i = 0; i < size; i++) {
std::cout << sum[i] << "\n";
}
}
// Per-element kernel helper: stores a[idx] + b[idx] + Log2(b[idx]) into
// sum[idx].
//
// idx: position of the element to process.
// sum: destination view.
// a, b: source views; b's element is also fed to Log2.
void AddElements(index<1> idx, array_view<int, 1> sum, array_view<int, 1> a, array_view<int, 1> b) restrict(amp)
{
    const int left = a[idx];
    const int right = b[idx];
    // Log2 takes and returns unsigned long; the total is converted back to
    // int by the assignment.
    sum[idx] = left + right + Log2(right);
}
void AddArraysWithFunction() {
int aCPP[] = {1, 2, 3, 4, 5};
int bCPP[] = {6, 7, 8, 9, 10};
int sumCPP[5] = {0, 0, 0, 0, 0};
array_view<int, 1> a(5, aCPP);
array_view<int, 1> b(5, bCPP);
array_view<int, 1> sum(5, sumCPP);
parallel_for_each(
sum.extent,
[=](index<1> idx) restrict(amp)
{
AddElements(idx, sum, a, b);
}
);
for (int i = 0; i < 5; i++) {
std::cout << sum[i] << "\n";
}
}
// METHOD 1: the per-tile scratch buffer is handed over as a 2-D array
// (declared by the caller as: tile_static int lds[2][3];).
//
// Each thread stores an encoding of its global index in its own scratch
// slot, waits at the tile barrier, then writes that value plus encodings of
// its tile index and local index into gid, tid and lid at its global
// position. The input view is accepted but not read.
void Idx_TiledAmp(tiled_index<2,3> idx,array_view<int, 2> input, array_view<int, 2> gid, array_view<int, 2> tid, array_view<int, 2> lid, int lds[2][3]) restrict(amp)
{
    const int localRow = idx.local[0];
    const int localCol = idx.local[1];

    // Note: the two components are combined with bitwise OR, not addition.
    lds[localRow][localCol] = idx.global[0] | (idx.global[1] * 100);

    // All threads of the tile reach this point before any of them reads
    // the scratch buffer back.
    idx.barrier.wait();

    gid[idx.global] = lds[localRow][localCol];
    tid[idx.global] = idx.tile[0] | (idx.tile[1] * 10000);
    lid[idx.global] = localRow | (localCol * 10000);
}
// METHOD 2: the per-tile scratch buffer is handed over as a flat pointer to
// the first element of the caller's 2 x 3 array and indexed manually in
// row-major order.
//
// Each thread stores an encoding of its global index in its own scratch
// slot, waits at the tile barrier, then writes that value plus encodings of
// its tile index and local index into gid, tid and lid at its global
// position. The input view is accepted but not read.
//
// idx:  tiled index of the calling thread (tile shape 2 x 3).
// lds:  scratch storage with room for at least 2 * 3 ints.
void Idx_TiledAmp2(tiled_index<2,3> idx,array_view<int, 2> input, array_view<int, 2> gid, array_view<int, 2> tid, array_view<int, 2> lid, int* lds) restrict(amp)
{
    // Row stride of the flattened tile = number of columns in the tile (3).
    // Using the row count (2) here made local (0,2) and (1,0) share slot 2,
    // so two threads raced on one element and one of them read back the
    // other's value.
    const int tileCols = 3;
    const int slot = idx.local[0] * tileCols + idx.local[1];

    // Note: the two components are combined with bitwise OR, not addition.
    lds[slot] = idx.global[0] | idx.global[1] * 100;

    // All threads of the tile reach this point before any of them reads
    // the scratch buffer back.
    idx.barrier.wait();

    gid[idx.global] = lds[slot];
    tid[idx.global] = idx.tile[0] | idx.tile[1] * 10000;
    lid[idx.global] = idx.local[0] | idx.local[1] * 10000;
}
void TiledAmp()
{
// Sample data:
int sampledata[] = {
1, 2, 3, 4, 5, 6,
11, 12, 13, 14, 15, 16,
21, 22, 23, 24, 25, 26,
31, 32, 33, 34, 35, 36,};
// The tiles:
// 2 2 9 7 1 4
// 4 4 8 8 3 4
//
// 1 5 1 2 5 2
// 6 8 3 2 7 2
// Averages:
int averagedata[] = {
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
};
int gid_data[] = {
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
};
int tid_data[] = {
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
};
int lid_data[] = {
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
};
array_view<int, 2> sample(4, 6, sampledata);
array_view<int, 2> gid(4, 6, gid_data);
array_view<int, 2> tid(4, 6, tid_data);
array_view<int, 2> lid(4, 6, lid_data);
array_view<int, 2> average(4, 6, averagedata);
parallel_for_each(
// Create threads for sample.extent and divide the extent into 2 x 2 tiles.
sample.extent.tile<2,3>(),
[=](tiled_index<2,3> idx) restrict(amp)
{
tile_static int sample2[2][3];
//Idx_TiledAmp(idx, sample, gid, tid, lid, sample2);
Idx_TiledAmp2(idx, sample, gid, tid, lid, &sample2[0][0]);
}
);
std::cout << "sample\n";
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 6; j++) {
std::cout << sample(i,j) << " ";
}
std::cout << "\n";
}
std::cout << "gid\n";
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 6; j++) {
std::cout << gid(i,j) << " ";
}
std::cout << "\n";
}
std::cout << "\ntid\n";
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 6; j++) {
std::cout << tid(i,j) << " ";
}
std::cout << "\n";
}
std::cout << "\nlid\n";
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 6; j++) {
std::cout << lid(i,j) << " ";
}
std::cout << "\n";
}
}
// Console entry point. Runs the tiled C++ AMP demo and then performs two
// blocking character reads from stdin before returning 0.
// argc and argv are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    // Other demos in this file; enable as needed.
    //CppAmpMethod();
    //AddArraysWithFunction();
    TiledAmp();

    // Two reads, as before; the characters themselves are not needed.
    // NOTE(review): presumably this keeps the console window open until the
    // user presses a key - confirm that two reads are really intended.
    getc(stdin);
    getc(stdin);
    return 0;
}