1
0

4 Commitit f2c4503e19 ... 857eb1bd9d

Tekijä SHA1 Viesti Päivämäärä
  hk 857eb1bd9d minor typo fixed 5 vuotta sitten
  hk 09abc43496 _ghf_wdcExecWS_Kern renamed to _ghf_wdcBExecWS_Kern 5 vuotta sitten
  hk 244200c7cd _ghf_wdcSetWS_KernArgs, _ghf_wdcExecWS_Kern added 5 vuotta sitten
  hk 7b91770a03 _GDM_copyMem, oclh_getDevNameByIndex added + minor improvements 5 vuotta sitten

+ 42 - 13
Makefile

@@ -3,13 +3,17 @@ MODEFLAGS := -O3
 TARGETDIR := build
 
 PRFX_PATH        ?= ~/opt/oclh
-OPENCL_INCLUDES  := -I"/usr/local/cuda/include"
-OPENCL_LIBRARIES := -L/usr/local/cuda/lib64 -L/opt/intel/opencl/lib64
+OPENCL_INCLUDES  := -I/usr/local/cuda/include \
+                    -I/opt/intel/system_studio_2020/opencl-sdk/include/CL/
+OPENCL_LIBRARIES := -L/usr/local/cuda/lib64 \
+                    -L/opt/intel/opencl_compilers_and_libraries_18.1.0.015/$\
+                      linux/compiler/lib/intel64_lin
 
-OCLH_LIBRARY_NAME  := liboclh.so.0.0
+OCLH_LIBRARY_NAME  := liboclh.so
 OCLH_COMPILER_NAME := oclh_cr
 OCLH_LINKER_NAME   := oclh_lr
 OCLH_BUILDER_NAME  := oclh_br
+OCLH_DEVNAME_NAME  := oclh_getDevNameByIndex
 
 RM          := rm -rf
 CC          := gcc
@@ -24,22 +28,30 @@ LIBS        := -lOpenCL
 # Every subdirectory with source files must be described here
 SRCDIR := src
 # Add inputs and outputs from these tool invocations to the build variables
-C_SRCS        := $(wildcard $(SRCDIR)/*.c)
 C_SRCS_LIB    := $(wildcard $(SRCDIR)/oclh_h*.c)
+C_SRCS_BTOOLS := $(C_SRCS_LIB) $(wildcard $(SRCDIR)/oclh_cc.c)
+C_SRCS_NM     := $(C_SRCS_LIB) $(wildcard $(SRCDIR)/oclh_get_dev_name_by_idx.c)
 TARGETDIR_LIB := $(TARGETDIR)/lib.o
 OBJS_LIB      := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_LIB)/%.o,$(C_SRCS_LIB))
 C_DEPS_LIB    := $(wildcard $(TARGETDIR_LIB)/*.d)
 TARGETDIR_CR  := $(TARGETDIR)/cr.o
-OBJS_CR       := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_CR)/%.o,$(C_SRCS))
+OBJS_CR       := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_CR)/%.o,$(C_SRCS_BTOOLS))
 C_DEPS_CR     := $(wildcard $(TARGETDIR_CR)/*.d)
 TARGETDIR_LR  := $(TARGETDIR)/lr.o
-OBJS_LR       := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_LR)/%.o,$(C_SRCS))
+OBJS_LR       := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_LR)/%.o,$(C_SRCS_BTOOLS))
 C_DEPS_LR     := $(wildcard $(TARGETDIR_LR)/*.d)
 TARGETDIR_BR  := $(TARGETDIR)/br.o
-OBJS_BR       := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_BR)/%.o,$(C_SRCS))
+OBJS_BR       := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_BR)/%.o,$(C_SRCS_BTOOLS))
 C_DEPS_BR     := $(wildcard $(TARGETDIR_BR)/*.d)
+TARGETDIR_NM  := $(TARGETDIR)/nm.o
+OBJS_NM       := $(patsubst $(SRCDIR)/%.c,$(TARGETDIR_NM)/%.o,$(C_SRCS_NM))
+C_DEPS_NM     := $(wildcard $(TARGETDIR_NM)/*.d)
 
 # Each subdirectory must supply rules for building sources it contributes
+# Compile one source file into the object directory of the device-name tool.
+# -MMD -MP -MF writes a .d dependency file next to the object; -MT names the
+# object itself as the target inside that file.
+$(TARGETDIR_NM)/%.o: $(SRCDIR)/%.c
+	mkdir -p $(TARGETDIR_NM); \
+	$(CC) $(INCLUDES) $(MODEFLAGS) $(CFLAGS) $(DEFS) $(OUTPUTFLAGS) \
+	    -MMD -MP -MF"$(@:.o=.d)" -MT"$@" -o "$@" "$<"
 $(TARGETDIR_BR)/%.o: $(SRCDIR)/%.c
 	mkdir -p $(TARGETDIR_BR); \
 	$(CC) $(INCLUDES) $(MODEFLAGS) $(CFLAGS) $(DEFS) $(OUTPUTFLAGS) \
@@ -59,9 +71,9 @@ $(TARGETDIR_LIB)/%.o: $(SRCDIR)/%.c
 
 # Add inputs and outputs from these tool invocations to the build variables
 # All Target
-all: oclh_library oclh_compiler oclh_linker oclh_builder
+all: oclh_library oclh_compiler oclh_linker oclh_builder oclh_devnamegetter
 debug: MODEFLAGS := -O0 -g3
-debug: oclh_library oclh_compiler oclh_linker oclh_builder
+debug: all
 # Tool invocations
 oclh_compiler: DEFS := -D__OCLH_COMPILER_ONLY_FLAG -D__OCLH_BUILD_LOG_TO_STDOUT_FLAG
 oclh_compiler: OUTPUTNAME := $(OCLH_COMPILER_NAME)
@@ -98,11 +110,21 @@ oclh_library: $(OBJS_LIB)
 	-shared -Wl,-soname,liboclh.so.0.0 -o "$(TARGETDIR)/$(OUTPUTNAME)"
 	@echo 'Finished building target: $@'
 	@echo ' '
+# Link the stand-alone device-name query tool from its own object set.
+# DEFS is cleared for this target, so no -D feature macros are passed when its
+# objects are compiled.
+# $(LIBS) is placed after the objects: the linker resolves symbols left to
+# right, so a -l option that precedes the objects needing it may be dropped
+# (for instance when the toolchain defaults to --as-needed).
+oclh_devnamegetter: DEFS :=
+oclh_devnamegetter: OUTPUTNAME := $(OCLH_DEVNAME_NAME)
+oclh_devnamegetter: $(OBJS_NM)
+	@echo 'Building target: $@'
+	@echo 'Invoking C Linker'
+	$(LINKER) $(LDFLAGS) $(MISCFLAGS) $(OBJS_NM) $(LIBS) \
+	-o "$(TARGETDIR)/$(OUTPUTNAME)"
+	@echo 'Finished building target: $@'
+	@echo ' '
 install:
 	mkdir -p $(PRFX_PATH)/bin
 	    cp $(TARGETDIR)/$(OCLH_COMPILER_NAME) $(PRFX_PATH)/bin
 	    cp $(TARGETDIR)/$(OCLH_LINKER_NAME)   $(PRFX_PATH)/bin
 	    cp $(TARGETDIR)/$(OCLH_BUILDER_NAME)  $(PRFX_PATH)/bin
+	    cp $(TARGETDIR)/$(OCLH_DEVNAME_NAME)  $(PRFX_PATH)/bin
 	mkdir -p $(PRFX_PATH)/lib
 	    cp $(TARGETDIR)/$(OCLH_LIBRARY_NAME)  $(PRFX_PATH)/lib
 	mkdir -p $(PRFX_PATH)/include/include_h
@@ -111,11 +133,15 @@ install:
 	    cp $(SRCDIR)/include_hd/* $(PRFX_PATH)/include/include_hd
 	mkdir -p $(PRFX_PATH)/include/include_d
 	    cp $(SRCDIR)/include_d/*  $(PRFX_PATH)/include/include_d
+# Create the versioned names as symlinks to the unversioned library.
+# -f replaces links left by a previous install (plain ln -s would fail), and
+# && keeps ln from running in the wrong directory when the cd fails.
+	cd $(PRFX_PATH)/lib && \
+	    ln -sf $(OCLH_LIBRARY_NAME) $(OCLH_LIBRARY_NAME).0 && \
+	    ln -sf $(OCLH_LIBRARY_NAME) $(OCLH_LIBRARY_NAME).0.0
 uninstall:
+# Removes the four tools copied by install (compiler, linker, builder and
+# device-name getter), the library with its version links, and the headers.
+# NOTE(review): the list also hands whole directories such as
+# $(PRFX_PATH)/bin to $(RM) (rm -rf); that is only safe while PRFX_PATH is a
+# prefix dedicated to oclh -- confirm before pointing it at a shared prefix.
 	$(RM) $(PRFX_PATH)/bin/$(OCLH_COMPILER_NAME) \
 	      $(PRFX_PATH)/bin/$(OCLH_LINKER_NAME) \
-	      $(PRFX_PATH)/bin/$(OCLH_LINKER_NAME)  $(PRFX_PATH)/bin \
-	      $(PRFX_PATH)/lib/$(OCLH_LIBRARY_NAME) $(PRFX_PATH)/lib \
+	      $(PRFX_PATH)/bin/$(OCLH_BUILDER_NAME) \
+	      $(PRFX_PATH)/bin/$(OCLH_DEVNAME_NAME) $(PRFX_PATH)/bin \
+	      $(PRFX_PATH)/lib/$(OCLH_LIBRARY_NAME)* $(PRFX_PATH)/lib \
 	      $(PRFX_PATH)/include/include_h/*  $(PRFX_PATH)/include/include_h \
 	      $(PRFX_PATH)/include/include_hd/* $(PRFX_PATH)/include/include_hd \
 	      $(PRFX_PATH)/include/include_d/*  $(PRFX_PATH)/include/include_d \
@@ -126,10 +152,13 @@ clean:
 	    $(OBJS_CR)  $(C_DEPS_CR)  $(TARGETDIR_CR) \
 	    $(OBJS_LR)  $(C_DEPS_LR)  $(TARGETDIR_LR) \
 	    $(OBJS_BR)  $(C_DEPS_BR)  $(TARGETDIR_BR) \
+	    $(OBJS_NM)  $(C_DEPS_NM)  $(TARGETDIR_NM) \
 	    $(TARGETDIR)/$(OCLH_BUILDER_NAME) \
 	    $(TARGETDIR)/$(OCLH_COMPILER_NAME) \
 	    $(TARGETDIR)/$(OCLH_LINKER_NAME) \
-	    $(TARGETDIR)/$(OCLH_LIBRARY_NAME)
+	    $(TARGETDIR)/$(OCLH_LIBRARY_NAME) \
+	    $(TARGETDIR)/$(OCLH_DEVNAME_NAME)
 	@echo ' '
+# Full reinstall cycle: uninstall, clean, rebuild, install.
+# The phases run as sequential sub-makes rather than as prerequisites, because
+# the order of prerequisites is not guaranteed under "make -j" and clean must
+# never run concurrently with the rebuild.
+repack:
+	$(MAKE) uninstall
+	$(MAKE) clean
+	$(MAKE) all
+	$(MAKE) install
 
-.PHONY: all clean debug
+.PHONY: all clean debug repack

+ 4 - 4
documentation/oclh_doc.cls

@@ -357,10 +357,10 @@
 \renewcommand\p@enumii{\theenumi}
 \renewcommand\p@enumiii{\theenumi(\theenumii)}
 \renewcommand\p@enumiv{\p@enumiii\theenumiii}
-\newcommand\labelitemi{\textbullet}                       
+\newcommand\labelitemi{\textbullet}
 \newcommand\labelitemii{\normalfont\bfseries \textendash}
-\newcommand\labelitemiii{\textasteriskcentered}          
-\newcommand\labelitemiv{\textperiodcentered}             
+\newcommand\labelitemiii{\textasteriskcentered}
+\newcommand\labelitemiv{\textperiodcentered}
 \newenvironment{description}
                {\list{}{\labelwidth\z@ \itemindent-\leftmargin
                         \let\makelabel\descriptionlabel}}
@@ -617,4 +617,4 @@
   \onecolumn
 \fi
 \endinput
-%%
+%%

+ 1 - 1
documentation/opencl_helpers_documentation-russian.tex

@@ -237,7 +237,7 @@ xindy для составления предметного указателя. 
 \verb|OpenCL_helpers/documentation| запуском сценария сборки\par
 \indent\indent\verb|./build_script|\par
 \noindent%
-Если в ходе исполнения данного скрипта не возникло ошибок, то в каталоге
+Если в ходе исполнения данного сценария не возникло ошибок, то в каталоге
 \verb|OpenCL_helpers/documentation/build| появятся файлы\par
 \indent\indent\verb|opencl_helpers_documentation-russian.pdf|\par
 \indent\indent\verb|opencl_helpers_documentation-english.pdf|\par

+ 26 - 33
src/include_d/oclh_d_mem_alloc.clh

@@ -2,6 +2,7 @@
  * oclh_d_mem_alloc.clh
  *      Author: havock
  */
+//#define _OCLH_OCL_COMPILER_
 #ifndef OCLH_D_MEM_ALLOC_CLH_
 #define OCLH_D_MEM_ALLOC_CLH_ 1
 #include <oclh_hd_std_types.clh>
@@ -187,7 +188,7 @@
             __GD__MemHeader.u64Ptr[__GDM__MHI_PREV]=(-1); \
             __GD__MemHeader.u64Ptr[__GDM__MHI_CURR]=0ul; \
             __GD__MemHeader.u64Ptr[__GDM__MHI_NEXT]=0ul; \
-    }
+        }
 #define _GDM__NULL(__GDM_NULL_MAC_Region) \
     ((uint64_t) __GD__ ## __GDM_NULL_MAC_Region ## _Heap_u8Ptr)
 #define _GDM_heap_PROTO(__GDM_HEAP_PROTO_MAC_Region) \
@@ -205,8 +206,8 @@
     __GD_CAST_MACROS_CastUnion.srcPtr=(__GDM_CAST_MACROS_srcPtr); \
     (__GDM_CAST_MACROS_dstPtr)=__GD_CAST_MACROS_CastUnion.dstPtr; \
 }
-#define _GDM_is_ptr_NULL(__OCLH_ISNULL_MAC_Region,__OCLH_ISNULL_MAC_Ptr) \
-   (((uint64_t) (__OCLH_ISNULL_MAC_Ptr))==_GDM__NULL(__OCLH_ISNULL_MAC_Region))
+#define _GDM_is_ptr_NULL(__GDM_ISNULL_MAC_Region,__GDM_ISNULL_MAC_Ptr) \
+   (((uint64_t) (__GDM_ISNULL_MAC_Ptr))==_GDM__NULL(__GDM_ISNULL_MAC_Region))
 #define _GDM_malloc(__GDM_MALLOC_MAC_Region, __GDM_MALLOC_MAC_Type, \
                     __GDM_MALLOC_MAC_Ptr, __GDM_MALLOC_MAC_Size) \
 { \
@@ -233,19 +234,21 @@
         ((__GD__ ## __GDM_PRINT_HEAP_STATE_MAC_Region ## _Heap_u8Ptr)); \
 }
 
-__private uint8_t* malloc__private(__private       uint8_t* const pu8HeapPtr,
-                                             const uint64_t       u64Size);
-__global  uint8_t* malloc__global( __global        uint8_t* const pu8HeapPtr,
-                                             const uint64_t       u64Size);
-void free__private(__private uint8_t* const pu8HeapPtr,
+__private static uint8_t* malloc__private(__private uint8_t* const pu8HeapPtr,
+                                              const uint64_t       u64Size);
+__global  static uint8_t* malloc__global( __global  uint8_t* const pu8HeapPtr,
+                                              const uint64_t       u64Size);
+static void free__private(__private uint8_t* const pu8HeapPtr,
                    __private uint8_t**      ppu8Ptr);
-void free__global( __global  uint8_t* const pu8HeapPtr,
+static void free__global( __global  uint8_t* const pu8HeapPtr,
                    __global  uint8_t**      ppu8Ptr);
-int32_t Print__private_HeapState(__private const uint8_t* const pu8HeapPtr);
-int32_t Print__global_HeapState( __global  const uint8_t* const pu8HeapPtr);
+static int32_t Print__private_HeapState(__private const uint8_t
+                                                            * const pu8HeapPtr);
+static int32_t Print__global_HeapState( __global  const uint8_t
+                                                            * const pu8HeapPtr);
 
-__private uint8_t* malloc__private(__private       uint8_t* const pu8HeapPtr,
-                                             const uint64_t       u64Size) {
+__private static uint8_t* malloc__private(__private uint8_t* const pu8HeapPtr,
+                                              const uint64_t       u64Size) {
     __private uint8_t* pu8Res;
     __GDM_STRICTLY_TECHNICAL_common_malloc_MACROS(__private, uint8_t,
                                                   pu8Res, u64Size,
@@ -255,8 +258,8 @@ __private uint8_t* malloc__private(__private       uint8_t* const pu8HeapPtr,
                                                   __GDM__HEAP_HEADER_SZ_BYTES);
     return(pu8Res);
 }
-__global uint8_t* malloc__global(__global       uint8_t* const pu8HeapPtr,
-                                          const uint64_t       u64Size) {
+__global static uint8_t* malloc__global(__global uint8_t* const pu8HeapPtr,
+                                           const uint64_t       u64Size) {
     __global uint8_t* pu8Res;
     __GDM_STRICTLY_TECHNICAL_common_malloc_MACROS(__global, uint8_t,
                                                   pu8Res, u64Size,
@@ -266,8 +269,8 @@ __global uint8_t* malloc__global(__global       uint8_t* const pu8HeapPtr,
                                                   __GDM__HEAP_HEADER_SZ_BYTES);
     return(pu8Res);
 }
-void free__private(__private uint8_t* const pu8HeapPtr,
-                   __private uint8_t**      ppu8Ptr) {
+static void free__private(__private uint8_t* const pu8HeapPtr,
+                          __private uint8_t**      ppu8Ptr) {
     if(*ppu8Ptr) {
         __GDM_STRICTLY_TECHNICAL_common_free_MACROS(__private, uint8_t,
                                                     *ppu8Ptr, pu8HeapPtr,
@@ -276,8 +279,8 @@ void free__private(__private uint8_t* const pu8HeapPtr,
     }
     return;
 }
-void free__global(__global uint8_t* const pu8HeapPtr,
-                  __global uint8_t**      ppu8Ptr) {
+static void free__global(__global uint8_t* const pu8HeapPtr,
+                         __global uint8_t**      ppu8Ptr) {
     if(*ppu8Ptr) {
         __GDM_STRICTLY_TECHNICAL_common_free_MACROS(__global, uint8_t,
                                                     *ppu8Ptr, pu8HeapPtr,
@@ -286,30 +289,20 @@ void free__global(__global uint8_t* const pu8HeapPtr,
     }
     return;
 }
-int32_t Print__private_HeapState(__private const uint8_t* const pu8HeapPtr) {
+static int32_t Print__private_HeapState(__private const uint8_t
+                                                           * const pu8HeapPtr) {
     __GDM_STRICTLY_TECHNICAL_print_heap_state_MACROS(__private, pu8HeapPtr,
                                                 _GDM__PRIVATE_MEM_HEAP_SZ_BYTES,
                                                    __GDM__HEAP_HEADER_SZ_BYTES);
     return(0);
 }
-int32_t Print__global_HeapState(__global const uint8_t* const pu8HeapPtr) {
+static int32_t Print__global_HeapState(__global const uint8_t
+                                                           * const pu8HeapPtr) {
     __GDM_STRICTLY_TECHNICAL_print_heap_state_MACROS(__global, pu8HeapPtr,
                                                 _GDM__PRIVATE_MEM_HEAP_SZ_BYTES,
                                                    __GDM__HEAP_HEADER_SZ_BYTES);
     return(0);
 }
 
-// flt32_t _gdf_atomicAdd_f32(__global volatile flt32_t *pfPtr, flt32_t fVal) {
-//     __global volatile uint32_t* pu32Ptr;
-//     union { uint32_t u32; flt32_t f32; } nxt,exp,cur;
-//     _GDM_cast_pointer(__global volatile ,uint32_t,pu32Ptr,flt32_t,pfPtr);
-//     cur.f32=*pfPtr;
-//     do {
-//         exp.f32=cur.f32; nxt.f32=exp.f32+fVal;
-//         cur.u32=atomic_cmpxchg(pu32Ptr,exp.u32,nxt.u32);
-//     } while(cur.u32!=exp.u32);
-//     return(cur.f32);
-// }
-
 #endif /* _OCLH_OCL_COMPILER_ */
 #endif /* OCLH_D_MEM_ALLOC_CLH_ */

+ 29 - 31
src/include_d/oclh_d_srr.clh

@@ -2,11 +2,9 @@
  * oclh_d_srr.clh
  *      Author: havock
  */
-#ifndef OCLH_D_SRR_CLH_DECLS_
-#define OCLH_D_SRR_CLH_DECLS_ 1
+#ifndef OCLH_D_SRR_CLH_
+#define OCLH_D_SRR_CLH_ 1
 #include <oclh_hd_std_types.clh>
-
-#if defined(_OCLH_OCL_HOST_HEADERS_) || defined(_OCLH_OCL_COMPILER_)
 /*
  * current section is available for host just for memory size calculations
  */
@@ -22,9 +20,23 @@ typedef struct _GDT_CONST_VECTOR_SET_DESCRIPTION {
                    uint64_t u64NofVs;
 } _GDT_CVS_DESC;
 #pragma pack(pop)
-#endif /* defined(_OCLH_OCL_HOST_HEADERS_) || defined(_OCLH_OCL_COMPILER_) */
 
 #ifdef _OCLH_OCL_COMPILER_
+#include <oclh_d_mem_alloc.clh>
+
+/*
+ * _GDM_copyMem: copies __GDM_MAC_Size bytes from __GDM_MAC_SrcPtr to
+ * __GDM_MAC_DstPtr one byte at a time.
+ * Both pointers are first converted to uint8_t pointers in their own address
+ * space (__GDM_MAC_DstRegion / __GDM_MAC_SrcRegion) with _GDM_cast_pointer;
+ * the bytes are then copied from the last index down to index 0.
+ * The macro expands to a braced statement block, not to an expression.
+ * The block-local temporaries carry a __GDM_COPYMEM_ prefix so that argument
+ * expressions named e.g. `sz`, `pu8Dst` or `pu8Src` are not captured by them.
+ */
+#define _GDM_copyMem(__GDM_MAC_DstRegion, __GDM_MAC_DstType, __GDM_MAC_DstPtr, \
+                     __GDM_MAC_SrcRegion, __GDM_MAC_SrcType, __GDM_MAC_SrcPtr, \
+                     __GDM_MAC_Size) \
+{ \
+    __GDM_MAC_DstRegion uint8_t* __GDM_COPYMEM_pu8Dst; \
+    __GDM_MAC_SrcRegion uint8_t* __GDM_COPYMEM_pu8Src; \
+    __private uint64_t __GDM_COPYMEM_u64Left=(__GDM_MAC_Size); \
+    _GDM_cast_pointer(__GDM_MAC_DstRegion, uint8_t, __GDM_COPYMEM_pu8Dst, \
+                      __GDM_MAC_DstType, __GDM_MAC_DstPtr); \
+    _GDM_cast_pointer(__GDM_MAC_SrcRegion, uint8_t, __GDM_COPYMEM_pu8Src, \
+                      __GDM_MAC_SrcType, __GDM_MAC_SrcPtr); \
+    while(__GDM_COPYMEM_u64Left--) \
+        __GDM_COPYMEM_pu8Dst[__GDM_COPYMEM_u64Left]= \
+            __GDM_COPYMEM_pu8Src[__GDM_COPYMEM_u64Left]; \
+}
+
 _GDT_VS_DESC _gdf_declVecSetFromTermOffset(__global flt32_t* const pf32V,
                                            __private const uint64_t u64VDim,
                                            __private const uint64_t u64VOffset,
@@ -40,29 +52,21 @@ flt32_t _gdf_euclDst_f32_pp(__private const flt32_t* const pA,
                             __private const uint64_t       Dim);
 flt32_t _gdf_euclDst_f32_gg(__global  const flt32_t* const pA,
                             __global  const flt32_t* const pB,
-                            __private const	uint64_t       Dim);
+                            __private const uint64_t       Dim);
 int32_t _gdf_copyVec_f32_pp(__private       flt32_t* const pDst,
                             __private const flt32_t* const pSrc,
-                            __private const uint64_t       Dim);
+                            __private       uint64_t       Dim);
 int32_t _gdf_copyVec_f32_gg(__global        flt32_t* const pDst,
                             __global  const flt32_t* const pSrc,
-                            __private const uint64_t       Dim);
+                            __private       uint64_t       Dim);
 int32_t _gdf_copyVec_f32_pg(__private       flt32_t* const pDst,
                             __global  const flt32_t* const pSrc,
-                            __private const uint64_t       Dim);
+                            __private       uint64_t       Dim);
 uint32_t CRC32b(__global  const uint8_t* const pu8Data,
                 __private const uint64_t u64Sz);
-#endif /* _OCLH_OCL_COMPILER_ */
-
-#endif /* OCLH_D_SRR_CLH_DECLS_ */
-
 
 
 
-#ifdef _OCLH_OCL_COMPILER_
-#ifndef OCLH_D_SRR_CLH_IMPLS_
-#define OCLH_D_SRR_CLH_IMPLS_ 1
-
 _GDT_VS_DESC _gdf_declVecSetFromTermOffset(__global  flt32_t* const pf32V,
                                            __private const uint64_t u64VDim,
                                            __private const uint64_t u64VOffset,
@@ -120,31 +124,25 @@ flt32_t _gdf_euclDst_f32_gg(__global  const flt32_t* const pfA,
 }
 int32_t _gdf_copyVec_f32_pp(__private       flt32_t* const pDst,
                             __private const flt32_t* const pSrc,
-                            __private const uint64_t       Dim) {
-    __private uint64_t i=0ul;
-    for(i=0ul; i<Dim; i++) pDst[i]=pSrc[i];
-    return(0);
+                            __private       uint64_t       Dim) {
+    while(Dim--) pDst[Dim]=pSrc[Dim]; return(0);
 }
 int32_t _gdf_copyVec_f32_gg(__global        flt32_t* const pDst,
                             __global  const flt32_t* const pSrc,
-                            __private const uint64_t       Dim) {
-    __private uint64_t i=0ul;
-    for(i=0ul; i<Dim; i++) pDst[i]=pSrc[i];
-    return(0);
+                            __private       uint64_t       Dim) {
+    while(Dim--) pDst[Dim]=pSrc[Dim]; return(0);
 }
 int32_t _gdf_copyVec_f32_pg(__private       flt32_t* const pDst,
                             __global  const flt32_t* const pSrc,
-                            __private const uint64_t       Dim) {
-    __private uint64_t i=0ul;
-    for(i=0;i<Dim;i++) pDst[i]=pSrc[i];
+                            __private       uint64_t       Dim) {
+    while(Dim--) pDst[Dim]=pSrc[Dim];
     return(0);
 }
-
 uint32_t CRC32b(__global  const uint8_t* const pu8Data,
                 __private const uint64_t       u64Sz) {
     __private uint64_t i=0ul;
     __private uint32_t byte=0u,
-                       mask=0u;
+                       mask=0u,
                        crc=0xFFFFFFFF;
     for(i=0ul; i<u64Sz; i++) {
         __private int32_t j=0;
@@ -158,5 +156,5 @@ uint32_t CRC32b(__global  const uint8_t* const pu8Data,
     return(crc);
 }
 
-#endif /* OCLH_D_SRR_CLH_IMPLS_ */
 #endif /* _OCLH_OCL_COMPILER_ */
+#endif /* OCLH_D_SRR_CLH_ */

+ 1 - 0
src/include_h/oclh_h_internals.h

@@ -14,6 +14,7 @@ typedef enum __GHE_CASE_SENSITIVITY {
 } __GHE_CASE_SENS;
 
 uint64_t __ghf_removePreNPostSpacesFromCharPtr(     char* const pcStr);
+uint64_t __ghf_replaceIllegalFSCharsInCharPtr(      char* const pcStr);
 uint64_t __ghf_replaceSpacesWithUnderscoreInCharPtr(char* const pcStr);
 int32_t __ghf_concatHeapStrAndCharPtr(char** ppcDst, const char* const pcStr);
 int32_t __ghf_cmpCharPtrAndWC(const char*           pcStr,

+ 15 - 15
src/include_h/oclh_h_ws_base.h

@@ -6,6 +6,7 @@
 #define OCLH_H_WS_BASE_H_ 1
 #include <sys/types.h>
 #include <stdlib.h>
+#include <stdarg.h>
 #include <CL/opencl.h>
 #include <oclh_h_base_log.h>
 
@@ -61,7 +62,7 @@ int32_t _ghf_genrWS(      _GHT_WRKSET*  const pwSet,
                     const _GHT_LOG            Log,
                     const int32_t             i32ExclusiveLogFlag,
                     const cl_device_id        clWrkDev,
-                    const char**        const ppcCLProgramSources,
+                          char**        const ppcCLProgramSources,
                     const _GHE_SRCTYPE        SourceType,
                     const char*         const pcOCLBuildOpts,
                     const _GHE_LOGLVL         LogLvl,
@@ -77,8 +78,9 @@ int32_t _ghf_genrWS(      _GHT_WRKSET*  const pwSet,
 int32_t _ghf_wipeWS(_GHT_WRKSET* const pwSet);
 
 int32_t _ghf_isWS_LogValid(const _GHT_WRKSET wSet);
-int32_t _ghf_recalcWS_WIWG(_GHT_WRKSET* const pwSet,
-                           const size_t szNofAllTasks);
+int32_t _ghf_recalcWS_WIWG_1x1(  _GHT_WRKSET* const pwSet);
+int32_t _ghf_recalcWS_WIWG_dense(_GHT_WRKSET* const pwSet,
+                           const size_t             szNofAllTasks);
 int32_t _ghf_flfnWS(_GHT_WRKSET wSet);
 
 cl_device_id    _ghf_getWS_Dev(        _GHT_WRKSET wSet);
@@ -91,12 +93,16 @@ cl_uint         _ghf_getWS_MaxCmpUnits(_GHT_WRKSET wSet);
 int32_t         _ghf_saveWS_ProgramBinaries(_GHT_WRKSET wSet,
                                             char* const pcOutputName,
                                             _GHE_OUTNAME_TYPE OutNameType);
-int32_t         _ghf_wdcChkWS_APIErr (  _GHT_WRKSET wSet,
-                                  const char* const pcAPICall,
-                                  const int32_t i32FlashFlag);
-int32_t         _ghf_wdcChkWS_KerErr(   _GHT_WRKSET wSet,
-                                  const cl_kernel   clKer);
+int32_t         _ghf_wdcChkWS_APIErr(      _GHT_WRKSET wSet,
+                                     const char* const pcAPICall,
+                                     const int32_t i32FlashFlag);
+int32_t         _ghf_wdcChkWS_KerErr(      _GHT_WRKSET wSet,
+                                     const cl_kernel   clKer);
 int32_t         _ghf_wdcSetWS_KerErrToZero(_GHT_WRKSET wSet);
+int32_t         _ghf_wdcSetWS_KernArgs(    _GHT_WRKSET wSet,
+                                     const cl_kernel kr, ...);
+int32_t         _ghf_wdcBExecWS_Kern(      _GHT_WRKSET wSet,
+                                     const char* const pcKernName, ...);
 
 #pragma pack(push,1)
 typedef struct _GHT_LIST_OF_DEVICES_DESCRIPTION {
@@ -130,7 +136,7 @@ _GHT_AWSS _ghf_declAWSs(const _GHE_LOGLVL  LogLvl,
 int32_t _ghf_genrAWSs(      _GHT_AWSS*   const pAWSs,
                       const _GHT_LOG           Log,
                       const _GHT_DEVLIST_DESC  DevLstDesc,
-                      const char**       const ppcCLProgramSources,
+                            char**       const ppcCLProgramSources,
                       const _GHE_SRCTYPE       SourceType,
                       const char*        const OCLBuildOpts
 #if defined(__OCLH_BUILDER_FLAG) || \
@@ -146,12 +152,6 @@ int32_t _ghf_wipeAWSs(_GHT_AWSS* const pAWSs);
 int32_t _ghf_buildDevList(_GHT_WRKSET wSet, cl_device_id** ppDevLst);
 int32_t _ghf_wipeDevList(cl_device_id** ppDevLst);
 
-
-
-
-
-
-
 int32_t __ghf_setWS_TextProgramId(_GHT_WRKSET wSet,
                                   char* const pcDst,
                                   char* const pcOutputPrefix);

+ 5 - 20
src/include_hd/oclh_hd_srr.clh

@@ -2,15 +2,14 @@
  * oclh_hd_srr.clh
  *      Author: havock
  */
-#ifndef _OCLH_OCL_COMPILER_
+#if !defined(_OCLH_OCL_COMPILER_)
 #define __global
 #define __private
 #define __kernel
-#endif /* _OCLH_OCL_COMPILER_ */
+#endif /* !defined(_OCLH_OCL_COMPILER_) */
 
-#if defined(_OCLH_OCL_HOST_HEADERS_) || defined(_OCLH_OCL_COMPILER_)
-#ifndef OCLH_HD_SRR_CLH_DEFS_
-#define OCLH_HD_SRR_CLH_DEFS_ 1
+#ifndef OCLH_HD_SRR_CLH_
+#define OCLH_HD_SRR_CLH_ 1
 #include <oclh_hd_std_types.clh>
 
 int32_t  _ghdf_sumFromAtoB_i32(__private const int32_t a,
@@ -22,15 +21,6 @@ int32_t  _ghdf_getMaxOfInt32s( __private const int32_t* const pi32Vals,
 int32_t  _ghdf_getMaxOf2Int32s(__private const int32_t a,
                                __private const int32_t b);
 
-#endif /* OCLH_HD_SRR_CLH_DEFS_ */
-#endif /* defined(_OCLH_OCL_HOST_HEADERS_) || defined(_OCLH_OCL_COMPILER_) */
-
-
-
-#if defined(_OCLH_OCL_HOST_ALGORITHMS_) || defined(_OCLH_OCL_COMPILER_)
-#ifndef OCLH_HD_SRR_CLH_DECLS_
-#define OCLH_HD_SRR_CLH_DECLS_ 1
-
 int32_t _ghdf_sumFromAtoB_i32(__private const int32_t a,
                               __private const int32_t b) {
     const int32_t d=b-a, s=b+a;
@@ -38,7 +28,6 @@ int32_t _ghdf_sumFromAtoB_i32(__private const int32_t a,
             (s*((d+1)/2)):
             ((s-1)*(d/2)+b));
 }
-
 uint64_t _ghdf_sumFromAtoB_u64(__private const uint64_t a,
                                __private const uint64_t b) {
     const uint64_t d=b-a, s=b+a;
@@ -46,7 +35,6 @@ uint64_t _ghdf_sumFromAtoB_u64(__private const uint64_t a,
             (s*((d+1ul)/2ul)):
             ((s-1ul)*(d/2ul)+b));
 }
-
 int32_t _ghdf_getMaxOfInt32s(__private const int32_t* const pi32Vals,
                              __private const uint64_t       u64NofVals) {
     __private int32_t i32Max=pi32Vals[0];
@@ -54,11 +42,8 @@ int32_t _ghdf_getMaxOfInt32s(__private const int32_t* const pi32Vals,
     for(i=1ul; i<u64NofVals; i++) if(i32Max<pi32Vals[i]) i32Max=pi32Vals[i];
     return(i32Max);
 }
-
 int32_t _ghdf_getMaxOf2Int32s(__private const int32_t a,
                               __private const int32_t b) {
     return((a>b)?a:b);
 }
-
-#endif /* OCLH_HD_SRR_CLH_DECLS_ */
-#endif /* defined(_OCLH_OCL_HOST_ALGORITHMS_) || defined(_OCLH_OCL_COMPILER_) */
+#endif /* OCLH_HD_SRR_CLH_ */

+ 1 - 1
src/oclh_cc.c

@@ -125,7 +125,7 @@ int32_t main(int32_t argc, char *argv[]) {
 #endif /* defined(__OCLH_BUILDER_FLAG) || defined(__OCLH_COMPILER_ONLY_FLAG) */
             i++;
         }
-        err=_ghf_genrAWSs(&AWSs,Log,DevLstDesc,(const char** const)ppcSources,
+        err=_ghf_genrAWSs(&AWSs,Log,DevLstDesc,ppcSources,
 #if defined(__OCLH_BUILDER_FLAG) || defined(__OCLH_COMPILER_ONLY_FLAG)
                           _GHE_HL_LISTINGS,
 #endif /* defined(__OCLH_BUILDER_FLAG) || defined(__OCLH_COMPILER_ONLY_FLAG) */

+ 36 - 0
src/oclh_get_dev_name_by_idx.c

@@ -0,0 +1,36 @@
+#include <string.h>
+#include <oclh.h>
+#include <oclh_h_base_dev_clapi_wrappers.h>
+
+/*
+ * Prints the cleaned-up name of one OpenCL device to stdout.
+ * argv[1] is the index of the device in the list built by _ghf_buildDevList
+ * and must be a plain decimal number.
+ * Returns 1 when the index argument is missing or malformed, otherwise the
+ * error code of the first failing helper call (0 on success).
+ */
+int32_t main(int32_t argc, char *argv[]) {
+/**/
+/* Frees the name buffer, wipes device list, work set and log, returns err. */
+#define __CLEAN_GET_DEV_NAME_BY_IDX_INFRASTRUCTURE_AND_EXIT \
+{   if(pcDevName) { free(pcDevName);  pcDevName=NULL; } \
+    _ghf_wipeDevList(&DevLst); _ghf_wipeWS(&tmpWS); \
+    _ghf_wipeLog(&Log); return(err); }
+/**/
+    int32_t err=0;
+    char* pcIdxEnd=NULL;
+    unsigned long ulDevIdx=0ul;
+    /* Require a leading digit: strtoul alone would accept "-1" (wrapping to
+     * a huge index) and would turn non-numeric text into index 0. */
+    if(argc<2 || argv[1][0]<'0' || argv[1][0]>'9') return(1);
+    ulDevIdx=strtoul(argv[1],&pcIdxEnd,10);
+    /* Reject trailing garbage and values that do not fit into cl_uint. */
+    if(*pcIdxEnd || (unsigned long)(cl_uint)ulDevIdx!=ulDevIdx) return(1);
+    {
+        _GHT_LOG Log=_ghf_declLog();
+        _GHT_WRKSET tmpWS=_ghf_declWS();
+        cl_uint cluDevIdx=(cl_uint)ulDevIdx;
+        char* pcDevName=NULL;
+        cl_device_id* DevLst=NULL;
+        /* The log is generated with "/dev/null" as its target. */
+        if((err=_ghf_genrLog(&Log,"/dev/null")))
+            __CLEAN_GET_DEV_NAME_BY_IDX_INFRASTRUCTURE_AND_EXIT;
+        tmpWS.Log=Log; tmpWS.pwSetAddr=&tmpWS;
+        if((err=_ghf_buildDevList(tmpWS,&DevLst)))
+            __CLEAN_GET_DEV_NAME_BY_IDX_INFRASTRUCTURE_AND_EXIT;
+        /* NOTE(review): the length of DevLst is not visible here, so
+         * cluDevIdx is not range-checked against it -- confirm how the
+         * device count can be obtained and add the check. */
+        pcDevName=_ghf_getDevInf_charptr(DevLst[cluDevIdx],CL_DEVICE_NAME,&err);
+        if(err) __CLEAN_GET_DEV_NAME_BY_IDX_INFRASTRUCTURE_AND_EXIT;
+        /* Trim, drop characters unsuitable for file names, then turn the
+         * remaining spaces into underscores. */
+        __ghf_removePreNPostSpacesFromCharPtr(pcDevName);
+        __ghf_replaceIllegalFSCharsInCharPtr(pcDevName);
+        __ghf_replaceSpacesWithUnderscoreInCharPtr(pcDevName);
+        printf("%s\n",pcDevName);
+        __CLEAN_GET_DEV_NAME_BY_IDX_INFRASTRUCTURE_AND_EXIT;
+    }
+/**/
+#undef __CLEAN_GET_DEV_NAME_BY_IDX_INFRASTRUCTURE_AND_EXIT
+/**/
+}

+ 39 - 0
src/oclh_h_internals.c

@@ -33,6 +33,45 @@ uint64_t __ghf_removePreNPostSpacesFromCharPtr(char* const pcStr) {
     return(u64NofRemovedSpaces);
 }
 
+/*
+ * Makes a string usable as part of a file name, editing pcStr in place:
+ *   - the markers (R), (C) and (TM), in the letter cases listed below, are
+ *     removed;
+ *   - every character of the illegal set is overwritten with a space;
+ *   - a whitespace character directly following another whitespace character
+ *     is removed, so runs collapse to their first character.
+ * Returns the number of characters overwritten with a space (removed markers
+ * and collapsed whitespace are not counted), or 0 when pcStr is NULL.
+ */
+uint64_t __ghf_replaceIllegalFSCharsInCharPtr(char* const pcStr) {
+    uint64_t u64NofReplaced=0ul, i=0ul;
+    char cPrev='\0';
+    if(!pcStr) return(0ul);
+    while(pcStr[i]) {
+        if(pcStr[i]=='(') {
+            uint64_t u64MarkLen=0ul;
+            if(!strncmp(&pcStr[i],"(R)",3) || !strncmp(&pcStr[i],"(r)",3) ||
+               !strncmp(&pcStr[i],"(C)",3) || !strncmp(&pcStr[i],"(c)",3))
+                u64MarkLen=3ul;
+            else if(!strncmp(&pcStr[i],"(TM)",4)||!strncmp(&pcStr[i],"(Tm)",4)||
+                    !strncmp(&pcStr[i],"(tM)",4)||!strncmp(&pcStr[i],"(tm)",4))
+                u64MarkLen=4ul;
+            if(u64MarkLen) {
+                uint64_t j=i, k=i+u64MarkLen;
+                while(pcStr[k]) { pcStr[j]=pcStr[k]; k++; j++; }
+                pcStr[j]='\0';
+                /* Re-examine position i: it now holds the character that
+                 * followed the marker (possibly another marker or the
+                 * terminator). Advancing here would step over the new
+                 * terminator into stale bytes when the marker was last. */
+                continue;
+            }
+        }
+        if(pcStr[i]=='/' || pcStr[i]=='\\' || pcStr[i]=='?' ||
+           pcStr[i]=='%' || pcStr[i]=='*'  || pcStr[i]==':' ||
+           pcStr[i]=='|' || pcStr[i]=='"'  || pcStr[i]=='<' ||
+           pcStr[i]=='>' || pcStr[i]==','  || pcStr[i]==';' ||
+           pcStr[i]=='=' || pcStr[i]=='&'  || pcStr[i]=='#' ||
+           pcStr[i]=='$' || pcStr[i]=='@'  || pcStr[i]=='(' ||
+           pcStr[i]==')') { pcStr[i]=' '; u64NofReplaced++; }
+        /* isspace() requires a value representable as unsigned char; a plain
+         * char with the high bit set would otherwise be passed as negative. */
+        if(isspace((unsigned char)pcStr[i]) && isspace((unsigned char)cPrev)) {
+            uint64_t j=i, k=i+1ul;
+            while(pcStr[k]) { pcStr[j]=pcStr[k]; k++; j++; }
+            pcStr[j]='\0';
+            /* Stay at i and keep cPrev: the shifted-in character is judged
+             * against the same preceding whitespace. */
+            continue;
+        }
+        cPrev=pcStr[i]; i++;
+    }
+    return(u64NofReplaced);
+}
 uint64_t __ghf_replaceSpacesWithUnderscoreInCharPtr(char* const pcStr) {
     if(pcStr) {
         uint64_t u64NofReplacedSpaces=0ul, i=0ul;

+ 110 - 86
src/oclh_h_ws_base.c

@@ -32,7 +32,7 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                     const _GHT_LOG            Log,
                     const int32_t             i32ExclusiveLogFlag,
                     const cl_device_id        clWrkDev,
-                    const char**       const  ppcCLProgramSources,
+                          char**       const  ppcCLProgramSources,
                     const _GHE_SRCTYPE        SourceType,
                     const char*        const  pcOCLBuildOpts,
                     const _GHE_LOGLVL         LogLvl,
@@ -47,12 +47,20 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                                                             ) {
 /**/
 #define __CLAPI_ERR_ROUTINE_WITH_RET(___CLAPI_CALL) \
-{ const cl_int tmp=pwSet->APIErr; \
-  char pcLogMsg[_GHM_MAXLEN_OF_LOGMSG]; \
-  snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG, "%s/%s", __func__, ___CLAPI_CALL); \
-  _ghf_logWS_APIErr(*pwSet, pcLogMsg); \
-  _ghf_wipeWS(pwSet); \
-  return(tmp); }
+{   const cl_int tmp=pwSet->APIErr; \
+    char pcLogMsg[_GHM_MAXLEN_OF_LOGMSG]; \
+    snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG, \
+             "%s/%s", __func__, ___CLAPI_CALL); \
+    _ghf_logWS_APIErr(*pwSet, pcLogMsg); \
+    _ghf_wipeWS(pwSet); \
+    return(tmp); }
+#define __CLAPI_CREATE_PROGRAM_ERR_TO_STDOUT(___CLAPI_CALL) \
+{   FILE* pTmpFilePtr=pwSet->Log.pfOut; \
+    pwSet->Log.pfOut=stdout; \
+    _ghf_logWS_DevInfoShort(*pwSet,_ghf_getWS_Dev(*pwSet),NULL); \
+    snprintf(pcLogMsg,_GHM_MAXLEN_OF_LOGMSG,___CLAPI_CALL); \
+    _ghf_logWS_APIErr(*pwSet, pcLogMsg); _ghf_logWS_Delim(*pwSet); \
+    pwSet->Log.pfOut=pTmpFilePtr; }
 /**/
     char pcLogMsg[_GHM_MAXLEN_OF_LOGMSG];
     if(!pwSet) return(_GHM_NULL_POINTER_RECEIVED_ERROR);
@@ -164,29 +172,21 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                                * исходных текстов препятствует созданию
                                * ядерных функций. Необходимо разобраться в
                                * причинах. */
-                            /*((size_t**)ppcCLProgramSources)[cluNofListings+1],*/
+                          /*((size_t**)ppcCLProgramSources)[cluNofListings+1],*/
                               &pwSet->APIErr);
                 if(pwSet->APIErr) {
 #ifdef __OCLH_BUILD_LOG_TO_STDOUT_FLAG
-                    {
-                        FILE* pTmpFilePtr=pwSet->Log.pfOut;
-                        pwSet->Log.pfOut=stdout;
-                        _ghf_logWS_DevInfoShort(*pwSet,_ghf_getWS_Dev(*pwSet),
-                                                NULL);
-                        snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG,
-                                 "clCreateProgramWithSource");
-                        _ghf_logWS_APIErr(*pwSet, pcLogMsg);
-                        _ghf_logWS_Delim(*pwSet);
-                        pwSet->Log.pfOut=pTmpFilePtr;
-                    }
+              __CLAPI_CREATE_PROGRAM_ERR_TO_STDOUT("clCreateProgramWithSource");
 #endif /* __OCLH_BUILD_LOG_TO_STDOUT_FLAG */
                     __CLAPI_ERR_ROUTINE_WITH_RET("clCreateProgramWithSource")
-                } }
+                }
+              }
             break;
             case _GHE_IR_LISTINGS:
               { /* TODO: make IR output and processing */
                 _ghf_logWS_Msg(*pwSet, "_GHE_IR_LISTINGS");
-                _ghf_wipeWS(pwSet); return(1); }
+                _ghf_wipeWS(pwSet); return(1); 
+              }
             break;
             case _GHE_SEPARATED_OBJECTS:
               { cl_device_id cldev=_ghf_getWS_Dev(*pwSet);
@@ -220,17 +220,7 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                         _ghf_freeHostZ(&pPrograms);
                         pwSet->APIErr=err;
 #ifdef __OCLH_BUILD_LOG_TO_STDOUT_FLAG
-                    {
-                        FILE* pTmpFilePtr=pwSet->Log.pfOut;
-                        pwSet->Log.pfOut=stdout;
-                        _ghf_logWS_DevInfoShort(*pwSet,_ghf_getWS_Dev(*pwSet),
-                                                NULL);
-                        snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG,
-                                 "clCreateProgramWithBinary");
-                        _ghf_logWS_APIErr(*pwSet, pcLogMsg);
-                        _ghf_logWS_Delim(*pwSet);
-                        pwSet->Log.pfOut=pTmpFilePtr;
-                    }
+              __CLAPI_CREATE_PROGRAM_ERR_TO_STDOUT("clCreateProgramWithBinary");
 #endif /* __OCLH_BUILD_LOG_TO_STDOUT_FLAG */
                        __CLAPI_ERR_ROUTINE_WITH_RET("clCreateProgramWithBinary")
                     }
@@ -248,24 +238,15 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                     _ghf_freeHostZ(&pPrograms);
                     pwSet->APIErr=err;
 #ifdef __OCLH_BUILD_LOG_TO_STDOUT_FLAG
-                    {
-                        FILE* pTmpFilePtr=pwSet->Log.pfOut;
-                        pwSet->Log.pfOut=stdout;
-                        _ghf_logWS_DevInfoShort(*pwSet,_ghf_getWS_Dev(*pwSet),
-                                                NULL);
-                        snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG,
-                                 "clLinkProgram");
-                        _ghf_logWS_APIErr(*pwSet, pcLogMsg);
-                        _ghf_logWS_Delim(*pwSet);
-                        pwSet->Log.pfOut=pTmpFilePtr;
-                    }
+                    __CLAPI_CREATE_PROGRAM_ERR_TO_STDOUT("clLinkProgram");
 #endif /* __OCLH_BUILD_LOG_TO_STDOUT_FLAG */
                     __CLAPI_ERR_ROUTINE_WITH_RET("clLinkProgram")
                 }
                 for(i=0u; i<cluNofBinaries; i++)
                     if(pPrograms[i])
                         pwSet->APIErr=clReleaseProgram(pPrograms[i]);
-                _ghf_freeHostZ(&pPrograms); }
+                _ghf_freeHostZ(&pPrograms);
+              }
             break;
             case _GHE_LINKED_OBJECTS:
               { cl_device_id cldev=_ghf_getWS_Dev(*pwSet);
@@ -281,20 +262,11 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                               &cliBinStatus, &pwSet->APIErr);
                 if(pwSet->APIErr) {
 #ifdef __OCLH_BUILD_LOG_TO_STDOUT_FLAG
-                    {
-                        FILE* pTmpFilePtr=pwSet->Log.pfOut;
-                        pwSet->Log.pfOut=stdout;
-                        _ghf_logWS_DevInfoShort(*pwSet,_ghf_getWS_Dev(*pwSet),
-                                                NULL);
-                        snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG,
-                                 "clCreateProgramWithBinary");
-                        _ghf_logWS_APIErr(*pwSet, pcLogMsg);
-                        _ghf_logWS_Delim(*pwSet);
-                        pwSet->Log.pfOut=pTmpFilePtr;
-                    }
+              __CLAPI_CREATE_PROGRAM_ERR_TO_STDOUT("clCreateProgramWithBinary");
 #endif /* __OCLH_BUILD_LOG_TO_STDOUT_FLAG */
                     __CLAPI_ERR_ROUTINE_WITH_RET("clCreateProgramWithBinary")
-                } }
+                }
+              }
             break;
             default:
               { char pcLogMsg[_GHM_MAXLEN_OF_LOGMSG];
@@ -302,7 +274,8 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                          "Unknown program source");
                 _ghf_logWS_APIErr(*pwSet, pcLogMsg);
                 _ghf_wipeWS(pwSet);
-                return(_GHM_UNKNOWN_PROGRAM_SOURCE_ERROR); }
+                return(_GHM_UNKNOWN_PROGRAM_SOURCE_ERROR);
+              }
             }
             {
                 cl_int cliBuildCLAPIErr=CL_SUCCESS;
@@ -337,9 +310,8 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
 #endif /* defined(__OCLH_COMPILER_ONLY_FLAG) &&
           !defined(__OCLH_LINKER_ONLY_FLAG) */
                 cliBuildCLAPIErr=pwSet->APIErr;
-                while(_ghf_getWS_BuildStatus(*pwSet)==CL_BUILD_IN_PROGRESS) ;
-                _ghf_logWS_BuildInfo(*pwSet, pwSet->Program, clWrkDev,
-                                     LogLvl, _GHE_NO_BUILD_LOG);
+                while(_ghf_getWS_BuildStatus(*pwSet)==CL_BUILD_IN_PROGRESS)
+                    ;
                 {
                     _GHT_LOG TmpLog=_ghf_declLog();
                     /* cl_uint cluNofKernels=0u; */
@@ -347,6 +319,8 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                     cl_build_status clBuildStatus=
                                         _ghf_getWS_BuildStatus(*pwSet);
                     cl_program_binary_type clBinType=0u;
+                    _ghf_logWS_BuildInfo(*pwSet, pwSet->Program, clWrkDev,
+                                         LogLvl, BuildLogMode);
                     if(BuildLogMode==_GHE_BUILD_LOG_IN_SEPARATED_FILES) {
                         TmpLog=pwSet->Log;
                         int32_t err=0;
@@ -384,23 +358,15 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
                                   _ghf_wipeWS(pwSet); return(err); }
                         _ghf_logWS_DevInfoShort(*pwSet,
                                                 _ghf_getWS_Dev(*pwSet), NULL);
-#ifdef __OCLH_BUILD_LOG_TO_STDOUT_FLAG
-                        {
-                            FILE* pTmpFilePtr=pwSet->Log.pfOut;
-                            pwSet->Log.pfOut=stdout;
-                            _ghf_logWS_DevInfoShort(*pwSet,
-                                                    _ghf_getWS_Dev(*pwSet),
-                                                    NULL);
-                            pwSet->Log.pfOut=pTmpFilePtr;
-                        }
-#endif /* __OCLH_BUILD_LOG_TO_STDOUT_FLAG */
+                        _ghf_logWS_BuildInfo(*pwSet, pwSet->Program, clWrkDev,
+                                             LogLvl, BuildLogMode);
                     }
-                    _ghf_logWS_BuildInfo(*pwSet, pwSet->Program, clWrkDev,
-                                         LogLvl, BuildLogMode);
 #ifdef __OCLH_BUILD_LOG_TO_STDOUT_FLAG
                     {
                         FILE* pTmpFilePtr=pwSet->Log.pfOut;
                         pwSet->Log.pfOut=stdout;
+                        _ghf_logWS_DevInfoShort(*pwSet,_ghf_getWS_Dev(*pwSet),
+                                                NULL);
                         _ghf_logWS_BuildInfo(*pwSet, pwSet->Program, clWrkDev,
                                              LogLvl, BuildLogMode);
                         pwSet->Log.pfOut=pTmpFilePtr;
@@ -604,6 +570,7 @@ int32_t _ghf_genrWS(      _GHT_WRKSET* const  pwSet,
     _ghf_logWS_Delim(*pwSet);
     return(_GHM_OK);
 /**/
+#undef __CLAPI_CREATE_PROGRAM_ERR_TO_STDOUT
 #undef __CLAPI_ERR_ROUTINE_WITH_RET
 /**/
 }
@@ -697,8 +664,13 @@ int32_t _ghf_isWS_LogValid(const _GHT_WRKSET wSet) {
     return(_ghf_isLog_Valid(wSet.Log));
 }
 
-int32_t _ghf_recalcWS_WIWG(_GHT_WRKSET* const pwSet,
-                           const size_t szNofAllTasks) {
+/* Forces a 1x1 launch geometry on the work set: szNofAllWI (total number of
+ * work-items) and szNofWIinWG (work-items per work-group) are both set to 1.
+ * Returns _GHM_NULL_POINTER_RECEIVED_ERROR when pwSet is NULL,
+ * otherwise _GHM_OK. */
+int32_t _ghf_recalcWS_WIWG_1x1(_GHT_WRKSET* const pwSet) {
+    if(!pwSet) return(_GHM_NULL_POINTER_RECEIVED_ERROR);
+    pwSet->szNofAllWI=1ul;
+    pwSet->szNofWIinWG=1ul;
+    return(_GHM_OK);
+}
+
+int32_t _ghf_recalcWS_WIWG_dense(_GHT_WRKSET* const pwSet,
+                                 const size_t szNofAllTasks) {
     if((szNofAllTasks/pwSet->szNofCmpUnits)>=pwSet->szMaxWGsz) {
         size_t r=0ul;
         pwSet->szNofWIinWG=pwSet->szMaxWGsz;
@@ -1038,9 +1010,9 @@ int32_t _ghf_saveWS_ProgramBinaries(_GHT_WRKSET wSet,
     return(_GHM_OK);
 }
 
-int32_t _ghf_wdcChkWS_APIErr (_GHT_WRKSET wSet,
-                          const char* const pcAPICall,
-                          const int32_t i32FlashFlag) {
+int32_t _ghf_wdcChkWS_APIErr(_GHT_WRKSET wSet,
+                             const char* const pcAPICall,
+                             const int32_t i32FlashFlag) {
     if(wSet.APIErr) {
         _ghf_logWS_APIErr(wSet,pcAPICall);
         if(wSet.pvDat) wSet.pfnDatCleaner(wSet);
@@ -1102,6 +1074,56 @@ int32_t _ghf_wdcSetWS_KerErrToZero(_GHT_WRKSET wSet) {
     return(_GHM_OK);
 }
 
+/**/
+/* Shared body of the variadic kernel-argument setters.
+ * Expects `wSet` (work set, held by value) and `kr` (cl_kernel) in the scope
+ * of the expanding function; ___GDM_SKA_MAC_Arg must be that function's last
+ * named parameter, because it is handed to va_start.
+ * The variadic tail is consumed as (size_t size, const void* value) pairs,
+ * one pair per kernel argument as reported by CL_KERNEL_NUM_ARGS, and each
+ * pair is forwarded to clSetKernelArg in argument-index order.
+ * When _ghf_wdcChkWS_APIErr reports a failure the expanding function returns
+ * wSet.APIErr; va_end is always executed before a return that follows
+ * va_start. */
+#define __GDM_SET_KERNEL_ARGS_BY_VA_LIST_FROM_NAME_W_RET(___GDM_SKA_MAC_Arg) \
+{ \
+    cl_uint cluNofKerArgs=0u, i=0u; \
+    size_t szRes=0ul; \
+    char pcLogMsg[_GHM_MAXLEN_OF_LOGMSG]; \
+    va_list volArgList; \
+    wSet.APIErr=clGetKernelInfo(kr, CL_KERNEL_NUM_ARGS, \
+                                sizeof(cl_uint), &cluNofKerArgs, &szRes); \
+    snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG, \
+             "%s/clGetKernelInfo/CL_KERNEL_NUM_ARGS", __func__); \
+    if(_ghf_wdcChkWS_APIErr(wSet,pcLogMsg,_GHM_NOFL)) return(wSet.APIErr); \
+    va_start(volArgList,___GDM_SKA_MAC_Arg); \
+    snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG,"%s/clSetKernelArg",__func__); \
+    for(i=0;i<cluNofKerArgs;i++) { \
+        size_t sz=va_arg(volArgList,size_t); \
+        const void* addr=va_arg(volArgList,const void*); \
+        wSet.APIErr=clSetKernelArg(kr,i,sz,addr); \
+        if(_ghf_wdcChkWS_APIErr(wSet,pcLogMsg,_GHM_NOFL)) { \
+            va_end(volArgList); /* va_start must be paired before return */ \
+            return(wSet.APIErr); \
+        } \
+    } \
+    va_end(volArgList); \
+}
+/**/
+/* Binds every argument of kernel `kr` from the variadic tail.
+ * The tail must hold one (size_t size, const void* value) pair per kernel
+ * argument, in argument-index order; the number of pairs read is the
+ * kernel's CL_KERNEL_NUM_ARGS, so no terminator is passed.
+ * Returns wSet.APIErr when _ghf_wdcChkWS_APIErr reports a failed
+ * clGetKernelInfo or clSetKernelArg call, otherwise _GHM_OK. */
+int32_t _ghf_wdcSetWS_KernArgs(_GHT_WRKSET wSet,const cl_kernel kr,...) {
+    __GDM_SET_KERNEL_ARGS_BY_VA_LIST_FROM_NAME_W_RET(kr);
+    return(_GHM_OK);
+}
+/* Looks up the kernel called pcKernName in the work set, binds its arguments
+ * from the variadic tail and enqueues it as a one-dimensional NDRange of
+ * wSet.szNofAllWI work-items in groups of wSet.szNofWIinWG.
+ * The variadic tail must hold one (size_t size, const void* value) pair per
+ * kernel argument, in argument-index order.
+ * Returns _GHM_UNAVALABLE_KERNEL_ERROR when the lookup yields _GHM_UNDEFPTR
+ * (the data cleaner callback is invoked first if wSet.pvDat is set), the
+ * error of the first failing step otherwise, or _GHM_OK on success. */
+int32_t _ghf_wdcBExecWS_Kern(_GHT_WRKSET wSet,
+                             const char* const pcKernName, ...) {
+    int32_t err=0;
+    const cl_kernel kr=_ghf_getWS_KernByName(wSet,pcKernName);
+    if(kr==_GHM_UNDEFPTR) {
+        if(wSet.pvDat) wSet.pfnDatCleaner(wSet);
+        return(_GHM_UNAVALABLE_KERNEL_ERROR);
+    }
+    if((err=_ghf_wdcSetWS_KerErrToZero(wSet))) return(err);
+    __GDM_SET_KERNEL_ARGS_BY_VA_LIST_FROM_NAME_W_RET(pcKernName);
+    /* global_work_offset is a pointer parameter: pass NULL, not 0ul */
+    wSet.APIErr=clEnqueueNDRangeKernel(wSet.Queue,kr,1u,NULL,
+                                       &wSet.szNofAllWI,&wSet.szNofWIinWG,
+                                       0u,NULL,NULL);
+    if((err=_ghf_wdcChkWS_APIErr(wSet,"clEnqueueNDRangeKernel",_GHM_FL)))
+        return(err);
+    if((err=_ghf_wdcChkWS_KerErr(wSet,kr))) return(err);
+    return(_GHM_OK);
+}
+/**/
+#undef __GDM_SET_KERNEL_ARGS_BY_VA_LIST_FROM_NAME_W_RET
+/**/
+
 _GHT_DEVLIST_DESC _ghf_declDevLstDesc(void) {
     _GHT_DEVLIST_DESC dld={ .pcluIdxs=NULL, .pcWC=NULL };
     return(dld);
@@ -1169,7 +1191,7 @@ _GHT_AWSS _ghf_declAWSs(const _GHE_LOGLVL  LogLvl,
 int32_t _ghf_genrAWSs(      _GHT_AWSS*   const pAWSs,
                       const _GHT_LOG           Log,
                       const _GHT_DEVLIST_DESC  DevLstDesc,
-                      const char**       const ppcCLProgramSources,
+                            char**       const ppcCLProgramSources,
                       const _GHE_SRCTYPE       SourceType,
                       const char*        const OCLBuildOpts
 #if defined(__OCLH_BUILDER_FLAG) || \
@@ -1248,12 +1270,14 @@ int32_t _ghf_genrAWSs(      _GHT_AWSS*   const pAWSs,
             } else { /* подразумевается валидное описание устройств */
                 cl_uint j=0u;
                 while(DevLst[j]) {
-                    if(__ghf_cmpCharPtrAndWC(
-                                _ghf_getDevInf_charptr(DevLst[j],CL_DEVICE_NAME,
-                                                       &tmpWS.APIErr),
-                                DevLstDesc.pcWC, __GHE_CASE_SENSITIVE)) {
+                    char* pcDevName=_ghf_getDevInf_charptr(DevLst[j],
+                                                           CL_DEVICE_NAME,
+                                                           &tmpWS.APIErr);
+                    if(__ghf_cmpCharPtrAndWC(pcDevName,DevLstDesc.pcWC,
+                                             __GHE_CASE_SENSITIVE)) {
                         err=_ghf_addDevIdxToDevLstDesc(&TmpDevLstDesc,j);
                         if(err) {
+                            if(pcDevName) { free(pcDevName);  pcDevName =NULL; }
                             _ghf_wipeDevLstDesc(&TmpDevLstDesc);
                             _ghf_wipeAWSs(pAWSs);
                             _ghf_wipeDevList(&DevLst);
@@ -1262,6 +1286,7 @@ int32_t _ghf_genrAWSs(      _GHT_AWSS*   const pAWSs,
                         }
                         cluNofWS++;
                     }
+                    if(pcDevName) { free(pcDevName);  pcDevName=NULL; }
                     j++;
                 }
             }
@@ -1443,15 +1468,13 @@ int32_t _ghf_wipeDevList(cl_device_id** const ppDevLst) {
     return(_GHM_OK);
 }
 
-
-
 int32_t __ghf_setWS_TextProgramId(_GHT_WRKSET wSet,
                                   char* const pcDst,
                                   char* const pcOutputPrefix) {
     char pcDefProgName[64]="\0",
          pcDefDevName[64]="\0",
-         *pcApiDevName=_ghf_getDevInf_charptr(_ghf_getWS_Dev(wSet),
-                                              CL_DEVICE_NAME, &wSet.APIErr);
+        *pcApiDevName=_ghf_getDevInf_charptr(_ghf_getWS_Dev(wSet),
+                                             CL_DEVICE_NAME, &wSet.APIErr);
     snprintf(pcDefProgName, 64ul,
              "program_0x%04lx", __GHM_U64STRIPTO2B((uint64_t)wSet.Program));
     snprintf(pcDefDevName, 64ul,
@@ -1462,6 +1485,7 @@ int32_t __ghf_setWS_TextProgramId(_GHT_WRKSET wSet,
     }
     if(pcApiDevName) {
         __ghf_removePreNPostSpacesFromCharPtr(pcApiDevName);
+        __ghf_replaceIllegalFSCharsInCharPtr(pcApiDevName);
         __ghf_replaceSpacesWithUnderscoreInCharPtr(pcApiDevName);
     }
     snprintf(pcDst,_GHM_MAX_PATH_LENGTH,"%s/%s-%s",_GHM_LOG_PATH,

+ 2 - 2
src/oclh_h_ws_base_log.c

@@ -56,9 +56,9 @@ int32_t _ghf_logWS_Hdr(const _GHT_WRKSET wSet,const char* const pcLogHdr) {
 int32_t _ghf_logWS_Delim(const _GHT_WRKSET wSet) {
     pthread_mutex_lock(wSet.Log.pMtx);
         __ghf_logWS_MsgIgnoringLock(wSet,
-             "_______________________________________________________________");
+             "__________________________________________________");
         __ghf_logWS_MsgIgnoringLock(wSet,
-             "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
+             "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
     pthread_mutex_unlock(wSet.Log.pMtx);
     return(_GHM_OK);
 }

+ 3 - 4
src/oclh_h_ws_log_clapi_reps.c

@@ -895,7 +895,7 @@ int32_t _ghf_logWS_DevInfoShort(      _GHT_WRKSET  wSet,
     {
         char* pcVendor=_ghf_getDevInf_charptr(_ghf_getWS_Dev(wSet),
                                               CL_DEVICE_VENDOR, &wSet.APIErr),
-             *pcModel=_ghf_getDevInf_charptr(_ghf_getWS_Dev(wSet),
+            * pcModel=_ghf_getDevInf_charptr(_ghf_getWS_Dev(wSet),
                                               CL_DEVICE_NAME, &wSet.APIErr);
         __ghf_removePreNPostSpacesFromCharPtr(pcVendor);
         __ghf_removePreNPostSpacesFromCharPtr(pcModel);
@@ -909,9 +909,8 @@ int32_t _ghf_logWS_DevInfoShort(      _GHT_WRKSET  wSet,
     {
         char* pcHWvers=_ghf_getDevInf_charptr(clDev, CL_DEVICE_VERSION,
                                               &wSet.APIErr),
-             *pcHWlang=_ghf_getDevInf_charptr(clDev, CL_DEVICE_OPENCL_C_VERSION,
-                                              &wSet.APIErr);;
-        pcHWvers=_ghf_getDevInf_charptr(clDev, CL_DEVICE_VERSION, &wSet.APIErr);
+            * pcHWlang=_ghf_getDevInf_charptr(clDev, CL_DEVICE_OPENCL_C_VERSION,
+                                              &wSet.APIErr);
         snprintf(pcLogMsg, _GHM_MAXLEN_OF_LOGMSG,
                  "%s HW ver./lang ver.: %s / %s", pcDevPrefix,
                  pcHWvers?pcHWvers:"Undefined",