/* memcpy - copy a block from source to destination.  31/64 bit S/390 version.
   Copyright (C) 2012-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* NOTE(review): the operands of the two angle-bracket includes were
   missing in the reviewed copy.  They are restored here as <sysdep.h>
   and <ifunc-memcpy.h>, which are expected to provide ENTRY/END, the
   alias macros and the HAVE_xxx / xxx_DEFAULT selection macros used
   below - please confirm against the build tree.  */
#include <sysdep.h>
#include "asm-syntax.h"
#include <ifunc-memcpy.h>

/* INPUT PARAMETERS
     %r2 = address of destination memory area
     %r3 = address of source memory area
     %r4 = number of bytes to copy.

   Register roles shared by all mem[p]cpy variants in this file:
     %r1 = running destination pointer
     %r2 = return value (dest for memcpy, dest + n for mempcpy)
     %r3 = running source pointer
     %r4 = n - 1 once the zero-length check has passed
     %r5 = number of full 256-byte blocks ((n - 1) >> 8).  */

	.text

/* Width-neutral mnemonics for the z900/g5 variant and __memcpy_mvcle:
   64-bit forms on s390x, 32-bit forms on 31-bit s390.  */
#if defined __s390x__
# define LTGR	ltgr
# define CGHI	cghi
# define LGR	lgr
# define AGHI	aghi
# define BRCTG	brctg
#else
# define LTGR	ltr
# define CGHI	chi
# define LGR	lr
# define AGHI	ahi
# define BRCTG	brct
#endif /* ! defined __s390x__  */

#if HAVE_MEMCPY_Z900_G5
/* mempcpy, z900 (64 bit) / g5 (31 bit) variant.
   Sets up the return value dest + n and then shares the memcpy body.  */
ENTRY(MEMPCPY_Z900_G5)
# if defined __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! defined __s390x__  */
	LGR	%r1,%r2			# Use as dest
	la	%r2,0(%r4,%r2)		# Return dest + n
	j	.L_Z900_G5_start
END(MEMPCPY_Z900_G5)

/* memcpy, z900 (64 bit) / g5 (31 bit) variant.
   Copies full 256-byte blocks with mvc in a loop and the remainder
   with one executed mvc whose length comes from the low byte of %r4.
   More than 4096 blocks are handed over to __memcpy_mvcle.  */
ENTRY(MEMCPY_Z900_G5)
# if defined __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! defined __s390x__  */
	LGR	%r1,%r2			# r1: Use as dest ; r2: Return dest
.L_Z900_G5_start:
	LTGR	%r4,%r4
	je	.L_Z900_G5_4		# Nothing to do for n == 0
	AGHI	%r4,-1			# r4 = n - 1
# if defined __s390x__
	srlg	%r5,%r4,8		# r5 = (n - 1) / 256
# else
	lr	%r5,%r4
	srl	%r5,8			# r5 = (n - 1) / 256
# endif /* ! defined __s390x__  */
	LTGR	%r5,%r5
	jne	.L_Z900_G5_13		# At least one full 256-byte block
.L_Z900_G5_3:
	/* Copy the remaining bytes by executing the mvc template at
	   .L_Z900_G5_15 with the length taken from %r4.  */
# if defined __s390x__
	larl	%r5,.L_Z900_G5_15
#  define Z900_G5_EX_D 0
# else
	basr	%r5,0			# No larl here: address via basr + offset
.L_Z900_G5_14:
#  define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14
# endif /* ! defined __s390x__  */
	ex	%r4,Z900_G5_EX_D(%r5)
.L_Z900_G5_4:
	br	%r14
.L_Z900_G5_13:
	CGHI	%r5,4096		# Switch to mvcle for copies >1MB
	jh	__memcpy_mvcle
.L_Z900_G5_12:
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	BRCTG	%r5,.L_Z900_G5_12
	j	.L_Z900_G5_3
.L_Z900_G5_15:
	mvc	0(1,%r1),0(%r3)		# Template, only run via ex above
END(MEMCPY_Z900_G5)
#endif /* HAVE_MEMCPY_Z900_G5  */

/* Helper for very large copies, entered by a jump from the memcpy
   variants in this file with:
     %r1 = destination, %r2 = return value, %r3 = source, %r4 = n - 1.  */
ENTRY(__memcpy_mvcle)
	# Using as standalone function will result in unexpected
	# results since the length field is incremented by 1 in order to
	# compensate the changes already done in the functions above.
	LGR	%r0,%r2			# backup return dest [ + n ]
	AGHI	%r4,1			# length + 1
	LGR	%r5,%r4			# source length
	LGR	%r4,%r3			# source address
	LGR	%r2,%r1			# destination address
	LGR	%r3,%r5			# destination length = source length
.L_MVCLE_1:
	mvcle	%r2,%r4,0		# mvcle does the whole copy
	jo	.L_MVCLE_1		# Retry while the copy is incomplete
	LGR	%r2,%r0			# return destination address
	br	%r14
END(__memcpy_mvcle)

#undef LTGR
#undef CGHI
#undef LGR
#undef AGHI
#undef BRCTG

#if HAVE_MEMCPY_Z10
/* mempcpy, z10 variant.
   Sets up the return value dest + n and then shares the memcpy body.  */
ENTRY(MEMPCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# Use as dest
	la	%r2,0(%r4,%r2)		# Return dest + n
	j	.L_Z10_start
END(MEMPCPY_Z10)

/* memcpy, z10 variant.
   Same scheme as the z900/g5 variant, but uses compare-and-branch,
   exrl and prefetching (pfd) of source and destination.  More than
   65535 blocks are handed over to __memcpy_mvcle.  */
ENTRY(MEMCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# r1: Use as dest ; r2: Return dest
.L_Z10_start:
# if !defined __s390x__
	llgfr	%r4,%r4			# Zero-extend the 32-bit length
# endif /* !defined __s390x__  */
	cgije	%r4,0,.L_Z10_4		# Nothing to do for n == 0
	aghi	%r4,-1			# r4 = n - 1
	srlg	%r5,%r4,8		# r5 = (n - 1) / 256
	cgijlh	%r5,0,.L_Z10_13		# At least one full 256-byte block
.L_Z10_3:
	exrl	%r4,.L_Z10_15		# Copy the remaining bytes
.L_Z10_4:
	br	%r14
.L_Z10_13:
	cgfi	%r5,65535		# Switch to mvcle for copies >16MB
	jh	__memcpy_mvcle
.L_Z10_12:
	pfd	1,768(%r3)
	pfd	2,768(%r1)
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	brctg	%r5,.L_Z10_12
	j	.L_Z10_3
.L_Z10_15:
	mvc	0(1,%r1),0(%r3)		# Template, only run via exrl above
END(MEMCPY_Z10)
#endif /* HAVE_MEMCPY_Z10  */

#if HAVE_MEMCPY_Z196
/* mempcpy, z196 variant.
   Sets up the return value dest + n and then shares the memcpy body.  */
ENTRY(MEMPCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# Use as dest
	la	%r2,0(%r4,%r2)		# Return dest + n
	j	.L_Z196_start
END(MEMPCPY_Z196)

/* memcpy, z196 variant.
   Same scheme as the z10 variant with an aghi/jne loop.  More than
   262144 blocks are handed over to __memcpy_mvcle.
   .L_Z196_start2 is also the forward-copy entry used by the memmove
   variants below; they enter with %r1 = %r2 = dest, a zero-extended
   length in %r4 and n > 16.  */
ENTRY(MEMCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# r1: Use as dest ; r2: Return dest
.L_Z196_start:
# if !defined __s390x__
	llgfr	%r4,%r4			# Zero-extend the 32-bit length
# endif /* !defined __s390x__  */
	ltgr	%r4,%r4
	je	.L_Z196_4		# Nothing to do for n == 0
.L_Z196_start2:
	aghi	%r4,-1			# r4 = n - 1
	srlg	%r5,%r4,8		# r5 = (n - 1) / 256
	ltgr	%r5,%r5
	jne	.L_Z196_5		# At least one full 256-byte block
.L_Z196_3:
	exrl	%r4,.L_Z196_14		# Copy the remaining bytes
.L_Z196_4:
	br	%r14
.L_Z196_5:
	cgfi	%r5,262144		# Switch to mvcle for copies >64MB
	jh	__memcpy_mvcle
.L_Z196_2:
	pfd	1,768(%r3)
	pfd	2,768(%r1)
	mvc	0(256,%r1),0(%r3)
	aghi	%r5,-1			# Sets the cc tested by jne below
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	jne	.L_Z196_2
	j	.L_Z196_3
.L_Z196_14:
	mvc	0(1,%r1),0(%r3)		# Template, only run via exrl above
END(MEMCPY_Z196)
#endif /* HAVE_MEMCPY_Z196  */

#if HAVE_MEMMOVE_Z13
/* memmove, z13 variant.
     %r2 = dest (also the return value), %r3 = src, %r4 = n.
   Up to 16 bytes are moved with one vll/vstl pair.  Larger moves use
   the z196 memcpy if (dst - src) >= n (unsigned), otherwise they are
   copied backwards from the end in 64-byte and 16-byte vector blocks
   followed by a vll/vstl for the first (n % 16) bytes.  */
ENTRY(MEMMOVE_Z13)
	.machine "z13"
	.machinemode "zarch_nohighgprs"
# if !defined __s390x__
	/* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
	llgfr	%r4,%r4
	llgfr	%r3,%r3
	llgfr	%r2,%r2
# endif /* !defined __s390x__  */
	sgrk	%r0,%r2,%r3		/* r0 = dst - src  */
	clgijh	%r4,16,.L_MEMMOVE_Z13_LARGE
	aghik	%r5,%r4,-1		/* r5 = highest index; sets cc for jl  */
.L_MEMMOVE_Z13_SMALL:
	jl	.L_MEMMOVE_Z13_END	/* Jump away if len was zero.  */
	/* Store up to 16 bytes with vll/vstl which needs the index
	   instead of lengths.  */
	vll	%v16,%r5,0(%r3)
	vstl	%v16,%r5,0(%r2)
.L_MEMMOVE_Z13_END:
	br	%r14
.L_MEMMOVE_Z13_LARGE:
	lgr	%r1,%r2	/* For memcpy: r1: Use as dest ; r2: Return dest  */
	/* The unsigned comparison (dst - src >= len) determines if we can
	   execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The z13 variant of memmove needs the z196 variant of memcpy!
#endif
	clgrjhe	%r0,%r4,.L_Z196_start2
	/* Backward case: r4 becomes the offset of the 16-byte block that
	   is copied next, starting at the end of the buffers.  */
	risbgn	%r5,%r4,4,128+63,60	/* r5 = r4 / 16  */
	aghi	%r4,-16
	clgijhe	%r5,8,.L_MEMMOVE_Z13_LARGE_64B
.L_MEMMOVE_Z13_LARGE_16B_LOOP:
	/* Store at least 16 bytes with vl/vst.  The number of 16byte blocks
	   is stored in r5.  */
	vl	%v16,0(%r4,%r3)
	vst	%v16,0(%r4,%r2)
	aghi	%r4,-16
	brctg	%r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
	aghik	%r5,%r4,15		/* r5 = (len % 16) - 1; sets cc for jl  */
	j	.L_MEMMOVE_Z13_SMALL
.L_MEMMOVE_Z13_LARGE_64B:
	/* Store at least 128 bytes with 4x vl/vst.  The number of 64byte
	   blocks will be stored in r0.  */
	aghi	%r4,-48
	srlg	%r0,%r5,2		/* r0 = r5 / 4
					   => Number of 64byte blocks.  */
.L_MEMMOVE_Z13_LARGE_64B_LOOP:
	/* All four loads are issued before the first store.  */
	vl	%v20,48(%r4,%r3)
	vl	%v19,32(%r4,%r3)
	vl	%v18,16(%r4,%r3)
	vl	%v17,0(%r4,%r3)
	vst	%v20,48(%r4,%r2)
	vst	%v19,32(%r4,%r2)
	vst	%v18,16(%r4,%r2)
	vst	%v17,0(%r4,%r2)
	aghi	%r4,-64
	brctg	%r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
	aghi	%r4,48			/* Recalculate the number of
					   16byte blocks.  */
	risbg	%r5,%r5,62,128+63,0	/* r5 = r5 & 3
					   => Remaining 16byte blocks.  */
	jne	.L_MEMMOVE_Z13_LARGE_16B_LOOP
	aghik	%r5,%r4,15		/* r5 = (len % 16) - 1; sets cc for jl  */
	j	.L_MEMMOVE_Z13_SMALL
END(MEMMOVE_Z13)
#endif /* HAVE_MEMMOVE_Z13  */

#if HAVE_MEMMOVE_ARCH13
/* memmove, arch13 variant.
     %r2 = dest (also the return value), %r3 = src, %r4 = n.
   Up to 16 bytes are moved with one vll/vstl pair.  Larger moves use
   the z196 memcpy if (dst - src) >= n (unsigned), otherwise they are
   moved backwards with mvcrl, which takes its length (as highest
   index) from %r0.  */
ENTRY(MEMMOVE_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
# if ! defined __s390x__
	/* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
	llgfr	%r4,%r4
	llgfr	%r3,%r3
	llgfr	%r2,%r2
# endif /* ! defined __s390x__  */
	sgrk	%r5,%r2,%r3		/* r5 = dst - src  */
	aghik	%r0,%r4,-1		/* Both vstl and mvcrl needs highest index.  */
	clgijh	%r4,16,.L_MEMMOVE_ARCH13_LARGE
.L_MEMMOVE_ARCH13_SMALL:
	jl	.L_MEMMOVE_ARCH13_END	/* Return if len was zero (cc of aghik).  */
	/* Store up to 16 bytes with vll/vstl (needs highest index).  */
	vll	%v16,%r0,0(%r3)
	vstl	%v16,%r0,0(%r2)
.L_MEMMOVE_ARCH13_END:
	br	%r14
.L_MEMMOVE_ARCH13_LARGE:
	lgr	%r1,%r2	/* For memcpy: r1: Use as dest ; r2: Return dest  */
	/* The unsigned comparison (dst - src >= len) determines if we can
	   execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The arch13 variant of memmove needs the z196 variant of memcpy!
#endif
	clgrjhe	%r5,%r4,.L_Z196_start2
	/* Backward case.  */
	clgijh	%r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
	/* Move up to 256bytes with mvcrl (move right to left).  */
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	br	%r14
.L_MEMMOVE_ARCH13_LARGER_256B:
	/* First move the "remaining" block of up to 256 bytes at the end of
	   src/dst buffers.  Then move blocks of 256bytes in a loop starting
	   with the block at the end.
	   (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
	   passed to mvcrl instructions are aligned, too)  */
	risbgn	%r5,%r0,8,128+63,56	/* r5 = r0 / 256  */
	risbgn	%r0,%r0,56,128+63,0	/* r0 = r0 & 0xFF  */
	slgr	%r4,%r0			/* r4 = len - r0  */
	lay	%r1,-1(%r4,%r1)		/* r1 = start of the tail block in dst  */
	lay	%r3,-1(%r4,%r3)		/* r3 = start of the tail block in src  */
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	lghi	%r0,255		/* Always copy 256 bytes in the loop below!  */
.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
	aghi	%r1,-256
	aghi	%r3,-256
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	brctg	%r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
	br	%r14
END(MEMMOVE_ARCH13)
#endif /* HAVE_MEMMOVE_ARCH13  */

#if ! HAVE_MEMCPY_IFUNC
/* If we don't use ifunc, define an alias for mem[p]cpy here.
   Otherwise see sysdeps/s390/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, memcpy)
strong_alias (MEMPCPY_DEFAULT, __mempcpy)
weak_alias (__mempcpy, mempcpy)
#endif

#if ! HAVE_MEMMOVE_IFUNC
/* If we don't use ifunc, define an alias for memmove here.
   Otherwise see sysdeps/s390/memmove.c.  */
# if ! HAVE_MEMMOVE_C
/* If the c variant is needed, then sysdeps/s390/memmove-c.c
   defines memmove.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, memmove)
# endif
#endif

#if defined SHARED && IS_IN (libc)
/* Defines the internal symbols.
   Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
# if ! HAVE_MEMMOVE_C
/* If the c variant is needed, then sysdeps/s390/memmove-c.c
   defines the internal symbol.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
# endif
#endif