oc11111111

最新推荐文章于 2024-07-26 12:28:02 发布
luosha112
最新推荐文章于 2024-07-26 12:28:02 发布
阅读量188
点赞数
分类专栏： hotspot 文章标签： java
本文链接：https://blog.csdn.net/aa111sadsa/article/details/134238160
版权
hotspot 专栏收录该内容
28 篇文章 0 订阅
订阅专栏
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/assembler_linux_x86.cpp
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "runtime/os.hpp"
#include "runtime/threadLocalStorage.hpp"

#ifndef _LP64
void MacroAssembler::int3() {
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

#ifdef MINIMIZE_RAM_USAGE

// Loads the current thread pointer into 'thread' by emitting a call to
// pthread_getspecific(ThreadLocalStorage::thread_index()).
// rcx and rdx are saved around the call; rax is saved too unless it is the
// destination register.
void MacroAssembler::get_thread(Register thread) {
  // call pthread_getspecific
  // void * pthread_getspecific(pthread_key_t key);
  if (thread != rax) push(rax);
  push(rcx);
  push(rdx);

  // The key is passed on the stack and removed again after the call.
  push(ThreadLocalStorage::thread_index());
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, pthread_getspecific)));
  increment(rsp, wordSize);

  pop(rdx);
  pop(rcx);
  if (thread != rax) {
    // The call's result is taken from rax before rax itself is restored.
    mov(thread, rax);
    pop(rax);
  }
}

#else
// Loads the current thread pointer into 'thread' without a call: the stack
// pointer is shifted right by PAGE_SHIFT and the result indexes the table at
// ThreadLocalStorage::sp_map_addr() (entries scaled by 4).
void MacroAssembler::get_thread(Register thread) {
  // thread = rsp >> PAGE_SHIFT
  movl(thread, rsp);
  shrl(thread, PAGE_SHIFT);

  // Address of table entry: sp_map_addr() + thread * 4
  ExternalAddress tls_base((address)ThreadLocalStorage::sp_map_addr());
  Address index(noreg, thread, Address::times_4);
  ArrayAddress tls(tls_base, index);

  // Replace the index in 'thread' with the table entry.
  movptr(thread, tls);
}
#endif // MINIMIZE_RAM_USAGE
#else
void MacroAssembler::int3() {
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

// Loads the current thread pointer into 'thread' by emitting a call to
// pthread_getspecific(ThreadLocalStorage::thread_index()).
// rdi, rsi, rdx, rcx and r8-r11 are pushed before the call and popped
// afterwards; rax is saved as well unless it is the destination register.
void MacroAssembler::get_thread(Register thread) {
  // call pthread_getspecific
  // void * pthread_getspecific(pthread_key_t key);
   if (thread != rax) {
     push(rax);
   }
   push(rdi);
   push(rsi);
   push(rdx);
   push(rcx);
   push(r8);
   push(r9);
   push(r10);
   // XXX
   // Remember the current rsp in r10, round rsp down to a multiple of 16,
   // then push the remembered value so that pop(rsp) below restores it.
   mov(r10, rsp);
   andq(rsp, -16);
   push(r10);
   push(r11);

   // The key is passed in rdi.
   movl(rdi, ThreadLocalStorage::thread_index());
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, pthread_getspecific)));

   // Undo the pushes in reverse order; pop(rsp) reloads the pre-alignment rsp.
   pop(r11);
   pop(rsp);
   pop(r10);
   pop(r9);
   pop(r8);
   pop(rcx);
   pop(rdx);
   pop(rsi);
   pop(rdi);
   if (thread != rax) {
       // The call's result is taken from rax before rax itself is restored.
       mov(thread, rax);
       pop(rax);
   }
}
#endif
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/atomic_linux_x86.inline.hpp
/*
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_ATOMIC_LINUX_X86_INLINE_HPP
#define OS_CPU_LINUX_X86_VM_ATOMIC_LINUX_X86_INLINE_HPP

#include "runtime/atomic.hpp"
#include "runtime/os.hpp"
#include "vm_version_x86.hpp"

// Implementation of class atomic

// Stores to non-volatile destinations: each overload writes store_value
// through dest with an ordinary assignment.
// NOTE(review): presumably relies on aligned stores of these widths being
// atomic on x86 -- confirm against the Atomic class contract.
inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
// dest is an untyped pointer to the slot; it is reinterpreted as void**.
inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }

// Same stores for volatile destinations.
inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
// dest is an untyped pointer to the slot; it is reinterpreted as void* volatile*.
inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }


// Adding a lock prefix to an instruction on MP machine.
// Expands to assembly text that compares operand 'mp' with 0 and, when it is
// zero, jumps over the "lock" prefix to local label 1. Otherwise the prefix
// applies to the instruction the caller concatenates after the macro.
#define LOCK_IF_MP(mp) "cmp $0, " #mp "; je 1f; lock; 1: "

// Adds add_value to *dest with xaddl (lock-prefixed when os::is_MP() is
// non-zero) and returns the updated value.
inline jint     Atomic::add    (jint     add_value, volatile jint*     dest) {
  int multi_cpu = os::is_MP();
  // Operand 0 goes in holding the addend and comes back holding the value
  // xaddl found in *dest.
  jint previous = add_value;
  __asm__ volatile (  LOCK_IF_MP(%3) "xaddl %0,(%2)"
                    : "=r" (previous)
                    : "0" (previous), "r" (dest), "r" (multi_cpu)
                    : "cc", "memory");
  return previous + add_value;
}

// Adds 1 to *dest; the addl is lock-prefixed when os::is_MP() is non-zero.
inline void Atomic::inc    (volatile jint*     dest) {
  int multi_cpu = os::is_MP();
  __asm__ volatile (LOCK_IF_MP(%1) "addl $1,(%0)"
                    : /* no outputs */
                    : "r" (dest), "r" (multi_cpu)
                    : "cc", "memory");
}

// Untyped-pointer overload: treats dest as a volatile intptr_t* and forwards
// to the intptr_t overload.
inline void Atomic::inc_ptr(volatile void*     dest) {
  volatile intptr_t* const word = (volatile intptr_t*)dest;
  inc_ptr(word);
}

// Subtracts 1 from *dest; the subl is lock-prefixed when os::is_MP() is
// non-zero.
inline void Atomic::dec    (volatile jint*     dest) {
  int multi_cpu = os::is_MP();
  __asm__ volatile (LOCK_IF_MP(%1) "subl $1,(%0)"
                    : /* no outputs */
                    : "r" (dest), "r" (multi_cpu)
                    : "cc", "memory");
}

// Untyped-pointer overload: treats dest as a volatile intptr_t* and forwards
// to the intptr_t overload.
inline void Atomic::dec_ptr(volatile void*     dest) {
  volatile intptr_t* const word = (volatile intptr_t*)dest;
  dec_ptr(word);
}

// Exchanges exchange_value with *dest using xchgl and returns what was in
// *dest before the exchange.
inline jint     Atomic::xchg    (jint     exchange_value, volatile jint*     dest) {
  // Operand 0 carries the new value in and the old value out.
  jint previous = exchange_value;
  __asm__ volatile (  "xchgl (%2),%0"
                    : "=r" (previous)
                    : "0" (previous), "r" (dest)
                    : "memory");
  return previous;
}

inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}


// Compare-and-exchange on a jint: cmpxchgl (lock-prefixed when os::is_MP() is
// non-zero) compares *dest with compare_value, which is preloaded into the
// "a" register, and stores exchange_value on a match. Returns the content of
// the "a" register after the instruction.
inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value) {
  int multi_cpu = os::is_MP();
  jint observed;
  __asm__ volatile (LOCK_IF_MP(%4) "cmpxchgl %1,(%3)"
                    : "=a" (observed)
                    : "r" (exchange_value), "a" (compare_value), "r" (dest), "r" (multi_cpu)
                    : "cc", "memory");
  return observed;
}

#ifdef AMD64
// AMD64 only: jlong stores are ordinary assignments through dest
// (non-volatile and volatile destination overloads).
inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }

// Adds add_value to *dest with xaddq (lock-prefixed when os::is_MP() is true)
// and returns the updated value.
inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) {
  bool multi_cpu = os::is_MP();
  // Operand 0 goes in holding the addend and comes back holding the value
  // xaddq found in *dest.
  intptr_t previous = add_value;
  __asm__ __volatile__ (LOCK_IF_MP(%3) "xaddq %0,(%2)"
                        : "=r" (previous)
                        : "0" (previous), "r" (dest), "r" (multi_cpu)
                        : "cc", "memory");
  return previous + add_value;
}

inline void*    Atomic::add_ptr(intptr_t add_value, volatile void*     dest) {
  return (void*)add_ptr(add_value, (volatile intptr_t*)dest);
}

// Adds 1 to the 64-bit word at dest; the addq is lock-prefixed when
// os::is_MP() is true.
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
  bool multi_cpu = os::is_MP();
  __asm__ __volatile__ (LOCK_IF_MP(%1) "addq $1,(%0)"
                        : /* no outputs */
                        : "r" (dest), "r" (multi_cpu)
                        : "cc", "memory");
}

// Subtracts 1 from the 64-bit word at dest; the subq is lock-prefixed when
// os::is_MP() is true.
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
  bool multi_cpu = os::is_MP();
  __asm__ __volatile__ (LOCK_IF_MP(%1) "subq $1,(%0)"
                        : /* no outputs */
                        : "r" (dest), "r" (multi_cpu)
                        : "cc", "memory");
}

// Exchanges exchange_value with the 64-bit word at dest using xchgq and
// returns what was in *dest before the exchange.
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
  // Operand 0 carries the new value in and the old value out.
  intptr_t previous = exchange_value;
  __asm__ __volatile__ ("xchgq (%2),%0"
                        : "=r" (previous)
                        : "0" (previous), "r" (dest)
                        : "memory");
  return previous;
}

// Compare-and-exchange on a jlong: cmpxchgq (lock-prefixed when os::is_MP()
// is true) compares *dest with compare_value, which is preloaded into the "a"
// register, and stores exchange_value on a match. Returns the content of the
// "a" register after the instruction.
inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value) {
  bool multi_cpu = os::is_MP();
  jlong observed;
  __asm__ __volatile__ (LOCK_IF_MP(%4) "cmpxchgq %1,(%3)"
                        : "=a" (observed)
                        : "r" (exchange_value), "a" (compare_value), "r" (dest), "r" (multi_cpu)
                        : "cc", "memory");
  return observed;
}

inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) {
  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value);
}

inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value) {
  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value);
}

// AMD64: returns *src using a plain read through the volatile pointer.
inline jlong Atomic::load(volatile jlong* src) { return *src; }

#else // !AMD64

inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) {
  return (intptr_t)Atomic::add((jint)add_value, (volatile jint*)dest);
}

inline void*    Atomic::add_ptr(intptr_t add_value, volatile void*     dest) {
  return (void*)Atomic::add((jint)add_value, (volatile jint*)dest);
}


inline void Atomic::inc_ptr(volatile intptr_t* dest) {
  inc((volatile jint*)dest);
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
  dec((volatile jint*)dest);
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}

// Out-of-line helpers used by the 32-bit jlong operations in this file.
extern "C" {
  // defined in linux_x86.s
  // Called below as (exchange_value, dest, compare_value, os::is_MP()).
  jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong, bool);
  // Moves the jlong at src to dst; used below for jlong load and store.
  void _Atomic_move_long(volatile jlong* src, volatile jlong* dst);
}

// 32-bit: jlong compare-and-exchange, delegated to the assembly helper
// _Atomic_cmpxchg_long together with the os::is_MP() flag.
inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value) {
  const bool multi_cpu = os::is_MP();
  return _Atomic_cmpxchg_long(exchange_value, dest, compare_value, multi_cpu);
}

inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) {
  return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value);
}

inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value) {
  return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value);
}

// 32-bit: reads the jlong at src by having _Atomic_move_long copy it into a
// local, then returns that local.
inline jlong Atomic::load(volatile jlong* src) {
  volatile jlong snapshot;
  _Atomic_move_long(src, &snapshot);
  return snapshot;
}

// 32-bit: writes store_value to *dest by having _Atomic_move_long copy it
// from the by-value parameter.
inline void Atomic::store(jlong store_value, jlong* dest) {
  volatile jlong* const source = (volatile jlong*)&store_value;
  volatile jlong* const target = (volatile jlong*)dest;
  _Atomic_move_long(source, target);
}

// 32-bit: writes store_value to the volatile *dest by having
// _Atomic_move_long copy it from the by-value parameter.
inline void Atomic::store(jlong store_value, volatile jlong* dest) {
  volatile jlong* const source = (volatile jlong*)&store_value;
  _Atomic_move_long(source, dest);
}

#endif // AMD64

#endif // OS_CPU_LINUX_X86_VM_ATOMIC_LINUX_X86_INLINE_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/bytes_linux_x86.inline.hpp
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_BYTES_LINUX_X86_INLINE_HPP
#define OS_CPU_LINUX_X86_VM_BYTES_LINUX_X86_INLINE_HPP

#include <byteswap.h>

// Efficient swapping of data bytes from Java byte
// ordering to native byte ordering and vice versa.
// Returns x with its two bytes exchanged.
inline u2   Bytes::swap_u2(u2 x) {
#ifndef AMD64
  // 32-bit: move the value through %ax and exchange %al with %ah.
  u2 swapped;
  __asm__ __volatile__ (
    "movw %0, %%ax;"
    "xchg %%al, %%ah;"
    "movw %%ax, %0"
    :"=r" (swapped)  // output : register 0 => swapped
    :"0"  (x)        // input  : x => register 0
    :"ax", "0"       // clobbered registers
  );
  return swapped;
#else
  return bswap_16(x);
#endif // !AMD64
}

// Returns x with its four bytes in reverse order.
inline u4   Bytes::swap_u4(u4 x) {
#ifndef AMD64
  // 32-bit: a single bswap on the register that holds x.
  u4 swapped;
  __asm__ __volatile__ (
    "bswap %0"
    :"=r" (swapped)  // output : register 0 => swapped
    :"0"  (x)        // input  : x => register 0
    :"0"             // clobbered register
  );
  return swapped;
#else
  return bswap_32(x);
#endif // !AMD64
}

#ifdef AMD64
// AMD64: returns x with its eight bytes in reverse order.
inline u8 Bytes::swap_u8(u8 x) {
#ifndef SPARC_WORKS
  return bswap_64(x);
#else
  // workaround for SunStudio12 CR6615391
  __asm__ __volatile__ (
    "bswapq %0"
    :"=r" (x)        // output : register 0 => x
    :"0"  (x)        // input  : x => register 0
    :"0"             // clobbered register
  );
  return x;
#endif // !SPARC_WORKS
}
#else
// Helper function for swap_u8
// Helper function for swap_u8: byte-swaps x and y individually and returns
// the 64-bit value with swapped x in the upper half and swapped y in the
// lower half.
inline u8   Bytes::swap_u8_base(u4 x, u4 y) {
  const u8 upper = ((u8)swap_u4(x)) << 32;
  const u4 lower = swap_u4(y);
  return upper | lower;
}

// 32-bit: returns x with its eight bytes in reverse order, by byte-swapping
// each 32-bit half and exchanging the halves (see swap_u8_base).
inline u8 Bytes::swap_u8(u8 x) {
  // Split x arithmetically instead of reading it through u4 pointers:
  // dereferencing (u4*)&x accesses a u8 object through an incompatible type
  // and is only safe when strict aliasing is disabled. On little-endian x86
  // the first u4 in memory is the low half and the second is the high half,
  // so these are the same two values the pointer reads produced.
  const u4 low_half  = (u4)x;
  const u4 high_half = (u4)(x >> 32);
  return swap_u8_base(low_half, high_half);
}
#endif // !AMD64

#endif // OS_CPU_LINUX_X86_VM_BYTES_LINUX_X86_INLINE_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/copy_linux_x86.inline.hpp
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP
#define OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP

// Copies 'count' HeapWords from 'from' to 'to'; the two ranges may overlap.
// AMD64 delegates to memmove. The 32-bit path is hand-written assembly that
// copies forward unless 'to' lies inside the source range above 'from', in
// which case it copies backward from the last word. In either direction it
// uses an explicit movl loop for up to 32 words and "rep; smovl" for more.
static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  (void)memmove(to, from, count * HeapWordSize);
#else
  // Includes a zero-count check.
  // Operands: %0/%4 = from ("S"), %1/%5 = to ("D"), %2/%6 = count ("c"),
  // %3/%7 = temp (a scratch register picked by the compiler).
  intx temp = 0;
  __asm__ volatile("        testl   %6,%6         ;"  // count == 0 -> done
                   "        jz      7f            ;"
                   "        cmpl    %4,%5         ;"
                   "        leal    -4(%4,%6,4),%3;"  // temp = address of last source word
                   "        jbe     1f            ;"  // to <= from -> forward copy
                   "        cmpl    %7,%5         ;"
                   "        jbe     4f            ;"  // to <= last source word -> backward copy
                   "1:      cmpl    $32,%6        ;"  // forward: more than 32 words -> rep at 3
                   "        ja      3f            ;"
                   "        subl    %4,%1         ;"  // to becomes the offset (to - from)
                   "2:      movl    (%4),%3       ;"
                   "        movl    %7,(%5,%4,1)  ;"
                   "        addl    $4,%0         ;"
                   "        subl    $1,%2          ;"
                   "        jnz     2b            ;"
                   "        jmp     7f            ;"
                   "3:      rep;    smovl         ;"
                   "        jmp     7f            ;"
                   "4:      cmpl    $32,%2        ;"  // backward: start from the last words
                   "        movl    %7,%0         ;"
                   "        leal    -4(%5,%6,4),%1;"
                   "        ja      6f            ;"
                   "        subl    %4,%1         ;"
                   "5:      movl    (%4),%3       ;"
                   "        movl    %7,(%5,%4,1)  ;"
                   "        subl    $4,%0         ;"
                   "        subl    $1,%2          ;"
                   "        jnz     5b            ;"
                   "        jmp     7f            ;"
                   "6:      std                   ;"  // direction flag set for the descending rep, cleared after
                   "        rep;    smovl         ;"
                   "        cld                   ;"
                   "7:      nop                    "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
                   : "memory", "flags");
#endif // AMD64
}

// Copies 'count' HeapWords from 'from' to 'to'. No overlap handling is done:
// the copy always runs forward in the assembly path and through memcpy for
// large counts on AMD64.
static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  // Counts 0..8 are copied by word assignments; each case deliberately falls
  // through to the next so that exactly 'count' words are written.
  switch (count) {
  case 8:  to[7] = from[7];
  case 7:  to[6] = from[6];
  case 6:  to[5] = from[5];
  case 5:  to[4] = from[4];
  case 4:  to[3] = from[3];
  case 3:  to[2] = from[2];
  case 2:  to[1] = from[1];
  case 1:  to[0] = from[0];
  case 0:  break;
  default:
    (void)memcpy(to, from, count * HeapWordSize);
    break;
  }
#else
  // Includes a zero-count check.
  // Operands: %0/%4 = from ("S"), %1/%5 = to ("D"), %2/%6 = count ("c"),
  // %3/%7 = temp (a scratch register picked by the compiler).
  // Up to 32 words use the movl loop at label 1; more use "rep; smovl".
  intx temp = 0;
  __asm__ volatile("        testl   %6,%6       ;"
                   "        jz      3f          ;"
                   "        cmpl    $32,%6      ;"
                   "        ja      2f          ;"
                   "        subl    %4,%1       ;"  // to becomes the offset (to - from)
                   "1:      movl    (%4),%3     ;"
                   "        movl    %7,(%5,%4,1);"
                   "        addl    $4,%0       ;"
                   "        subl    $1,%2        ;"
                   "        jnz     1b          ;"
                   "        jmp     3f          ;"
                   "2:      rep;    smovl       ;"
                   "3:      nop                  "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
                   : "memory", "cc");
#endif // AMD64
}

// Copies 'count' HeapWords from 'from' to 'to' one word at a time.
// AMD64 uses word assignments for every count and never calls memcpy; the
// 32-bit build reuses pd_disjoint_words.
static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  // Counts 0..8: each case deliberately falls through to the next so that
  // exactly 'count' words are written, highest index first.
  switch (count) {
  case 8:  to[7] = from[7];
  case 7:  to[6] = from[6];
  case 6:  to[5] = from[5];
  case 5:  to[4] = from[4];
  case 4:  to[3] = from[3];
  case 3:  to[2] = from[2];
  case 2:  to[1] = from[1];
  case 1:  to[0] = from[0];
  case 0:  break;
  default:
    // Larger counts: ascending word-by-word loop.
    while (count-- > 0) {
      *to++ = *from++;
    }
    break;
  }
#else
  // pd_disjoint_words is word-atomic in this implementation.
  pd_disjoint_words(from, to, count);
#endif // AMD64
}

// Aligned variant of the possibly-overlapping word copy; forwards unchanged
// to pd_conjoint_words.
static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_words(from, to, count);
}

// Aligned variant of the non-overlapping word copy; forwards unchanged to
// pd_disjoint_words.
static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}

// Copies 'count' bytes from 'from' to 'to'; the two ranges may overlap.
// AMD64 delegates to memmove. The 32-bit path is hand-written assembly:
//  - labels 1-7 copy forward: counts of 3 or fewer go straight to the byte
//    loop at 6/7; otherwise bytes are copied until 'from' is 4-byte aligned,
//    then 32-bit words (movl loop for up to 32 words, "rep; smovl" for more),
//    then the remaining 0-3 bytes;
//  - labels 8-12 copy backward with the direction flag set, and are taken
//    when 'to' lies inside the source range above 'from'; counts of 12 or
//    fewer use a single "rep; smovb".
static void pd_conjoint_bytes(void* from, void* to, size_t count) {
#ifdef AMD64
  (void)memmove(to, from, count);
#else
  // Includes a zero-count check.
  // Operands: %0/%4 = from ("S"), %1/%5 = to ("D"), %2/%6 = count ("c"),
  // %3/%7 = temp (a scratch register picked by the compiler); %edx is used as
  // an extra scratch register and is listed as clobbered.
  intx temp = 0;
  __asm__ volatile("        testl   %6,%6          ;"
                   "        jz      13f            ;"
                   "        cmpl    %4,%5          ;"
                   "        leal    -1(%4,%6),%3   ;"  // temp = address of last source byte
                   "        jbe     1f             ;"  // to <= from -> forward copy
                   "        cmpl    %7,%5          ;"
                   "        jbe     8f             ;"  // to <= last source byte -> backward copy
                   "1:      cmpl    $3,%6          ;"
                   "        jbe     6f             ;"
                   "        movl    %6,%3          ;"
                   "        movl    $4,%2          ;"
                   "        subl    %4,%2          ;"
                   "        andl    $3,%2          ;"  // bytes needed to 4-byte align 'from'
                   "        jz      2f             ;"
                   "        subl    %6,%3          ;"
                   "        rep;    smovb          ;"
                   "2:      movl    %7,%2          ;"
                   "        shrl    $2,%2          ;"  // remaining count in 32-bit words
                   "        jz      5f             ;"
                   "        cmpl    $32,%2         ;"
                   "        ja      4f             ;"
                   "        subl    %4,%1          ;"
                   "3:      movl    (%4),%%edx     ;"
                   "        movl    %%edx,(%5,%4,1);"
                   "        addl    $4,%0          ;"
                   "        subl    $1,%2           ;"
                   "        jnz     3b             ;"
                   "        addl    %4,%1          ;"
                   "        jmp     5f             ;"
                   "4:      rep;    smovl          ;"
                   "5:      movl    %7,%2          ;"
                   "        andl    $3,%2          ;"  // trailing 0-3 bytes
                   "        jz      13f            ;"
                   "6:      xorl    %7,%3          ;"
                   "7:      movb    (%4,%7,1),%%dl ;"
                   "        movb    %%dl,(%5,%7,1) ;"
                   "        addl    $1,%3          ;"
                   "        subl    $1,%2           ;"
                   "        jnz     7b             ;"
                   "        jmp     13f            ;"
                   "8:      std                    ;"  // backward copy: direction flag set until label 12
                   "        cmpl    $12,%2         ;"
                   "        ja      9f             ;"
                   "        movl    %7,%0          ;"
                   "        leal    -1(%6,%5),%1   ;"
                   "        jmp     11f            ;"
                   "9:      xchgl   %3,%2          ;"
                   "        movl    %6,%0          ;"
                   "        addl    $1,%2          ;"
                   "        leal    -1(%7,%5),%1   ;"
                   "        andl    $3,%2          ;"
                   "        jz      10f            ;"
                   "        subl    %6,%3          ;"
                   "        rep;    smovb          ;"
                   "10:     movl    %7,%2          ;"
                   "        subl    $3,%0          ;"
                   "        shrl    $2,%2          ;"
                   "        subl    $3,%1          ;"
                   "        rep;    smovl          ;"
                   "        andl    $3,%3          ;"
                   "        jz      12f            ;"
                   "        movl    %7,%2          ;"
                   "        addl    $3,%0          ;"
                   "        addl    $3,%1          ;"
                   "11:     rep;    smovb          ;"
                   "12:     cld                    ;"
                   "13:     nop                    ;"
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0"  (from), "1"  (to), "2"  (count), "3"  (temp)
                   : "memory", "flags", "%edx");
#endif // AMD64
}

// Byte copy for possibly-overlapping ranges; forwards unchanged to
// pd_conjoint_bytes.
static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}

// Copies 'count' jshorts between possibly-overlapping ranges by calling the
// external routine _Copy_conjoint_jshorts_atomic.
static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}

// Copies 'count' jints between possibly-overlapping ranges.
// The 32-bit build treats the jints as HeapWords; AMD64 calls the external
// routine _Copy_conjoint_jints_atomic.
static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
#ifndef AMD64
  assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  HeapWord* const src_words = (HeapWord*)from;
  HeapWord* const dst_words = (HeapWord*)to;
  pd_conjoint_words(src_words, dst_words, count);
#else
  _Copy_conjoint_jints_atomic(from, to, count);
#endif // !AMD64
}

// Copies 'count' jlongs between possibly-overlapping ranges, moving each
// element with one 64-bit load and one 64-bit store.
// AMD64 calls the external routine _Copy_conjoint_jlongs_atomic.
static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
#ifndef AMD64
  // Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't.
  if (from > to) {
    // Source is above destination: copy in ascending order.
    for (; count > 0; --count, ++from, ++to) {
      __asm__ volatile("fildll (%0); fistpll (%1)"
                       :
                       : "r" (from), "r" (to)
                       : "memory" );
    }
  } else {
    // Otherwise copy in descending order, from index count-1 down to 0.
    for (size_t index = count; index-- > 0; ) {
      __asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)"
                       :
                       : "r" (from), "r" (to), "r" (index)
                       : "memory" );
    }
  }
#else
  _Copy_conjoint_jlongs_atomic(from, to, count);
#endif // !AMD64
}

// Copies 'count' oops between possibly-overlapping ranges.
// The 32-bit build treats the oops as HeapWords; AMD64 treats them as jlongs
// and calls _Copy_conjoint_jlongs_atomic.
static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
#ifndef AMD64
  assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  HeapWord* const src_words = (HeapWord*)from;
  HeapWord* const dst_words = (HeapWord*)to;
  pd_conjoint_words(src_words, dst_words, count);
#else
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  jlong* const src_longs = (jlong*)from;
  jlong* const dst_longs = (jlong*)to;
  _Copy_conjoint_jlongs_atomic(src_longs, dst_longs, count);
#endif // !AMD64
}

// Array-of-bytes copy for possibly-overlapping ranges given as HeapWord
// pointers; calls the external routine _Copy_arrayof_conjoint_bytes.
static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}

// Array-of-jshorts copy for possibly-overlapping ranges given as HeapWord
// pointers; calls the external routine _Copy_arrayof_conjoint_jshorts.
static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}

// Array-of-jints copy for possibly-overlapping ranges given as HeapWord
// pointers. The 32-bit build reuses pd_conjoint_jints_atomic; AMD64 calls the
// external routine _Copy_arrayof_conjoint_jints.
static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
#ifndef AMD64
  jint* const src_ints = (jint*)from;
  jint* const dst_ints = (jint*)to;
  pd_conjoint_jints_atomic(src_ints, dst_ints, count);
#else
  _Copy_arrayof_conjoint_jints(from, to, count);
#endif // !AMD64
}

// Array-of-jlongs copy for possibly-overlapping ranges given as HeapWord
// pointers. The 32-bit build reuses pd_conjoint_jlongs_atomic; AMD64 calls
// the external routine _Copy_arrayof_conjoint_jlongs.
static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
#ifndef AMD64
  jlong* const src_longs = (jlong*)from;
  jlong* const dst_longs = (jlong*)to;
  pd_conjoint_jlongs_atomic(src_longs, dst_longs, count);
#else
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#endif // !AMD64
}

// Array-of-oops copy for possibly-overlapping ranges given as HeapWord
// pointers. The 32-bit build reuses pd_conjoint_oops_atomic; AMD64 treats the
// oops as jlongs and calls _Copy_arrayof_conjoint_jlongs.
static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
#ifndef AMD64
  oop* const src_oops = (oop*)from;
  oop* const dst_oops = (oop*)to;
  pd_conjoint_oops_atomic(src_oops, dst_oops, count);
#else
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#endif // !AMD64
}

#endif // OS_CPU_LINUX_X86_VM_COPY_LINUX_X86_INLINE_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/globals_linux_x86.hpp
/*
 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_GLOBALS_LINUX_X86_HPP
#define OS_CPU_LINUX_X86_VM_GLOBALS_LINUX_X86_HPP

// Sets the default values for platform dependent flags used by the runtime system.
// (see globals.hpp)

// Each define_pd_global(type, name, value) line supplies this platform's
// default for the named flag.
// NOTE(review): the stack-size values are presumably in K-bytes -- confirm
// against the flag descriptions in globals.hpp.
define_pd_global(bool, DontYieldALot,            false);
#ifdef AMD64
define_pd_global(intx, ThreadStackSize,          1024); // 0 => use system default
define_pd_global(intx, VMThreadStackSize,        1024);
#else
// ThreadStackSize 320 allows a couple of test cases to run while
// keeping the number of threads that can be created high.  System
// default ThreadStackSize appears to be 512 which is too big.
define_pd_global(intx, ThreadStackSize,          320);
define_pd_global(intx, VMThreadStackSize,        512);
#endif // AMD64

// Same default on both 32-bit and 64-bit builds.
define_pd_global(intx, CompilerThreadStackSize,  0);

define_pd_global(uintx,JVMInvokeMethodSlack,     8192);

// Used on 64 bit platforms for UseCompressedOops base address
define_pd_global(uintx,HeapBaseMinAddress,       2*G);

#endif // OS_CPU_LINUX_X86_VM_GLOBALS_LINUX_X86_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/linux_x86_32.s
# 
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
# 

	
        # NOTE WELL!  The _Copy functions are called directly
	# from server-compiler-generated code via CallLeafNoFP,
	# which means that they *must* either not use floating
	# point or use it in the same manner as does the server
	# compiler.
	
        # Make the copy routines defined in this file visible to the linker.
        .globl _Copy_conjoint_bytes
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
	.globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
	.globl _Copy_conjoint_jlongs_atomic
	.globl _mmx_Copy_arrayof_conjoint_jshorts

        # Make the 64-bit atomic helpers visible to the linker.
        .globl _Atomic_cmpxchg_long
        .globl _Atomic_move_long

        # Everything that follows is assembled into the text (code) section.
	.text

        # SpinPause: executes "rep; nop" (the byte encoding of the x86 PAUSE
        # spin-wait hint) and then returns 1 in %eax.
        .globl  SpinPause
	.type   SpinPause,@function
        .p2align 4,,15
SpinPause:
        rep
        nop
        movl    $1, %eax
        ret

        # Support for void Copy::conjoint_bytes(void* from,
        #                                       void* to,
        #                                       size_t count)
        #
        # Copies count bytes between regions that may overlap.  The three
        # arguments are read from the stack above the return address.
        # %esi and %edi are saved on entry and restored before each ret;
        # %eax, %ecx and %edx are used as scratch.
        #
        # Direction selection: when to <= from, or to lies past the last
        # source byte, the copy runs from low to high addresses
        # (cb_CopyRight).  Otherwise (from < to <= from + count - 1) it runs
        # from high to low addresses (cb_CopyLeft).
        .p2align 4,,15
	.type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        # the flags set by this cmpl are consumed by the jbe after the leal
        # (leal does not modify flags)
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      cb_CopyRight
        cmpl     %eax,%edi
        jbe      cb_CopyLeft
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        # %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses
        # the destination while only %esi is advanced
        subl     %esi,%edi
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi            # turn %edi back into a real pointer
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
        # (explicit loop, same (to - from) offset trick as the prefix copy)
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        # %eax is the running byte index into both regions
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        # set the direction flag so that the string move below walks downward
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        # %esi is moved from the dword slot to its highest byte, then the
        # remaining bytes are copied one at a time going downward
        subl     %esi,%edi
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
	subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
        # clear the direction flag again before returning
7:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        #
        # Same as _Copy_conjoint_bytes, except no source alignment check.
        #
        # Arguments are read from the stack above the return address.
        # %esi and %edi are saved and restored; %eax, %ecx and %edx are
        # scratch.  The copy runs low-to-high (acb_CopyRight) unless
        # from < to <= from + count - 1, in which case it runs high-to-low
        # (acb_CopyLeft).
        .p2align 4,,15
	.type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      acb_CopyRight
        cmpl     %eax,%edi
        jbe      acb_CopyLeft 
        # copy from low to high
acb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes: only the byte loop runs
1:      movl     %ecx,%eax            # keep the byte count for the suffix
        shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords: use the string move
        # copy aligned dwords
        # %edi temporarily holds (to - from) so one index register serves
        # both source and destination
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy aligned dwords
3:      rep;     smovl
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        # copy suffix
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acb_CopyLeft:
        # set the direction flag so that the string move below walks downward
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        jmp      4f
        # 8 bytes of padding; not reached, since the jmp above skips them
	.space 8
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx
5:      andl     $3,%ecx              # suffix byte count
        jz       7f
        # copy the remaining bytes one at a time, going downward
        subl     %esi,%edi
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
	subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
        # clear the direction flag again before returning
7:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jshorts_atomic(void* from,
        #                                                void* to,
        #                                                size_t count)
        #
        # count is scaled by 2 in the address arithmetic below, i.e. it is a
        # number of 2-byte elements.  Arguments are read from the stack above
        # the return address.  %esi and %edi are saved and restored; %eax,
        # %ecx and %edx are scratch.  The copy runs low-to-high
        # (cs_CopyRight) unless from < to <= from + count*2 - 2, in which
        # case it runs high-to-low (cs_CopyLeft).
        .p2align 4,,15
	.type    _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      cs_CopyRight
        cmpl     %eax,%edi
        jbe      cs_CopyLeft 
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl     %esi,%eax            # original from
        andl     $3,%eax              # either 0 or 2
        jz       1f                   # no prefix
        # copy prefix
        subl     $1,%ecx
        jl       5f                   # zero count
        movw     (%esi),%dx
        movw     %dx,(%edi)
        addl     %eax,%esi            # %eax == 2
        addl     %eax,%edi
1:      movl     %ecx,%eax            # word count less prefix
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f 
        # copy aligned dwords
        # (explicit loop; %edi temporarily holds to - from)
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cs_CopyLeft:
        # set the direction flag so that the string move below walks downward
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
1:      sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        # the pointers sit one dword below the last copied one; the
        # remaining element is in the upper half of that dword slot
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
        # clear the direction flag again before returning
5:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Structured like _Copy_conjoint_jshorts_atomic but without the
        # source-alignment prefix step.  count is a number of 2-byte
        # elements.  %esi and %edi are saved and restored; %eax, %ecx and
        # %edx are scratch.
        .p2align 4,,15
	.type    _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      acs_CopyRight
        cmpl     %eax,%edi
        jbe      acs_CopyLeft 
        # copy from low to high
acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f 
        # copy aligned dwords
        # (5 bytes of padding first; not reached, the jmp above skips them)
        .space 5
2:      subl     %esi,%edi 
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        movw     (%esi),%dx
        movw     %dx,(%edi)
5:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
acs_CopyLeft:
        # set the direction flag so that the string move below walks downward
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        jz       5f                   # no suffix
        # copy suffix
        addl     $2,%esi
        addl     $2,%edi
        movw     (%esi),%dx
        movw     %dx,(%edi)
        # clear the direction flag again before returning
5:      cld
        popl     %edi
        popl     %esi
        ret

        # Support for void Copy::conjoint_jints_atomic(void* from,
        #                                              void* to,
        #                                              size_t count)
        # Equivalent to
        #   arrayof_conjoint_jints
        #
        # Both symbols label the same code.  count is a number of 4-byte
        # elements.  Arguments are read from the stack above the return
        # address.  %esi and %edi are saved and restored; %eax, %ecx and
        # %edx are scratch.  The copy runs low-to-high (ci_CopyRight) unless
        # from < to <= from + count*4 - 4, in which case it runs high-to-low
        # (ci_CopyLeft).
        .p2align 4,,15
	.type    _Copy_conjoint_jints_atomic,@function
	.type    _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
        jbe      ci_CopyRight
        cmpl     %eax,%edi
        jbe      ci_CopyLeft 
        # copy from low to high
ci_CopyRight:
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        rep;     smovl
        popl     %edi
        popl     %esi
        ret
        # 10 bytes of padding; not reached, since the ret above precedes them
        .space 10
        # explicit dword loop; %edi temporarily holds to - from
2:      subl     %esi,%edi
        jmp      4f
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        # decrement first and test afterwards, so a zero count copies nothing
4:      subl     $1,%ecx
        jge      3b
        popl     %edi
        popl     %esi
        ret
        # copy from high to low
ci_CopyLeft:
        # set the direction flag; it is used by the string move at label 4
        std
        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
        cmpl     $32,%ecx
        ja       4f                   # > 32 dwords
        subl     %eax,%edi            # eax == from + count*4 - 4
        jmp      3f
        .p2align 4,,15
        # explicit dword loop walking %eax downward through the source
2:      movl     (%eax),%edx
        movl     %edx,(%edi,%eax,1)
        subl     $4,%eax
3:      subl     $1,%ecx
        jge      2b
        cld
        popl     %edi
        popl     %esi
        ret
4:      movl     %eax,%esi            # from + count*4 - 4
        rep;     smovl
        cld
        popl     %edi
        popl     %esi
        ret
	
        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
        #                                               jlong* to,
        #                                               size_t count)
        #
        # 32-bit
        #
        # count treated as signed
        #
        # if (from > to) {
        #   while (--count >= 0) {
        #     *to++ = *from++;
        #   }
        # } else {
        #   while (--count >= 0) {
        #     to[count] = from[count];
        #   }
        # }
        #
        # Each 8-byte element is transferred through the x87 FPU stack with
        # one fildll (64-bit integer load) and one fistpll (64-bit integer
        # store and pop).  No callee-saved registers are used, so nothing is
        # pushed; the arguments sit directly above the return address.
        .p2align 4,,15
	.type    _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl     4+8(%esp),%ecx       # count
        movl     4+0(%esp),%eax       # from
        movl     4+4(%esp),%edx       # to
        cmpl     %eax,%edx
        jae      cla_CopyLeft         # to >= from: copy from the top down
cla_CopyRight:
        # %edx becomes (to - from), so (%edx,%eax,1) addresses the
        # destination while only %eax is advanced
        subl     %eax,%edx
        jmp      2f
        .p2align 4,,15
1:      fildll   (%eax)
        fistpll  (%edx,%eax,1)
        addl     $8,%eax
2:      subl     $1,%ecx
        jge      1b
        ret
        .p2align 4,,15
        # descending copy: %ecx is the element index, decremented before use
3:      fildll   (%eax,%ecx,8)
        fistpll  (%edx,%ecx,8)
cla_CopyLeft:
        subl     $1,%ecx
        jge      3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        #
        # Variant of _Copy_arrayof_conjoint_jshorts whose low-to-high path
        # moves large blocks through the MMX registers %mm0-%mm2, 64 bytes
        # per loop iteration, and executes emms afterwards.  The stack
        # layout, register usage and direction selection match the non-MMX
        # routine: 4+12(%esp) is count (2-byte elements), 8+4(%esp) is from
        # and 8+8(%esp) is to once both registers have been pushed.
        .p2align 4,,15
	.type    _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
        jbe      mmx_acs_CopyRight
        cmpl     %eax,%edi
        jbe      mmx_acs_CopyLeft
        # copy from low to high
mmx_acs_CopyRight:
        movl     %ecx,%eax            # word count
        sarl     %ecx                 # dword count
        je       5f                   # no dwords to move
        cmpl     $33,%ecx
        jae      3f                   # >= 33 dwords: take the MMX path
        # explicit dword loop; %edi temporarily holds to - from
1:      subl     %esi,%edi 
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      5f 
3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
        subl     $1,%ecx              # account for the dword just moved
4:      .p2align 4,,15
        # each iteration moves 64 bytes (16 dwords); loads and stores are
        # interleaved across %mm0, %mm1 and %mm2
        movq     0(%esi),%mm0
        addl     $64,%edi
        movq     8(%esi),%mm1
        subl     $16,%ecx
        movq     16(%esi),%mm2
        movq     %mm0,-64(%edi)
        movq     24(%esi),%mm0
        movq     %mm1,-56(%edi)
        movq     32(%esi),%mm1
        movq     %mm2,-48(%edi)
        movq     40(%esi),%mm2
        movq     %mm0,-40(%edi)
        movq     48(%esi),%mm0
        movq     %mm1,-32(%edi)
        movq     56(%esi),%mm1
        movq     %mm2,-24(%edi)
        movq     %mm0,-16(%edi)
        addl     $64,%esi
        movq     %mm1,-8(%edi)
        cmpl     $16,%ecx
        jge      4b                   # at least 16 dwords still to move
        emms
        # fewer than 16 dwords remain; if any, finish them in the dword loop
	testl    %ecx,%ecx
	ja       1b
5:      andl     $1,%eax              # suffix count
        je       7f                   # no suffix
6:      movw     (%esi),%dx
        movw     %dx,(%edi)
7:	popl     %edi
        popl     %esi
        ret
        # copy from high to low (no MMX on this path)
mmx_acs_CopyLeft:
        # set the direction flag so that the string move below walks downward
        std
        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
        movl     %eax,%esi            # from + count*2 - 2
        movl     %ecx,%eax
        subl     $2,%esi              # from + count*2 - 4
        sarl     %ecx                 # dword count
        je       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
3:      rep;     smovl
4:      andl     $1,%eax              # suffix count
        je       6f                   # no suffix
        addl     $2,%esi
        addl     $2,%edi
5:      movw     (%esi),%dx
        movw     %dx,(%edi)
        # clear the direction flag again before returning
6:      cld
        popl     %edi
        popl     %esi
        ret


        # Support for jlong Atomic::cmpxchg(jlong exchange_value,
        #                                   volatile jlong* dest,
        #                                   jlong compare_value,
        #                                   bool is_MP)
        #
        # Performs a 64-bit compare-and-exchange on *dest with cmpxchg8b:
        # %edx:%eax holds compare_value and %ecx:%ebx holds exchange_value.
        # When is_MP is zero the branch lands on cmpxchg8b itself, skipping
        # the lock prefix that otherwise precedes it.  After the instruction
        # %edx:%eax holds the value that was found in *dest, which is left
        # there as the result.  %ebx and %edi are saved and restored.
        #
        .p2align 4,,15
	.type    _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                   #  8(%esp) : return PC
        pushl    %ebx              #  4(%esp) : old %ebx
        pushl    %edi              #  0(%esp) : old %edi
        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
        movl     20(%esp), %edi    # 20(%esp) : dest
        cmpl     $0, 32(%esp)      # 32(%esp) : is_MP
        je       1f                # uniprocessor: skip the lock prefix
        lock
1:      cmpxchg8b (%edi)
        popl     %edi
        popl     %ebx
        ret


        # Support for jlong Atomic::load and Atomic::store.
        # void _Atomic_move_long(volatile jlong* src, volatile jlong* dst)
        #
        # Moves the 8-byte value at *src to *dst through the x87 FPU stack:
        # one fildll (64-bit integer load) followed by one fistpll (64-bit
        # integer store and pop).  Only %eax is used as a scratch register.
        .p2align 4,,15
	.type    _Atomic_move_long,@function
_Atomic_move_long:
        movl     4(%esp), %eax   # src
        fildll    (%eax)
        movl     8(%esp), %eax   # dest
        fistpll   (%eax)
        ret
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/linux_x86_64.s
# 
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


        # NOTE WELL!  The _Copy functions are called directly
	# from server-compiler-generated code via CallLeafNoFP,
	# which means that they *must* either not use floating
	# point or use it in the same manner as does the server
	# compiler.
	
        # Make the copy routines defined in this file visible to the linker.
        .globl _Copy_arrayof_conjoint_bytes
	.globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jlongs
        .globl _Copy_conjoint_jlongs_atomic

        # Everything that follows is assembled into the text (code) section.
	.text

        # SpinPause: executes "rep; nop" (the byte encoding of the x86 PAUSE
        # spin-wait hint) and then returns 1 in %rax.
        .globl SpinPause
        .align 16
        .type  SpinPause,@function
SpinPause:
        rep
        nop
        movq   $1, %rax
        ret

        # Support for void Copy::arrayof_conjoint_bytes(void* from,
        #                                               void* to,
        #                                               size_t count)
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # The bulk of the data is moved as qwords (four per iteration while
        # at least four remain, then one at a time); the remaining 0-7 bytes
        # are moved as an optional dword, word and byte selected by bits 2,
        # 1 and 0 of the byte count kept in %r8.  The copy runs low-to-high
        # (acb_CopyRight) unless from < to <= from + count - 1, in which
        # case it runs high-to-low (acb_CopyLeft).  %rax, %rcx, %rdx, %rsi
        # and %r8 are modified.
        #
        .p2align 4,,15
	.type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8             # byte count
        shrq     $3,%rdx              # qword count
        cmpq     %rdi,%rsi
        leaq     -1(%rdi,%r8,1),%rax  # from + bcount*1 - 1
        jbe      acb_CopyRight
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft 
        # copy from low to high
acb_CopyRight:
        # %rax/%rcx point at the last whole qword of source/destination and
        # %rdx counts up from -qcount towards zero
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx
        jmp      7f
        .p2align 4,,15
        # one qword per iteration (fewer than four qwords remain)
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $4,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $2,%r8               # check for trailing word
        jz       4f
        movw     8(%rax),%si          # copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8               # check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   # copy trailing byte
        movb     %al,8(%rcx)
5:      ret
        .p2align 4,,15
        # four qwords per iteration
6:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
        # loop control: keep taking groups of four while the negative
        # counter stays <= 0 after adding 4, then undo the last add and
        # finish any leftover qwords singly
7:      addq     $4,%rdx
        jle      6b
        subq     $4,%rdx
        jl       1b
        jmp      2b
        # copy from high to low: trailing byte, word and dword first, then
        # the qwords in descending order
acb_CopyLeft:
        testq    $1,%r8               # check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   # copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               # adjust for possible trailing word
1:      testq    $2,%r8               # check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               # check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .p2align 4,,15
        # one qword per iteration (fewer than four qwords remain)
3:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .p2align 4,,15
        # four qwords per iteration, highest address first
4:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx
        jge      4b
        addq     $4,%rdx
        jg       3b
        ret

        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
        #                                                 void* to,
        #                                                 size_t count)
        # Equivalent to
        #   conjoint_jshorts_atomic
        #
        # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        # let the hardware handle it.  The two or four words within dwords
        # or qwords that span cache line boundaries will still be loaded
        # and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Both symbols label the same code.  Whole qwords (four elements
        # each) are moved first, four qwords per iteration while at least
        # four remain; the remaining 0-3 elements are moved as an optional
        # dword and word selected by bits 1 and 0 of the element count kept
        # in %r8.  %rax, %rcx, %rdx, %rsi and %r8 are modified.
        #
        .p2align 4,,15
	.type    _Copy_arrayof_conjoint_jshorts,@function
	.type    _Copy_conjoint_jshorts_atomic,@function
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8             # word count
        shrq     $2,%rdx              # qword count
        cmpq     %rdi,%rsi
        leaq     -2(%rdi,%r8,2),%rax  # from + wcount*2 - 2
        jbe      acs_CopyRight
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft 
        # copy from low to high
acs_CopyRight:
        # %rax/%rcx point at the last whole qword of source/destination and
        # %rdx counts up from -qcount towards zero
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx
        jmp      6f
        # one qword per iteration (fewer than four qwords remain)
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              # original %rsi is trashed, so we
                                      #  can't use it as a base register
3:      testq    $1,%r8               # check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   # copy trailing word
        movw     %si,8(%rcx)
4:      ret
        .p2align 4,,15
        # four qwords per iteration
5:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
        # loop control: groups of four while the counter stays <= 0 after
        # adding 4, then undo the last add and finish leftovers singly
6:      addq     $4,%rdx
        jle      5b
        subq     $4,%rdx
        jl       1b
        jmp      2b
        # copy from high to low: trailing word and dword first, then the
        # qwords in descending order
acs_CopyLeft:
        testq    $1,%r8               # check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   # copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               # check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
        # one qword per iteration (fewer than four qwords remain)
2:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .p2align 4,,15
        # four qwords per iteration, highest address first
3:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx
        jge      3b
        addq     $4,%rdx
        jg       2b
        ret

        # Support for void Copy::arrayof_conjoint_jints(jint* from,
        #                                               jint* to,
        #                                               size_t count)
        # Equivalent to
        #   conjoint_jints_atomic
        #
        # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        # the hardware handle it.  The two dwords within qwords that span
        # cache line boundaries will still be loaded and stored atomically.
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Both symbols label the same code.  Whole qwords (two elements
        # each) are moved first, four qwords per iteration while at least
        # four remain; an odd element count leaves one trailing dword,
        # selected by bit 0 of the element count kept in %r8.  %rax, %rcx,
        # %rdx, %rsi and %r8 are modified.
        #
        .p2align 4,,15
	.type    _Copy_arrayof_conjoint_jints,@function
	.type    _Copy_conjoint_jints_atomic,@function
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8             # dword count
        shrq     %rdx                 # qword count
        cmpq     %rdi,%rsi
        leaq     -4(%rdi,%r8,4),%rax  # from + dcount*4 - 4
        jbe      aci_CopyRight
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft 
        # copy from low to high
aci_CopyRight:
        # %rax/%rcx point at the last whole qword of source/destination and
        # %rdx counts up from -qcount towards zero
        leaq     -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8
        negq     %rdx
        jmp      5f
        .p2align 4,,15
        # one qword per iteration (fewer than four qwords remain)
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz       1b
2:      testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     8(%rax),%esi         # copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .p2align 4,,15
        # four qwords per iteration
4:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
        # loop control: groups of four while the counter stays <= 0 after
        # adding 4, then undo the last add and finish leftovers singly
5:      addq     $4,%rdx
        jle      4b
        subq     $4,%rdx
        jl       1b
        jmp      2b
        # copy from high to low: trailing dword first, then the qwords in
        # descending order
aci_CopyLeft:
        testq    $1,%r8               # check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  # copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
        # one qword per iteration (fewer than four qwords remain)
1:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
        # four qwords per iteration, highest address first
2:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx
        jge      2b
        addq     $4,%rdx
        jg       1b
        ret

        # Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
        #                                                jlong* to,
        #                                                size_t count)
        # Equivalent to
        #   conjoint_jlongs_atomic
        #   arrayof_conjoint_oops
        #   conjoint_oops_atomic
        #
        # rdi - from
        # rsi - to
        # rdx - count, treated as ssize_t
        #
        # Both symbols label the same code.  Every element is one qword, so
        # there is no trailing-fragment handling: qwords are moved four per
        # iteration while at least four remain, then one at a time.  The
        # copy runs low-to-high (acl_CopyRight) unless
        # from < to <= from + count*8 - 8, in which case it runs high-to-low
        # (acl_CopyLeft).  %rax, %rcx, %rdx and %rsi are modified.
        #
        .p2align 4,,15
	.type    _Copy_arrayof_conjoint_jlongs,@function
	.type    _Copy_conjoint_jlongs_atomic,@function
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        cmpq     %rdi,%rsi
        leaq     -8(%rdi,%rdx,8),%rax # from + count*8 - 8
        jbe      acl_CopyRight
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft 
        # copy from low to high
acl_CopyRight:
        # %rax already points at the last source qword; %rdx counts up from
        # -count towards zero
        leaq     -8(%rsi,%rdx,8),%rcx # to + count*8 - 8
        negq     %rdx
        jmp      3f
        # one qword per iteration (fewer than four qwords remain)
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .p2align 4,,15
        # four qwords per iteration
2:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
        # loop control: groups of four while the counter stays <= 0 after
        # adding 4, then undo the last add and finish leftovers singly
3:      addq     $4,%rdx
        jle      2b
        subq     $4,%rdx
        jl       1b
        ret
        # high-to-low path, one qword per iteration (fewer than four remain)
4:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .p2align 4,,15
        # high-to-low path, four qwords per iteration, highest address first
5:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
        # entry point of the high-to-low path; doubles as its loop control
acl_CopyLeft:
        subq     $4,%rdx
        jge      5b
        addq     $4,%rdx
        jg       4b
        ret
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/orderAccess_linux_x86.inline.hpp
/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP
#define OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP

#include "runtime/atomic.inline.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/os.hpp"
#include "vm_version_x86.hpp"

// Implementation of class OrderAccess.

// The four two-way barriers are expressed through the three primitives
// defined below: the load-first barriers use acquire(), storestore() uses
// release(), and storeload() is the only one mapped onto the full fence().
inline void OrderAccess::loadload() {
  acquire();
}

inline void OrderAccess::storestore() {
  release();
}

inline void OrderAccess::loadstore() {
  acquire();
}

inline void OrderAccess::storeload() {
  fence();
}

// Acquire barrier.
//
// Performs a dummy load of the word at the top of the stack into a local.
// The asm statement is volatile and declares a "memory" clobber, so the
// compiler must assume memory changed and may not cache or move memory
// accesses across it.
inline void OrderAccess::acquire() {
  // Receives the dummy load; its value is never used.
  volatile intptr_t local_dummy;
#ifdef AMD64
  // 64-bit: load the quadword at 0(%rsp).
  __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory");
#else
  // 32-bit: load the longword at 0(%esp).
  __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory");
#endif // AMD64
}

// Release barrier.
//
// On x86 the hardware does not reorder a store with earlier loads or stores,
// so no fence instruction is needed here; only the compiler has to be
// prevented from moving memory accesses across this point.
//
// A store to a volatile local (the previous implementation) only orders
// volatile accesses with respect to each other: the compiler remains free to
// move ordinary loads and stores across it.  An empty asm statement with a
// "memory" clobber is a real compiler barrier.  It emits no instructions and
// touches no memory, so it also cannot contend on a cache line shared with
// another thread.
inline void OrderAccess::release() {
  __asm__ volatile ("" : : : "memory");
}

// Full two-way barrier.
inline void OrderAccess::fence() {
  // Nothing is emitted when os::is_MP() reports false.
  if (!os::is_MP()) {
    return;
  }

  // A locked add of zero to the top-of-stack word is always used as the
  // barrier, since mfence is sometimes expensive.
#ifdef AMD64
  __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory");
#else
  __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory");
#endif
}

// load_acquire: read *p through the volatile pointer and return the value.
//
// Types up to 32 bits, and jfloat, are read with a plain volatile load.
// The 64-bit types (jlong, julong, jdouble) are routed through Atomic::load
// on a jlong view of the location -- presumably so the read is a single
// access on 32-bit builds; confirm against Atomic::load.  jdouble is
// converted back from the jlong bits with jdouble_cast.
inline jbyte    OrderAccess::load_acquire(volatile jbyte*   p) { return *p; }
inline jshort   OrderAccess::load_acquire(volatile jshort*  p) { return *p; }
inline jint     OrderAccess::load_acquire(volatile jint*    p) { return *p; }
inline jlong    OrderAccess::load_acquire(volatile jlong*   p) { return Atomic::load(p); }
inline jubyte   OrderAccess::load_acquire(volatile jubyte*  p) { return *p; }
inline jushort  OrderAccess::load_acquire(volatile jushort* p) { return *p; }
inline juint    OrderAccess::load_acquire(volatile juint*   p) { return *p; }
inline julong   OrderAccess::load_acquire(volatile julong*  p) { return Atomic::load((volatile jlong*)p); }
inline jfloat   OrderAccess::load_acquire(volatile jfloat*  p) { return *p; }
inline jdouble  OrderAccess::load_acquire(volatile jdouble* p) { return jdouble_cast(Atomic::load((volatile jlong*)p)); }

// Pointer-sized load_acquire variants: each one is a single volatile read of
// the word that p designates.
inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) {
  return *p;
}

inline void* OrderAccess::load_ptr_acquire(volatile void* p) {
  // View the untyped location as a volatile slot holding a void*.
  void* volatile* slot = (void* volatile *)p;
  return *slot;
}

inline void* OrderAccess::load_ptr_acquire(const volatile void* p) {
  // Same as above for a location the caller may not write through.
  void* const volatile* slot = (void* const volatile *)p;
  return *slot;
}

// release_store: write v to *p through the volatile pointer.
//
// Types up to 32 bits, and jfloat, are written with a plain volatile store.
// jlong and julong go through Atomic::store on a jlong view of the location
// -- presumably so the write is a single access on 32-bit builds; confirm
// against Atomic::store.  jdouble is converted to its jlong bits with
// jlong_cast and forwarded to the jlong overload.
inline void     OrderAccess::release_store(volatile jbyte*   p, jbyte   v) { *p = v; }
inline void     OrderAccess::release_store(volatile jshort*  p, jshort  v) { *p = v; }
inline void     OrderAccess::release_store(volatile jint*    p, jint    v) { *p = v; }
inline void     OrderAccess::release_store(volatile jlong*   p, jlong   v) { Atomic::store(v, p); }
inline void     OrderAccess::release_store(volatile jubyte*  p, jubyte  v) { *p = v; }
inline void     OrderAccess::release_store(volatile jushort* p, jushort v) { *p = v; }
inline void     OrderAccess::release_store(volatile juint*   p, juint   v) { *p = v; }
inline void     OrderAccess::release_store(volatile julong*  p, julong  v) { Atomic::store((jlong)v, (volatile jlong*)p); }
inline void     OrderAccess::release_store(volatile jfloat*  p, jfloat  v) { *p = v; }
inline void     OrderAccess::release_store(volatile jdouble* p, jdouble v) { release_store((volatile jlong *)p, jlong_cast(v)); }

// Pointer-sized release_store variants: each one is a single volatile write
// of v to the word that p designates.
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) {
  *p = v;
}

inline void OrderAccess::release_store_ptr(volatile void* p, void* v) {
  // View the untyped location as a volatile slot holding a void*.
  void* volatile* slot = (void* volatile *)p;
  *slot = v;
}

// store_fence (8-bit): store v to *p by exchanging it with the byte at p.
// The old memory contents land in v and are discarded.  On x86, xchg with a
// memory operand is implicitly locked (see the Intel SDM), which is what
// supplies the fence; the "memory" clobber keeps the compiler from moving
// accesses across the statement.
//   %0 - v, in and out; "=q" restricts it to a register with a byte form
//   %2 - p, the address to store to
inline void     OrderAccess::store_fence(jbyte*  p, jbyte  v) {
  __asm__ volatile (  "xchgb (%2),%0"
                    : "=q" (v)
                    : "0" (v), "r" (p)
                    : "memory");
}
// store_fence (16-bit): store v to *p by exchanging it with the word at p.
// The old memory contents land in v and are discarded.  The "memory" clobber
// keeps the compiler from moving accesses across the statement.
//   %0 - v, in and out (any general register)
//   %2 - p, the address to store to
inline void     OrderAccess::store_fence(jshort* p, jshort v) {
  __asm__ volatile (  "xchgw (%2),%0"
                    : "=r" (v)
                    : "0" (v), "r" (p)
                    : "memory");
}
// store_fence (32-bit): store v to *p by exchanging it with the longword at
// p.  The old memory contents land in v and are discarded.  The "memory"
// clobber keeps the compiler from moving accesses across the statement.
//   %0 - v, in and out (any general register)
//   %2 - p, the address to store to
inline void     OrderAccess::store_fence(jint*   p, jint   v) {
  __asm__ volatile (  "xchgl (%2),%0"
                    : "=r" (v)
                    : "0" (v), "r" (p)
                    : "memory");
}

// store_fence (64-bit): store v to *p and then act as a full barrier.
inline void     OrderAccess::store_fence(jlong*   p, jlong   v) {
#ifdef AMD64
  // 64-bit build: one xchgq both performs the store and provides the barrier.
  //   %0 - v, in and out;  %2 - p, the address to store to
  __asm__ __volatile__ ("xchgq (%2), %0"
                        : "=r" (v)
                        : "0" (v), "r" (p)
                        : "memory");
#else
  // 32-bit build: plain store followed by an explicit fence().
  // NOTE(review): unlike release_store(jlong), this does not go through
  // Atomic::store -- confirm whether a single-access 64-bit store is needed.
  *p = v; fence();
#endif // AMD64
}

// The unsigned and floating-point overloads do not repeat the asm: they
// forward to the signed overload of the same width (the AMD64 port used to
// copy the bodies instead).  As long as the compiler inlines the call this
// is the simpler arrangement.
inline void OrderAccess::store_fence(jubyte* p, jubyte v) {
  store_fence((jbyte*)p, (jbyte)v);
}

inline void OrderAccess::store_fence(jushort* p, jushort v) {
  store_fence((jshort*)p, (jshort)v);
}

inline void OrderAccess::store_fence(juint* p, juint v) {
  store_fence((jint*)p, (jint)v);
}

inline void OrderAccess::store_fence(julong* p, julong v) {
  store_fence((jlong*)p, (jlong)v);
}

// jfloat has no integer twin here: plain store, then an explicit fence().
inline void OrderAccess::store_fence(jfloat* p, jfloat v) {
  *p = v;
  fence();
}

// jdouble is stored as its 64-bit integer bit pattern.
inline void OrderAccess::store_fence(jdouble* p, jdouble v) {
  store_fence((jlong*)p, jlong_cast(v));
}

// store_ptr_fence (intptr_t): store the pointer-sized value v to *p, with a
// fence.
inline void     OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) {
#ifdef AMD64
  // 64-bit build: exchange v with the quadword at p; the old value is dropped.
  //   %0 - v, in and out;  %2 - p, the address to store to
  __asm__ __volatile__ ("xchgq (%2), %0"
                        : "=r" (v)
                        : "0" (v), "r" (p)
                        : "memory");
#else
  // 32-bit build: the value is handed to the jint overload.
  store_fence((jint*)p, (jint)v);
#endif // AMD64
}

// store_ptr_fence (void*): store the pointer v to *p, with a fence.
inline void     OrderAccess::store_ptr_fence(void**    p, void*    v) {
#ifdef AMD64
  // 64-bit build: exchange v with the quadword at p; the old value is dropped.
  //   %0 - v, in and out;  %2 - p, the address to store to
  __asm__ __volatile__ ("xchgq (%2), %0"
                        : "=r" (v)
                        : "0" (v), "r" (p)
                        : "memory");
#else
  // 32-bit build: the pointer is handed to the jint overload.
  store_fence((jint*)p, (jint)v);
#endif // AMD64
}

// Must duplicate definitions instead of calling store_fence because we don't want to cast away volatile.
//
// release_store_fence (8-bit): store v to *p by exchanging it with the byte
// at p; the old memory contents land in v and are discarded.  The "memory"
// clobber keeps the compiler from moving accesses across the statement.
//   %0 - v, in and out; "=q" restricts it to a register with a byte form
//   %2 - p, the address to store to
inline void     OrderAccess::release_store_fence(volatile jbyte*  p, jbyte  v) {
  __asm__ volatile (  "xchgb (%2),%0"
                    : "=q" (v)
                    : "0" (v), "r" (p)
                    : "memory");
}
// release_store_fence (16-bit): store v to *p by exchanging it with the word
// at p; the old memory contents land in v and are discarded.
//   %0 - v, in and out (any general register)
//   %2 - p, the address to store to
inline void     OrderAccess::release_store_fence(volatile jshort* p, jshort v) {
  __asm__ volatile (  "xchgw (%2),%0"
                    : "=r" (v)
                    : "0" (v), "r" (p)
                    : "memory");
}
// release_store_fence (32-bit): store v to *p by exchanging it with the
// longword at p; the old memory contents land in v and are discarded.
//   %0 - v, in and out (any general register)
//   %2 - p, the address to store to
inline void     OrderAccess::release_store_fence(volatile jint*   p, jint   v) {
  __asm__ volatile (  "xchgl (%2),%0"
                    : "=r" (v)
                    : "0" (v), "r" (p)
                    : "memory");
}

// release_store_fence (64-bit): store v to *p and then act as a full barrier.
inline void     OrderAccess::release_store_fence(volatile jlong*   p, jlong   v) {
#ifdef AMD64
  // 64-bit build: one xchgq both performs the store and provides the barrier.
  //   %0 - v, in and out;  %2 - p, the address to store to
  __asm__ __volatile__ (  "xchgq (%2), %0"
                          : "=r" (v)
                          : "0" (v), "r" (p)
                          : "memory");
#else
  // 32-bit build: release_store(jlong) performs the store via Atomic::store,
  // then an explicit fence() follows.
  release_store(p, v); fence();
#endif // AMD64
}

inline void     OrderAccess::release_store_fence(volatile jubyte*  p, jubyte  v) { release_store_fence((volatile jbyte*)p,  (jbyte)v);  }
inline void     OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store_fence((volatile jshort*)p, (jshort)v); }
inline void     OrderAccess::release_store_fence(volatile juint*   p, juint   v) { release_store_fence((volatile jint*)p,   (jint)v);   }
inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { release_store_fence((volatile jlong*)p,  (jlong)v);  }

inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { *p = v; fence(); }
inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); }

// release_store_ptr_fence (intptr_t): store the pointer-sized value v to *p,
// with a fence.
inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) {
#ifdef AMD64
  // 64-bit build: exchange v with the quadword at p; the old value is dropped.
  //   %0 - v, in and out;  %2 - p, the address to store to
  __asm__ __volatile__ (  "xchgq (%2), %0"
                          : "=r" (v)
                          : "0" (v), "r" (p)
                          : "memory");
#else
  // 32-bit build: the value is handed to the jint overload.
  release_store_fence((volatile jint*)p, (jint)v);
#endif // AMD64
}
// release_store_ptr_fence (void*): store the pointer v to the location p
// designates, with a fence.
inline void     OrderAccess::release_store_ptr_fence(volatile void*     p, void*    v) {
#ifdef AMD64
  // 64-bit build: exchange v with the quadword at p; the old value is dropped.
  //   %0 - v, in and out;  %2 - p, the address to store to
  __asm__ __volatile__ (  "xchgq (%2), %0"
                          : "=r" (v)
                          : "0" (v), "r" (p)
                          : "memory");
#else
  // 32-bit build: the pointer is handed to the jint overload.
  release_store_fence((volatile jint*)p, (jint)v);
#endif // AMD64
}

#endif // OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/os_linux_x86.cpp
/*
 * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// no precompiled headers
#include "asm/macroAssembler.hpp"
#include "classfile/classLoader.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "jvm_linux.h"
#include "memory/allocation.inline.hpp"
#include "mutex_linux.inline.hpp"
#include "os_share_linux.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm.h"
#include "prims/jvm_misc.hpp"
#include "runtime/arguments.hpp"
#include "runtime/extendedPC.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/java.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/osThread.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/timer.hpp"
#include "utilities/events.hpp"
#include "utilities/vmError.hpp"

// put OS-includes here
# include <sys/types.h>
# include <sys/mman.h>
# include <pthread.h>
# include <signal.h>
# include <errno.h>
# include <dlfcn.h>
# include <stdlib.h>
# include <stdio.h>
# include <unistd.h>
# include <sys/resource.h>
# include <pthread.h>
# include <sys/stat.h>
# include <sys/time.h>
# include <sys/utsname.h>
# include <sys/socket.h>
# include <sys/wait.h>
# include <pwd.h>
# include <poll.h>
# include <ucontext.h>
# include <fpu_control.h>

// Width-neutral names for the stack pointer, program counter and frame
// pointer.  REG_SP/REG_PC/REG_FP are used in this file as indices into
// uc_mcontext.gregs; SPELL_REG_SP/SPELL_REG_FP are the register names that
// get pasted into inline asm strings.
#ifdef AMD64
#define REG_SP REG_RSP
#define REG_PC REG_RIP
#define REG_FP REG_RBP
#define SPELL_REG_SP "rsp"
#define SPELL_REG_FP "rbp"
#else
#define REG_SP REG_UESP
#define REG_PC REG_EIP
#define REG_FP REG_EBP
#define SPELL_REG_SP "esp"
#define SPELL_REG_FP "ebp"
#endif // AMD64

PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC

// Returns the value of the stack pointer register, read inside this function.
// Three compiler-specific ways of reading the register are provided.
address os::current_stack_pointer() {
#ifdef SPARC_WORKS
  // Copy the stack pointer into a local with a mov, then add the size of two
  // longs to the result.
  // NOTE(review): the two-word adjustment presumably skips this function's
  // own saved frame pointer and return address -- confirm.
  register void *esp;
  __asm__("mov %%"SPELL_REG_SP", %0":"=r"(esp));
  return (address) ((char*)esp + sizeof(long)*2);
#elif defined(__clang__)
  // clang: copy the stack pointer into a local with a mov; no adjustment.
  intptr_t* esp;
  __asm__ __volatile__ ("mov %%"SPELL_REG_SP", %0":"=r"(esp):);
  return (address) esp;
#else
  // Otherwise: bind a local directly to the stack pointer register.
  register void *esp __asm__ (SPELL_REG_SP);
  return (address) esp;
#endif
}

// Returns a pointer-sized sentinel that must never be mistaken for an address
// handed out by reserve_memory -- not even in its subfields, i.e. the pieces a
// CPU would use if it splits a constant across several instruction immediates.
char* os::non_memory_address_word() {
  char* const all_ones = (char*) -1;
  return all_ones;
}

// Per-thread initialization hook; this platform has nothing to set up, so the
// argument is ignored.
void os::initialize_thread(Thread* thr) {
}

// Program counter recorded in the signal context (gregs[REG_PC]).
// uc must not be NULL.
address os::Linux::ucontext_get_pc(ucontext_t * uc) {
  const greg_t saved_pc = uc->uc_mcontext.gregs[REG_PC];
  return (address)saved_pc;
}

// Stack pointer recorded in the signal context (gregs[REG_SP]).
// uc must not be NULL.
intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) {
  const greg_t saved_sp = uc->uc_mcontext.gregs[REG_SP];
  return (intptr_t*)saved_sp;
}

// Frame pointer recorded in the signal context (gregs[REG_FP]).
// uc must not be NULL.
intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) {
  const greg_t saved_fp = uc->uc_mcontext.gregs[REG_FP];
  return (intptr_t*)saved_fp;
}

// Support for Forte Analyzer AsyncGetCallTrace profiling, where the thread
// has been interrupted by SIGPROF.  The Solaris counterpart,
// os::Solaris::fetch_frame_from_ucontext(), tries to skip nested signal
// frames; Linux currently does not, so this simply delegates to
// os::fetch_frame_from_context().
//
// thread, ret_sp and ret_fp are asserted to be non-NULL.
ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread,
  ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) {

  assert(thread != NULL, "just checking");
  assert(ret_sp != NULL, "just checking");
  assert(ret_fp != NULL, "just checking");

  ExtendedPC result = os::fetch_frame_from_context(uc, ret_sp, ret_fp);
  return result;
}

// Extracts pc, sp and fp from a signal context.
//
//   ucVoid - ucontext_t* of the interrupted thread, or NULL
//   ret_sp - if non-NULL, receives the saved stack pointer (NULL without a context)
//   ret_fp - if non-NULL, receives the saved frame pointer (NULL without a context)
//
// Returns the saved pc wrapped in an ExtendedPC; without a context an
// ExtendedPC built from NULL is returned so callers can test for it.
ExtendedPC os::fetch_frame_from_context(void* ucVoid,
                    intptr_t** ret_sp, intptr_t** ret_fp) {

  ucontext_t* context = (ucontext_t*)ucVoid;

  if (context == NULL) {
    // No context: hand back an empty ExtendedPC for return value checking.
    ExtendedPC empty_pc;
    empty_pc = ExtendedPC(NULL);
    if (ret_sp) *ret_sp = (intptr_t *)NULL;
    if (ret_fp) *ret_fp = (intptr_t *)NULL;
    return empty_pc;
  }

  ExtendedPC saved_pc;
  saved_pc = ExtendedPC(os::Linux::ucontext_get_pc(context));
  if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(context);
  if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(context);
  return saved_pc;
}

// Builds a frame from the sp, fp and pc recorded in the signal context
// ucVoid, using the three-argument overload to extract them.
frame os::fetch_frame_from_context(void* ucVoid) {
  intptr_t* saved_sp;
  intptr_t* saved_fp;
  ExtendedPC saved_pc = fetch_frame_from_context(ucVoid, &saved_sp, &saved_fp);
  return frame(saved_sp, saved_fp, saved_pc.pc());
}

// Returns the caller's frame for the C frame fr, built from fr's sender sp,
// its saved link (frame pointer) and its sender pc.
//
// gcc saves the frame pointer (%ebp/%rbp) on the stack by default; that can
// be turned off with -fomit-frame-pointer.
frame os::get_sender_for_C_frame(frame* fr) {
  frame caller(fr->sender_sp(), fr->link(), fr->sender_pc());
  return caller;
}

// Reads the frame pointer register and returns the word it points at.
// NOTE(review): with a conventional frame layout that word is the caller's
// saved frame pointer, hence "previous" -- this relies on the function not
// being inlined and on frame pointers being kept; confirm for the build flags
// in use.
intptr_t* _get_previous_fp() {
#ifdef SPARC_WORKS
  // Copy the frame pointer into a local with a mov.
  register intptr_t **ebp;
  __asm__("mov %%"SPELL_REG_FP", %0":"=r"(ebp));
#elif defined(__clang__)
  // clang: same mov, without the register storage class.
  intptr_t **ebp;
  __asm__ __volatile__ ("mov %%"SPELL_REG_FP", %0":"=r"(ebp):);
#else
  // Otherwise: bind a local directly to the frame pointer register.
  register intptr_t **ebp __asm__ (SPELL_REG_FP);
#endif
  return (intptr_t*) *ebp;   // we want what it points to.
}


// Returns the frame of the caller of os::current_frame(), or an empty frame
// when the stack cannot be walked.
frame os::current_frame() {
  // Read the frame pointer first, then build a frame describing this very
  // function from the current stack pointer and its own entry address.
  intptr_t* previous_fp = _get_previous_fp();
  frame self((intptr_t*)os::current_stack_pointer(),
             (intptr_t*)previous_fp,
             CAST_FROM_FN_PTR(address, os::current_frame));

  if (!os::is_first_C_frame(&self)) {
    return os::get_sender_for_C_frame(&self);
  }

  // stack is not walkable
  return frame();
}

// Utility functions

// From IA32 System Programming Guide
// Trap number of a page fault; the signal handler in this file compares it
// against uc_mcontext.gregs[REG_TRAPNO].
enum {
  trap_page_fault = 0xE
};

// Signal-handling entry point of the VM for Linux x86.
//
//   sig                   - number of the signal being delivered
//   info                  - siginfo for the signal; may be NULL or junk when a
//                           handler was installed without the VM's knowledge
//   ucVoid                - ucontext_t* of the interrupted thread; may be NULL
//   abort_if_unrecognized - if zero, an unrecognized signal makes the function
//                           return false so the caller gets another chance;
//                           otherwise the error is reported and the VM dies
//
// Returns non-zero when the signal was handled (including by a chained
// handler) and zero when it was not recognized and abort_if_unrecognized is
// zero.
//
// Triage order: SIGPIPE/SIGXFSZ, safefetch faults, stack overflow handling,
// CPU feature probing, faults in Java code (safepoint poll, unsafe access,
// divide by zero, implicit null), unsafe access in the VM, JNI fast field
// getters, the memory serialization page, and (32-bit only) execution
// protection violations.  If a continuation stub was selected, the saved pc
// in the context is redirected to it.
extern "C" JNIEXPORT int
JVM_handle_linux_signal(int sig,
                        siginfo_t* info,
                        void* ucVoid,
                        int abort_if_unrecognized) {
  ucontext_t* uc = (ucontext_t*) ucVoid;

  Thread* t = ThreadLocalStorage::get_thread_slow();

  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
  // (no destructors can be run)
  os::ThreadCrashProtection::check_crash_protection(sig, t);

  SignalHandlerMark shm(t);

  // Note: it's not uncommon that JNI code uses signal/sigset to install
  // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
  // or have a SIGILL handler when detecting CPU type). When that happens,
  // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
  // avoid unnecessary crash when libjsig is not preloaded, try handle signals
  // that do not require siginfo/ucontext first.

  if (sig == SIGPIPE || sig == SIGXFSZ) {
    // allow chained handler to go first
    if (os::Linux::chained_handler(sig, info, ucVoid)) {
      return true;
    } else {
      if (PrintMiscellaneous && (WizardMode || Verbose)) {
        char buf[64];
        warning("Ignoring %s - see bugs 4229104 or 646499219",
                os::exception_name(sig, buf, sizeof(buf)));
      }
      return true;
    }
  }

  // Classify the current thread; both stay NULL until the VM has installed
  // its signal handlers.
  JavaThread* thread = NULL;
  VMThread* vmthread = NULL;
  if (os::Linux::signal_handlers_are_installed) {
    if (t != NULL ){
      if(t->is_Java_thread()) {
        thread = (JavaThread*)t;
      }
      else if(t->is_VM_thread()){
        vmthread = (VMThread *)t;
      }
    }
  }
/*
  NOTE: does not seem to work on linux.
  if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) {
    // can't decode this kind of signal
    info = NULL;
  } else {
    assert(sig == info->si_signo, "bad siginfo");
  }
*/
  // decide if this trap can be handled by a stub
  address stub = NULL;

  address pc          = NULL;

  //%note os_trap_1
  if (info != NULL && uc != NULL && thread != NULL) {
    pc = (address) os::Linux::ucontext_get_pc(uc);

    if (StubRoutines::is_safefetch_fault(pc)) {
      uc->uc_mcontext.gregs[REG_PC] = intptr_t(StubRoutines::continuation_for_safefetch_fault(pc));
      return 1;
    }

#ifndef AMD64
    // Halt if SI_KERNEL before more crashes get misdiagnosed as Java bugs
    // This can happen in any running code (currently more frequently in
    // interpreter code but has been seen in compiled code)
    if (sig == SIGSEGV && info->si_addr == 0 && info->si_code == SI_KERNEL) {
      fatal("An irrecoverable SI_KERNEL SIGSEGV has occurred due "
            "to unstable signal handling in this distribution.");
    }
#endif // AMD64

    // Handle ALL stack overflow variations here
    if (sig == SIGSEGV) {
      address addr = (address) info->si_addr;

      // check if fault address is within thread stack
      if (addr < thread->stack_base() &&
          addr >= thread->stack_base() - thread->stack_size()) {
        // stack overflow
        if (thread->in_stack_yellow_zone(addr)) {
          thread->disable_stack_yellow_zone();
          if (thread->thread_state() == _thread_in_Java) {
            // Throw a stack overflow exception.  Guard pages will be reenabled
            // while unwinding the stack.
            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
          } else {
            // Thread was in the vm or native code.  Return and try to finish.
            return 1;
          }
        } else if (thread->in_stack_red_zone(addr)) {
          // Fatal red zone violation.  Disable the guard pages and fall through
          // to handle_unexpected_exception way down below.
          thread->disable_stack_red_zone();
          tty->print_raw_cr("An irrecoverable stack overflow has occurred.");

          // This is a likely cause, but hard to verify. Let's just print
          // it as a hint.
          tty->print_raw_cr("Please check if any of your loaded .so files has "
                            "enabled executable stack (see man page execstack(8))");
        } else {
          // Accessing stack address below sp may cause SEGV if current
          // thread has MAP_GROWSDOWN stack. This should only happen when
          // current thread was created by user code with MAP_GROWSDOWN flag
          // and then attached to VM. See notes in os_linux.cpp.
          if (thread->osthread()->expanding_stack() == 0) {
             thread->osthread()->set_expanding_stack();
             if (os::Linux::manually_expand_stack(thread, addr)) {
               thread->osthread()->clear_expanding_stack();
               return 1;
             }
             thread->osthread()->clear_expanding_stack();
          } else {
             fatal("recursive segv. expanding stack.");
          }
        }
      }
    }

    if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) {
      // Verify that OS save/restore AVX registers.
      stub = VM_Version::cpuinfo_cont_addr();
    }

    if (thread->thread_state() == _thread_in_Java) {
      // Java thread running in Java code => find exception handler if any
      // a fault inside compiled code, the interpreter, or a stub

      if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
        stub = SharedRuntime::get_poll_stub(pc);
      } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
        // BugId 4454115: A read from a MappedByteBuffer can fault
        // here if the underlying file has been truncated.
        // Do not crash the VM in such a case.
        CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
        nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL;
        if (nm != NULL && nm->has_unsafe_access()) {
          stub = StubRoutines::handler_for_unsafe_access();
        }
      }
      else

      // Note: the two preprocessor branches below each open an "if" that is
      // closed by the shared "} else if" following the #endif.
#ifdef AMD64
      if (sig == SIGFPE  &&
          (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) {
        stub =
          SharedRuntime::
          continuation_for_implicit_exception(thread,
                                              pc,
                                              SharedRuntime::
                                              IMPLICIT_DIVIDE_BY_ZERO);
#else
      if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) {
        // HACK: si_code does not work on linux 2.2.12-20!!!
        int op = pc[0];
        if (op == 0xDB) {
          // FIST
          // TODO: The encoding of D2I in i486.ad can cause an exception
          // prior to the fist instruction if there was an invalid operation
          // pending. We want to dismiss that exception. From the win_32
          // side it also seems that if it really was the fist causing
          // the exception that we do the d2i by hand with different
          // rounding. Seems kind of weird.
          // NOTE: that we take the exception at the NEXT floating point instruction.
          assert(pc[0] == 0xDB, "not a FIST opcode");
          assert(pc[1] == 0x14, "not a FIST opcode");
          assert(pc[2] == 0x24, "not a FIST opcode");
          return true;
        } else if (op == 0xF7) {
          // IDIV
          stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO);
        } else {
          // TODO: handle more cases if we are using other x86 instructions
          //   that can generate SIGFPE signal on linux.
          tty->print_cr("unknown opcode 0x%X with SIGFPE.", op);
          fatal("please update this code.");
        }
#endif // AMD64
      } else if (sig == SIGSEGV &&
               !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) {
          // Determination of interpreter/vtable stub/compiled code null exception
          stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
      }
    } else if (thread->thread_state() == _thread_in_vm &&
               sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
               thread->doing_unsafe_access()) {
        stub = StubRoutines::handler_for_unsafe_access();
    }

    // jni_fast_Get<Primitive>Field can trap at certain pc's if a GC kicks in
    // and the heap gets shrunk before the field access.
    if ((sig == SIGSEGV) || (sig == SIGBUS)) {
      address addr = JNI_FastGetField::find_slowcase_pc(pc);
      if (addr != (address)-1) {
        stub = addr;
      }
    }

    // Check to see if we caught the safepoint code in the
    // process of write protecting the memory serialization page.
    // It write enables the page immediately after protecting it
    // so we can just return to retry the write.
    if ((sig == SIGSEGV) &&
        os::is_memory_serialize_page(thread, (address) info->si_addr)) {
      // Block current thread until the memory serialize page permission restored.
      os::block_on_serialize_page_trap();
      return true;
    }
  }

#ifndef AMD64
  // Execution protection violation
  //
  // This should be kept as the last step in the triage.  We don't
  // have a dedicated trap number for a no-execute fault, so be
  // conservative and allow other handlers the first shot.
  //
  // Note: We don't test that info->si_code == SEGV_ACCERR here.
  // this si_code is so generic that it is almost meaningless; and
  // the si_code for this condition may change in the future.
  // Furthermore, a false-positive should be harmless.
  //
  // info and uc are checked for NULL first: this block reads both, and the
  // handler can be entered without a usable siginfo/ucontext (see the note
  // near the top of the function).
  if (UnguardOnExecutionViolation > 0 &&
      info != NULL && uc != NULL &&
      (sig == SIGSEGV || sig == SIGBUS) &&
      uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) {
    int page_size = os::vm_page_size();
    address addr = (address) info->si_addr;
    address pc = os::Linux::ucontext_get_pc(uc);
    // Make sure the pc and the faulting address are sane.
    //
    // If an instruction spans a page boundary, and the page containing
    // the beginning of the instruction is executable but the following
    // page is not, the pc and the faulting address might be slightly
    // different - we still want to unguard the 2nd page in this case.
    //
    // 15 bytes seems to be a (very) safe value for max instruction size.
    bool pc_is_near_addr =
      (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15);
    bool instr_spans_page_boundary =
      (align_size_down((intptr_t) pc ^ (intptr_t) addr,
                       (intptr_t) page_size) > 0);

    if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) {
      static volatile address last_addr =
        (address) os::non_memory_address_word();

      // In conservative mode, don't unguard unless the address is in the VM
      if (addr != last_addr &&
          (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) {

        // Set memory to RWX and retry
        address page_start =
          (address) align_size_down((intptr_t) addr, (intptr_t) page_size);
        bool res = os::protect_memory((char*) page_start, page_size,
                                      os::MEM_PROT_RWX);

        if (PrintMiscellaneous && Verbose) {
          char buf[256];
          jio_snprintf(buf, sizeof(buf), "Execution protection violation "
                       "at " INTPTR_FORMAT
                       ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr,
                       page_start, (res ? "success" : "failed"), errno);
          tty->print_raw_cr(buf);
        }
        stub = pc;

        // Set last_addr so if we fault again at the same address, we don't end
        // up in an endless loop.
        //
        // There are two potential complications here.  Two threads trapping at
        // the same address at the same time could cause one of the threads to
        // think it already unguarded, and abort the VM.  Likely very rare.
        //
        // The other race involves two threads alternately trapping at
        // different addresses and failing to unguard the page, resulting in
        // an endless loop.  This condition is probably even more unlikely than
        // the first.
        //
        // Although both cases could be avoided by using locks or thread local
        // last_addr, these solutions are unnecessary complication: this
        // handler is a best-effort safety net, not a complete solution.  It is
        // disabled by default and should only be used as a workaround in case
        // we missed any no-execute-unsafe VM code.

        last_addr = addr;
      }
    }
  }
#endif // !AMD64

  // A stub is only ever selected on a path that already verified uc != NULL,
  // so the context can be updated unconditionally here.
  if (stub != NULL) {
    // save all thread context in case we need to restore it
    if (thread != NULL) thread->set_saved_exception_pc(pc);

    uc->uc_mcontext.gregs[REG_PC] = (greg_t)stub;
    return true;
  }

  // signal-chaining
  if (os::Linux::chained_handler(sig, info, ucVoid)) {
     return true;
  }

  if (!abort_if_unrecognized) {
    // caller wants another chance, so give it to him
    return false;
  }

  if (pc == NULL && uc != NULL) {
    pc = os::Linux::ucontext_get_pc(uc);
  }

  // unmask current signal
  sigset_t newset;
  sigemptyset(&newset);
  sigaddset(&newset, sig);
  sigprocmask(SIG_UNBLOCK, &newset, NULL);

  VMError err(t, sig, pc, info, ucVoid);
  err.report_and_die();

  ShouldNotReachHere();
  return true; // Mute compiler
}

// Puts the x87 FPU of the calling thread into the state the VM expects.
// Only 32-bit builds do anything here.
void os::Linux::init_thread_fpu_state(void) {
#if !defined(AMD64)
  // Control word selecting 53 bit precision.
  const int fpu_cw_53bit_precision = 0x27f;
  set_fpu_control_word(fpu_cw_53bit_precision);
#endif // !AMD64
}

// Returns the low 16 bits of the x87 FPU control word on 32-bit builds;
// 64-bit builds always report 0.
int os::Linux::get_fpu_control_word(void) {
#if !defined(AMD64)
  int control_word;
  _FPU_GETCW(control_word);
  const int low_16_bits = control_word & 0xffff;
  return low_16_bits;
#else
  return 0;
#endif // AMD64
}

// Loads fpu_control into the x87 FPU control word.  On 64-bit builds the
// call does nothing.
void os::Linux::set_fpu_control_word(int fpu_control) {
#if !defined(AMD64)
  _FPU_SETCW(fpu_control);
#endif // !AMD64
}

// Check that the linux kernel version is 2.4 or higher since earlier
// versions do not support SSE without patches.
//
// 64-bit builds always return true.  32-bit builds parse "major.minor" from
// the release string reported by uname() and return false if uname() fails.
bool os::supports_sse() {
#ifdef AMD64
  return true;
#else
  struct utsname uts;
  if( uname(&uts) != 0 ) return false; // uname fails?
  char *minor_string;
  int major = strtol(uts.release,&minor_string,10);
  // minor_string points at the first character after the major number,
  // normally the '.' separator.  If the string ends there, minor_string+1
  // would step past the terminating NUL, so treat the minor number as 0.
  int minor = 0;
  if (*minor_string != '\0') {
    minor = strtol(minor_string+1,NULL,10);
  }
  bool result = (major > 2 || (major==2 && minor >= 4));
#ifndef PRODUCT
  if (PrintMiscellaneous && Verbose) {
    tty->print("OS version is %d.%d, which %s support SSE/SSE2\n",
               major,minor, result ? "DOES" : "does NOT");
  }
#endif
  return result;
#endif // AMD64
}

// Reports whether a block of the given size can be reserved.
//
// 64-bit builds always answer true.  32-bit builds answer true outright for
// requests below 2G; larger requests are probed by actually reserving the
// memory and releasing it again.
bool os::is_allocatable(size_t bytes) {
#if !defined(AMD64)

  const bool needs_probe = !(bytes < 2 * G);
  if (!needs_probe) {
    return true;
  }

  char* probe = reserve_memory(bytes, NULL);
  if (probe == NULL) {
    return false;
  }

  // The reservation succeeded; give it back before reporting success.
  release_memory(probe, bytes);
  return true;
#else
  // unused on amd64?
  return true;
#endif // AMD64
}


// thread stack

// Minimum thread stack size and variable-stack-size detection, per word size.
#ifdef AMD64
// 64-bit minimum stack size.
size_t os::Linux::min_stack_allowed  = 64 * K;

// amd64: pthread on amd64 is always in floating stack mode
bool os::Linux::supports_variable_stack_size() {  return true; }
#else
// 32-bit minimum stack size: 48K, with 4K more where DEBUG_ONLY expands its
// argument.
size_t os::Linux::min_stack_allowed  =  (48 DEBUG_ONLY(+4))*K;

#ifdef __GNUC__
// Reads the %gs segment register and yields its low 16 bits.
#define GET_GS() ({int gs; __asm__ volatile("movw %%gs, %w0":"=q"(gs)); gs&0xffff;})
#endif

// Test if pthread library can support variable thread stack size. LinuxThreads
// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads
// in floating stack mode and NPTL support variable stack size.
bool os::Linux::supports_variable_stack_size() {
  if (os::Linux::is_NPTL()) {
     // NPTL, yes
     return true;

  } else {
    // Note: We can't control default stack size when creating a thread.
    // If we use non-default stack size (pthread_attr_setstacksize), both
    // floating stack and non-floating stack LinuxThreads will return the
    // same value. This makes it impossible to implement this function by
    // detecting thread stack size directly.
    //
    // An alternative approach is to check %gs. Fixed-stack LinuxThreads
    // do not use %gs, so its value is 0. Floating-stack LinuxThreads use
    // %gs (either as LDT selector or GDT selector, depending on kernel)
    // to access thread specific data.
    //
    // Note that %gs is a reserved glibc register since early 2001, so
    // applications are not allowed to change its value (Ulrich Drepper from
    // Redhat confirmed that all known offenders have been modified to use
    // either %fs or TSD). In the worst case scenario, when VM is embedded in
    // a native application that plays with %gs, we might see non-zero %gs
    // even LinuxThreads is running in fixed stack mode. As the result, we'll
    // return true and skip _thread_safety_check(), so we may not be able to
    // detect stack-heap collisions. But otherwise it's harmless.
    //
#ifdef __GNUC__
    return (GET_GS() != 0);
#else
    // Without GET_GS there is no way to inspect %gs; answer false.
    return false;
#endif
  }
}
#endif // AMD64

// Returns the default stack size for a thread of type thr_type.
// Compiler threads get a larger stack than every other thread type:
// 4M versus 1M on 64-bit builds, 2M versus 512K on 32-bit builds.
size_t os::Linux::default_stack_size(os::ThreadType thr_type) {
  const bool is_compiler_thread = (thr_type == os::compiler_thread);
  size_t stack_bytes;
#ifdef AMD64
  stack_bytes = (is_compiler_thread ? 4 * M : 1 * M);
#else
  stack_bytes = (is_compiler_thread ? 2 * M : 512 * K);
#endif // AMD64
  return stack_bytes;
}

// Returns the glibc guard size to request for a thread of type thr_type.
size_t os::Linux::default_guard_size(os::ThreadType thr_type) {
  // Creating a guard page is very expensive, and a Java thread already has
  // HotSpot guard pages, so it gets no glibc guard page.
  if (thr_type == java_thread) {
    return 0;
  }
  // Every other thread type gets one page.
  return page_size();
}

// Java thread:
//
//   Low memory addresses
//    +------------------------+
//    |                        |\  JavaThread created by VM does not have glibc
//    |    glibc guard page    | - guard, attached Java thread usually has
//    |                        |/  1 page glibc guard.
// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
//    |                        |\
//    |  HotSpot Guard Pages   | - red and yellow pages
//    |                        |/
//    +------------------------+ JavaThread::stack_yellow_zone_base()
//    |                        |\
//    |      Normal Stack      | -
//    |                        |/
// P2 +------------------------+ Thread::stack_base()
//
// Non-Java thread:
//
//   Low memory addresses
//    +------------------------+
//    |                        |\
//    |  glibc guard page      | - usually 1 page
//    |                        |/
// P1 +------------------------+ Thread::stack_base() - Thread::stack_size()
//    |                        |\
//    |      Normal Stack      | -
//    |                        |/
// P2 +------------------------+ Thread::stack_base()
//
// ** P1 (aka bottom) and size (P2 = P1 + size) are the address and stack size returned from
//    pthread_attr_getstack()

// Determines the stack region of the calling thread.
//   bottom - receives the lowest address of the stack
//   size   - receives the stack size in bytes
// The VM needs the exact stack location, so any failure to obtain it is fatal.
static void current_stack_region(address * bottom, size_t * size) {
  if (!os::is_primordial_thread()) {
    pthread_attr_t thread_attr;
    const int err = pthread_getattr_np(pthread_self(), &thread_attr);

    // Abort if the attributes cannot be fetched; running out of memory gets
    // its own, more specific, exit path.
    if (err == ENOMEM) {
      vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np");
    } else if (err != 0) {
      fatal(err_msg("pthread_getattr_np failed with errno = %d", err));
    }

    if (pthread_attr_getstack(&thread_attr, (void **)bottom, size) != 0) {
      fatal("Can not locate current stack attributes!");
    }

    pthread_attr_destroy(&thread_attr);
  } else {
    // pthread_getattr_np() may return a bogus value for the primordial
    // thread, so use the values recorded by os::Linux instead.
    *bottom = os::Linux::initial_thread_stack_bottom();
    *size   = os::Linux::initial_thread_stack_size();
  }

  // The current stack pointer has to lie inside the region just reported.
  assert(os::current_stack_pointer() >= *bottom &&
         os::current_stack_pointer() < *bottom + *size, "just checking");
}

// Returns the base (highest address) of the calling thread's stack, i.e. the
// bottom of the stack region plus its size.
address os::current_stack_base() {
  address low_end;
  size_t  extent;
  current_stack_region(&low_end, &extent);
  address base = low_end + extent;
  return base;
}

size_t os::current_stack_size() {
  // stack size includes normal stack and HotSpot guard pages
  address bottom;
  size_t size;
  current_stack_region(&bottom, &size);
  return size;
}

////////////////////////////////////////////////////////////////////////////////
// helper functions for fatal error handler

// Writes the CPU state held in a ucontext to st: the general purpose
// registers, a hex dump starting at the stack pointer, and a hex dump of the
// bytes around the program counter.
//   st      - stream that receives the output
//   context - pointer to a ucontext_t; nothing is printed when it is NULL
void os::print_context(outputStream *st, void *context) {
  if (context == NULL) return;

  ucontext_t *uc = (ucontext_t*)context;
  st->print_cr("Registers:");
#ifdef AMD64
  // 64-bit layout: four registers per output line.
  st->print(  "RAX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RAX]);
  st->print(", RBX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RBX]);
  st->print(", RCX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RCX]);
  st->print(", RDX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RDX]);
  st->cr();
  st->print(  "RSP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RSP]);
  st->print(", RBP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RBP]);
  st->print(", RSI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RSI]);
  st->print(", RDI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RDI]);
  st->cr();
  st->print(  "R8 =" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R8]);
  st->print(", R9 =" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R9]);
  st->print(", R10=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R10]);
  st->print(", R11=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R11]);
  st->cr();
  st->print(  "R12=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R12]);
  st->print(", R13=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R13]);
  st->print(", R14=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R14]);
  st->print(", R15=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_R15]);
  st->cr();
  // Instruction pointer, flags, the combined CSGSFS slot and the error code.
  st->print(  "RIP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_RIP]);
  st->print(", EFLAGS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EFL]);
  st->print(", CSGSFS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_CSGSFS]);
  st->print(", ERR=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_ERR]);
  st->cr();
  // The trap number goes on a line of its own, indented by two spaces.
  st->print("  TRAPNO=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_TRAPNO]);
#else
  // 32-bit layout. Note that the value labelled ESP is read from the
  // REG_UESP slot of the context.
  st->print(  "EAX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EAX]);
  st->print(", EBX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EBX]);
  st->print(", ECX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_ECX]);
  st->print(", EDX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EDX]);
  st->cr();
  st->print(  "ESP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_UESP]);
  st->print(", EBP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EBP]);
  st->print(", ESI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_ESI]);
  st->print(", EDI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EDI]);
  st->cr();
  st->print(  "EIP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EIP]);
  st->print(", EFLAGS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EFL]);
  st->print(", CR2=" INTPTR_FORMAT, uc->uc_mcontext.cr2);
#endif // AMD64
  // Terminate the last register line and leave one blank line after it.
  st->cr();
  st->cr();

  // Stack dump. sp is an intptr_t*, so "sp + 8*sizeof(intptr_t)" is pointer
  // arithmetic: the end address lies 8*sizeof(intptr_t) words (not bytes)
  // above sp. The final argument is sizeof(intptr_t), presumably the size of
  // one dumped unit.
  intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
  st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", sp);
  print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t));
  st->cr();

  // Note: it may be unsafe to inspect memory near pc. For example, pc may
  // point to garbage if entry point in an nmethod is corrupted. Leave
  // this at the end, and hope for the best.
  // The dump covers the range [pc - 32, pc + 32).
  address pc = os::Linux::ucontext_get_pc(uc);
  st->print_cr("Instructions: (pc=" PTR_FORMAT ")", pc);
  print_hex_dump(st, pc - 32, pc + 32, sizeof(char));
}

// For every general purpose register saved in the ucontext, prints the
// register label followed by whatever print_location() reports for the
// register's value.
//   st      - stream that receives the output
//   context - pointer to a ucontext_t; nothing is printed when it is NULL
void os::print_register_info(outputStream *st, void *context) {
  if (context == NULL) return;

  ucontext_t *uc = (ucontext_t*)context;

  st->print_cr("Register to memory mapping:");
  st->cr();

  // The order of the registers inside the context does not match the way the
  // abstract Register set is defined, so the gregs area cannot simply be
  // walked front to back. The table below spells out the label and the gregs
  // index of each register in the order they are reported.
  //
  // Only the "general purpose" registers are covered.
  static const struct {
    const char* label;
    int         greg;
  } reg_table[] = {
#ifdef AMD64
    { "RAX=", REG_RAX }, { "RBX=", REG_RBX },
    { "RCX=", REG_RCX }, { "RDX=", REG_RDX },
    { "RSP=", REG_RSP }, { "RBP=", REG_RBP },
    { "RSI=", REG_RSI }, { "RDI=", REG_RDI },
    { "R8 =", REG_R8  }, { "R9 =", REG_R9  },
    { "R10=", REG_R10 }, { "R11=", REG_R11 },
    { "R12=", REG_R12 }, { "R13=", REG_R13 },
    { "R14=", REG_R14 }, { "R15=", REG_R15 }
#else
    { "EAX=", REG_EAX }, { "EBX=", REG_EBX },
    { "ECX=", REG_ECX }, { "EDX=", REG_EDX },
    { "ESP=", REG_ESP }, { "EBP=", REG_EBP },
    { "ESI=", REG_ESI }, { "EDI=", REG_EDI }
#endif // AMD64
  };

  for (size_t i = 0; i < sizeof(reg_table) / sizeof(reg_table[0]); i++) {
    st->print("%s", reg_table[i].label);
    print_location(st, uc->uc_mcontext.gregs[reg_table[i].greg]);
  }

  st->cr();
}

void os::setup_fpu() {
#ifndef AMD64
  address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std();
  __asm__ volatile (  "fldcw (%0)" :
                      : "r" (fpu_cntrl) : "memory");
#endif // !AMD64
}

#ifndef PRODUCT
// Non-PRODUCT sanity check: on AMD64 asserts that the current stack pointer
// is a multiple of StackAlignmentInBytes (the mask test relies on that value
// being a power of two). The function body is empty on other platforms.
void os::verify_stack_alignment() {
#ifdef AMD64
  assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
#endif
}
#endif


/*
 * IA32 only: execute code at a high address in case buggy NX emulation is present. I.e. avoid CS limit
 * updates (JDK-8023956).
 */
// Best-effort work-around: map one executable page just below the initial
// thread's stack, run a single 'ret' from it and leave the page mapped.
// Every failure along the way is silently ignored. Does nothing unless IA32
// is defined.
void os::workaround_expand_exec_shield_cs_limit() {
#if defined(IA32)
  const size_t pagesz = os::vm_page_size();

  // JDK-8197429
  //
  // Newer Linux kernels keep a distance of a megabyte between stack memory
  // and other memory regions. The stack mapping therefore has to be grown to
  // the end of the initial stack *before* the code page is installed.
  // Installing the page first would appear to succeed, but a later stack
  // expansion from Java code would segfault.
  if (os::is_primordial_thread()) {
    address stack_limit = Linux::initial_thread_stack_bottom();
    if (!DisablePrimordialThreadGuardPages) {
      stack_limit += (StackYellowPages + StackRedPages) * pagesz;
    }
    os::Linux::expand_stack_to(stack_limit);
  }

  // Take the highest VA the OS will hand out and execute from there.
  //
  // Passing -(pagesz) as the mmap hint behaves as expected on newer kernels,
  // but the older variants this work-around targets only search forward.
  //
  // Memory layout as understood on the affected distributions:
  //
  //   TASK_LIMIT = 3G, main stack base close to TASK_LIMIT.
  //
  // A few pages south of the main stack will do.
  //
  // When the VM is embedded in an application other than the launcher
  // (initial != main stack) there is little control over, or knowledge of,
  // the address space, so the outcome is simply accepted.
  char* wanted_addr = (char*) (Linux::initial_thread_stack_bottom() -
                               ((StackYellowPages + StackRedPages + 1) * pagesz));
  char* exec_page = os::attempt_reserve_memory_at(pagesz, wanted_addr);

  if (exec_page == NULL) {
    // JDK-8197429: the kernel work-around for CVE-2017-1000364 may leave a
    // one megabyte gap between the stack limit and the nearest mapping.
    // Retry once, one megabyte lower.
    wanted_addr -= 1 * M;
    exec_page = os::attempt_reserve_memory_at(pagesz, wanted_addr);
  }

  // No matter if either step fails: we tried, best effort.
  if (exec_page == NULL) {
    return;
  }
  if (!os::commit_memory(exec_page, pagesz, true)) {
    return;
  }

  if (PrintMiscellaneous && (Verbose || WizardMode)) {
     tty->print_cr("[CS limit NX emulation work-around, exec code at: %p]", exec_page);
  }

  // The code to execute is a lone 'ret' instruction ...
  exec_page[0] = 0xC3;

  // ... which is called right away.
  __asm__ volatile("call *%0" : : "r"(exec_page));

  // The page is deliberately left mapped so the CS limit isn't reduced.
#endif
}
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/os_linux_x86.hpp
/*
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_OS_LINUX_X86_HPP
#define OS_CPU_LINUX_X86_VM_OS_LINUX_X86_HPP

  // On non-AMD64 builds loads the FPU control word from
  // StubRoutines::addr_fpu_cntrl_wrd_std(); the body is empty on AMD64.
  static void setup_fpu();
  // NOTE(review): the definition is not visible from here; presumably reports
  // whether SSE instructions can be used - confirm against the implementation.
  static bool supports_sse();

  // Returns the 64-bit value produced by the rdtsc instruction.
  static jlong rdtsc();

  // NOTE(review): the definition is not visible from here; presumably tells
  // whether an allocation of the given number of bytes is possible - confirm.
  static bool is_allocatable(size_t bytes);

  // Used to register dynamic code cache area with the OS
  // Note: Currently only used in 64 bit Windows implementations
  // (this implementation ignores both arguments and always returns true)
  static bool register_code_area(char *low, char *high) { return true; }

  /*
   * Work-around for broken NX emulation using CS limit, Red Hat patch "Exec-Shield"
   * (IA32 only).
   *
   * Map and execute at a high VA to prevent CS lazy updates race with SMP MM
   * invalidation. Further code generation by the JVM will no longer cause CS limit
   * updates.
   *
   * Affects IA32: RHEL 5 & 6, Ubuntu 10.04 (LTS), 10.10, 11.04, 11.10, 12.04.
   * @see JDK-8023956
   */
  static void workaround_expand_exec_shield_cs_limit();

#endif // OS_CPU_LINUX_X86_VM_OS_LINUX_X86_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/os_linux_x86.inline.hpp
/*
 * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
#define OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP

#include "runtime/os.hpp"

// See http://www.technovelty.org/code/c/reading-rdtsc.html for details
// Executes the rdtsc instruction and returns the 64-bit counter it produces.
inline jlong os::rdtsc() {
#ifdef AMD64
  // The two halves are read from eax (low) and edx (high) separately and
  // then combined.
  uint32_t low_half, high_half;
  __asm__ __volatile__ ("rdtsc" : "=a" (low_half), "=d" (high_half));
  const uint64_t tsc = ((uint64_t)high_half << 32) | (uint64_t)low_half;
  return (jlong)tsc;
#else
  // The "=A" constraint yields the 64 bit result in edx:eax directly.
  uint64_t tsc;
  __asm__ __volatile__ ("rdtsc" : "=A" (tsc));
  return (jlong)tsc;
#endif // AMD64
}

#endif // OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/prefetch_linux_x86.inline.hpp
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_PREFETCH_LINUX_X86_INLINE_HPP
#define OS_CPU_LINUX_X86_VM_PREFETCH_LINUX_X86_INLINE_HPP

#include "runtime/prefetch.hpp"


// Issues a prefetcht0 for the byte address loc + interval. The instruction is
// emitted on AMD64 only; on other builds the function body is empty.
inline void Prefetch::read (void *loc, intx interval) {
#ifdef AMD64
  __asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval));
#endif // AMD64
}

// Issues a prefetcht0 for the byte address loc + interval, i.e. the same
// instruction Prefetch::read uses. The instruction is emitted on AMD64 only;
// on other builds the function body is empty.
inline void Prefetch::write(void *loc, intx interval) {
#ifdef AMD64

  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
  //  __asm__ ("prefetchw (%0,%1,1)" : : "r" (loc), "r" (interval));
  __asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval));

#endif // AMD64
}

#endif // OS_CPU_LINUX_X86_VM_PREFETCH_LINUX_X86_INLINE_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/threadLS_linux_x86.cpp
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadLocalStorage.hpp"

// Map stack pointer (%esp) to thread pointer for faster TLS access
//
// Here we use a flat table for better performance. Getting current thread
// is down to one memory access (read _sp_map[%esp>>12]) in generated code
// and two in runtime code (-fPIC code needs an extra load for _sp_map).
//
// This code assumes stack page is not shared by different threads. It works
// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters).
//
// Notice that _sp_map is allocated in the bss segment, which is ZFOD
// (zero-fill-on-demand). While it reserves 4M address space upfront,
// actual memory pages are committed on demand.
//
// If an application creates and destroys a lot of threads, usually the
// stack space freed by a thread will soon get reused by new thread
// (this is especially true in NPTL or LinuxThreads in fixed-stack mode).
// No memory page in _sp_map is wasted.
//
// However, it's still possible that we might end up populating &
// committing a large fraction of the 4M table over time, but the actual
// amount of live data in the table could be quite small. The max wastage
// is less than 4M bytes. If it becomes an issue, we could use madvise()
// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map.
// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the
// physical memory page (i.e. similar to MADV_FREE on Solaris).

#if !defined(AMD64) && !defined(MINIMIZE_RAM_USAGE)
// Storage for the stack-page -> Thread* table: one slot for each of the
// 2^(SP_BITLENGTH - PAGE_SHIFT) pages of the address space.
Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)];

// Intentionally empty in this (sp-map) configuration.
void ThreadLocalStorage::generate_code_for_get_thread() {
    // nothing we can do here for user-level thread
}

// Platform-dependent initialization for the sp-map configuration. It only
// asserts that the VM page size is a multiple of PAGE_SIZE, the granularity
// at which _sp_map is indexed.
void ThreadLocalStorage::pd_init() {
  assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(),
         "page size must be multiple of PAGE_SIZE");
}

// Records `thread` as the owner of the calling thread's stack: stores it in
// the OS-level TLS slot and in every _sp_map entry that corresponds to a
// page of the current stack. `thread` may be NULL, which clears the entries.
void ThreadLocalStorage::pd_set_thread(Thread* thread) {
  os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);

  address stack_high = os::current_stack_base();
  address stack_low  = stack_high - os::current_stack_size();

  for (address page = stack_low; page < stack_high; page += PAGE_SIZE) {
    Thread** slot = &_sp_map[(uintptr_t)page >> PAGE_SHIFT];

    // This function runs with a non-NULL thread when a thread is created or
    // attached, and with NULL when a thread is about to exit. A slot that is
    // already occupied must therefore hold the very same thread. A mismatch
    // could mean that a stack page is shared between threads, but the more
    // likely explanation is an attached thread that exited without detaching
    // from the VM - a program error that could crash the VM.
    assert(thread == NULL || *slot == NULL || thread == *slot,
           "thread exited without detaching from VM??");

    *slot = thread;
  }
}
#else

// Intentionally empty in the AMD64 / MINIMIZE_RAM_USAGE configuration.
void ThreadLocalStorage::generate_code_for_get_thread() {
    // nothing we can do here for user-level thread
}

// No platform-dependent initialization is done in the AMD64 /
// MINIMIZE_RAM_USAGE configuration.
void ThreadLocalStorage::pd_init() {
}

// Stores `thread` in the OS-level thread-local slot identified by
// thread_index(). No sp-map is maintained in this configuration.
void ThreadLocalStorage::pd_set_thread(Thread* thread) {
  os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread);
}
#endif // !AMD64 && !MINIMIZE_RAM_USAGE
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/threadLS_linux_x86.hpp
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_THREADLS_LINUX_X86_HPP
#define OS_CPU_LINUX_X86_VM_THREADLS_LINUX_X86_HPP

  // Processor dependent parts of ThreadLocalStorage

#if !defined(AMD64) && !defined(MINIMIZE_RAM_USAGE)

  // map stack pointer to thread pointer - see notes in threadLS_linux_x86.cpp
  // Width of a stack pointer in bits.
  #define SP_BITLENGTH  32
  // log2 of the page granularity used to index the table (4K pages).
  #define PAGE_SHIFT    12
  #define PAGE_SIZE     (1UL << PAGE_SHIFT)
  // One Thread* slot per page: 2^(SP_BITLENGTH - PAGE_SHIFT) entries.
  static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)];

public:

  // Returns the address of the first entry of the sp -> thread table.
  static Thread** sp_map_addr() { return _sp_map; }

  // Looks up the current thread: reads %esp and returns the _sp_map entry
  // for the page that the stack pointer falls into.
  static Thread* thread() {
    uintptr_t esp_value;
    __asm__ volatile ("movl %%esp, %0" : "=r" (esp_value));
    const uintptr_t page_index = esp_value >> PAGE_SHIFT;
    return _sp_map[page_index];
  }

#else

public:

   // Looks up the current thread through the OS-level thread-local slot
   // identified by thread_index().
   static Thread* thread() {
     void* tls_value = os::thread_local_storage_at(thread_index());
     return (Thread*) tls_value;
   }

#endif // AMD64 || MINIMIZE_RAM_USAGE

#endif // OS_CPU_LINUX_X86_VM_THREADLS_LINUX_X86_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/thread_linux_x86.cpp
/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/thread.inline.hpp"

// For Forte Analyzer AsyncGetCallTrace profiling support - thread is
// currently interrupted by SIGPROF
bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr,
  void* ucontext, bool isInJava) {
  // Only valid when invoked on the thread it describes; the actual work is
  // shared with the profiling entry point.
  assert(Thread::current() == this, "caller must be current thread");

  const bool found = pd_get_top_frame(fr_addr, ucontext, isInJava);
  return found;
}

// Profiling entry point: forwards to pd_get_top_frame() without requiring
// that the caller is the current thread.
bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) {
  const bool found = pd_get_top_frame(fr_addr, ucontext, isInJava);
  return found;
}

// Tries to determine the top frame of this thread.
//   fr_addr  - receives the frame on success
//   ucontext - signal context (ucontext_t*) of the interrupted thread
//   isInJava - whether the thread was running Java code when interrupted
// Returns true and fills *fr_addr on success; returns false when no usable
// frame could be found.
bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) {
  assert(this->is_Java_thread(), "must be JavaThread");
  JavaThread* self = (JavaThread *)this;

  // A walkable last_Java_frame wins even when isInJava == true, since it
  // should be more reliable than the ucontext info.
  if (self->has_last_Java_frame() && self->frame_anchor()->walkable()) {
    *fr_addr = self->pd_last_frame();
    return true;
  }

  // No last_Java_frame. The ucontext is only worth looking at if Java code
  // was running when SIGPROF came in; otherwise there is nothing else to try.
  if (!isInJava) {
    return false;
  }

  ucontext_t* uc = (ucontext_t*) ucontext;

  intptr_t* ctx_fp;
  intptr_t* ctx_sp;
  ExtendedPC ctx_pc = os::Linux::fetch_frame_from_ucontext(this, uc,
    &ctx_sp, &ctx_fp);
  if (ctx_pc.pc() == NULL || ctx_sp == NULL ) {
    // the ucontext wasn't useful
    return false;
  }

  frame candidate(ctx_sp, ctx_fp, ctx_pc.pc());
  if (!candidate.safe_for_sender(self)) {
#ifdef COMPILER2
    // C2 uses ebp as a general register, so retry with a NULL fp.
    frame candidate_no_fp(ctx_sp, NULL, ctx_pc.pc());
    if (!candidate_no_fp.safe_for_sender(self)) {
      // that frame isn't good either - give up
      return false;
    }
    candidate = candidate_no_fp;
#else
    // the frame isn't good and there is nothing else to try
    return false;
#endif /* COMPILER2 */
  }

  *fr_addr = candidate;
  return true;
}

// Empty on this platform.
void JavaThread::cache_global_variables() { }
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/thread_linux_x86.hpp
/*
 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_THREAD_LINUX_X86_HPP
#define OS_CPU_LINUX_X86_VM_THREAD_LINUX_X86_HPP

 private:
  // Platform-dependent initialization: clears the Java frame anchor.
  void pd_initialize() {
    _anchor.clear();
  }

  // Builds a frame from the sp, fp and pc stored in the Java frame anchor.
  // The anchor must hold a last Java frame with a non-NULL pc.
  frame pd_last_frame() {
    assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
    assert(_anchor.last_Java_pc() != NULL, "not walkable");
    frame anchor_frame(_anchor.last_Java_sp(),
                       _anchor.last_Java_fp(),
                       _anchor.last_Java_pc());
    return anchor_frame;
  }

 public:
  // Accessors for the frame pointer stored in the Java frame anchor; both
  // simply forward to _anchor.
  // Mutators are highly dangerous....
  intptr_t* last_Java_fp()                       { return _anchor.last_Java_fp(); }
  void  set_last_Java_fp(intptr_t* fp)           { _anchor.set_last_Java_fp(fp);   }

  // Empty on this platform: base_sp is ignored.
  void set_base_of_stack_pointer(intptr_t* base_sp) {
  }

  // Byte offset of the anchor's last_Java_fp field measured from the start of
  // a JavaThread: offset of _anchor plus the field's offset inside the anchor.
  static ByteSize last_Java_fp_offset()          {
    return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
  }

  // Always NULL on this platform.
  intptr_t* base_of_stack_pointer() {
    return NULL;
  }
  // Empty on this platform.
  void record_base_of_stack_pointer() {
  }

  // Top-frame lookup for a signal handler running on this thread; the
  // definition asserts that the caller is the current thread.
  bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
    bool isInJava);

  // Top-frame lookup for profiling; same lookup without that assertion.
  bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava);
private:
  // Shared implementation behind the two public entry points above.
  bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava);
public:

  // These routines are only used on cpu architectures that
  // have separate register stacks (Itanium).
  // Here they are stubs: no overflow is ever reported and the guard
  // enable/disable calls do nothing.
  static bool register_stack_overflow() { return false; }
  static void enable_register_stack_guard() {}
  static void disable_register_stack_guard() {}

#endif // OS_CPU_LINUX_X86_VM_THREAD_LINUX_X86_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/vmStructs_linux_x86.hpp
/*
 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_X86_VM_VMSTRUCTS_LINUX_X86_HPP
#define OS_CPU_LINUX_X86_VM_VMSTRUCTS_LINUX_X86_HPP

// These are the OS and CPU-specific fields, types and integer
// constants required by the Serviceability Agent. This file is
// referenced by vmStructs.cpp.

// OS/CPU-specific field list. Only the nonstatic_field callback is used; it
// is applied to OSThread::_thread_id (type OSThread::thread_id_t) and
// OSThread::_pthread_id (type pthread_t). The other callback parameters are
// accepted but not expanded here.
#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
                                                                                                                                     \
  /******************************/                                                                                                   \
  /* Threads (NOTE: incomplete) */                                                                                                   \
  /******************************/                                                                                                   \
  nonstatic_field(OSThread,                      _thread_id,                                      OSThread::thread_id_t)             \
  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)


// OS/CPU-specific type list. Declares OSThread::thread_id_t through
// declare_integer_type and pthread_t through declare_unsigned_integer_type.
// The other callback parameters are accepted but not expanded here.
#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
                                                                          \
  /**********************/                                                \
  /* Posix Thread IDs   */                                                \
  /**********************/                                                \
                                                                          \
  declare_integer_type(OSThread::thread_id_t)                             \
  declare_unsigned_integer_type(pthread_t)

// No OS/CPU-specific int constants on this platform: expands to nothing.
#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)

// No OS/CPU-specific long constants on this platform: expands to nothing.
#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)

#endif // OS_CPU_LINUX_X86_VM_VMSTRUCTS_LINUX_X86_HPP
C:\hotspot-69087d08d473\src\os_cpu\linux_x86\vm/vm_version_linux_x86.cpp
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "runtime/os.hpp"
#include "vm_version_x86.hpp"