This commit is contained in:
zhangruian 2023-03-22 16:04:09 +08:00
parent e7c6453248
commit 33ba385bc6
1159 changed files with 51474 additions and 22078 deletions

70
.gitignore vendored Normal file
View File

@ -0,0 +1,70 @@
# Prerequisites
*.d
# Object files
*.o
*.ko
*.obj
*.elf
# Linker output
*.ilk
*.map
*.exp
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
*.idb
*.pdb
# Kernel Module Compile Results
*.mod*
*.cmd
.tmp_versions/
modules.order
Module.symvers
Mkfile.old
dkms.conf
# from box86
build/
build*/
.vscode
.cache
.gdb_history
src/git_head.h
backup/
# LLVMprivateGenerator
/LLVMprivateGenerator/*
!/LLVMprivateGenerator/Makefile
!/LLVMprivateGenerator/main.cpp
!/LLVMprivateGenerator/registered_structs.cpp
# macOS
.DS_Store

1048
CMakeLists.txt Normal file → Executable file

File diff suppressed because it is too large Load Diff

65
LICENSE
View File

@ -1,52 +1,21 @@
Mulan License
MIT License
Copyright (c) 2022 openkylin
Copyright (c) 2020 ptitSeb
Mulan Permissive Software LicenseVersion 1 (Mulan PSL v1)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
August 2019 http://license.coscl.org.cn/MulanPSL
Your reproduction, use, modification and distribution of the Software shall be subject to Mulan PSL v1 (this License) with following terms and conditions:
0. Definition
Software means the program and related documents which are comprised of those Contribution and licensed under this License.
Contributor means the Individual or Legal Entity who licenses its copyrightable work under this License.
Legal Entity means the entity making a Contribution and all its Affiliates.
Affiliates means entities that control, or are controlled by, or are under common control with a party to this License, control means direct or indirect ownership of at least fifty percent (50%) of the voting power, capital or other securities of controlled or commonly controlled entity.
Contribution means the copyrightable work licensed by a particular Contributor under this License.
1. Grant of Copyright License
Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable copyright license to reproduce, use, modify, or distribute its Contribution, with modification or not.
2. Grant of Patent License
Subject to the terms and conditions of this License, each Contributor hereby grants to you a perpetual, worldwide, royalty-free, non-exclusive, irrevocable (except for revocation under this Section) patent license to make, have made, use, offer for sale, sell, import or otherwise transfer its Contribution where such patent license is only limited to the patent claims owned or controlled by such Contributor now or in future which will be necessarily infringed by its Contribution alone, or by combination of the Contribution with the Software to which the Contribution was contributed, excluding of any patent claims solely be infringed by your or others modification or other combinations. If you or your Affiliates directly or indirectly (including through an agent, patent licensee or assignee, institute patent litigation (including a cross claim or counterclaim in a litigation) or other patent enforcement activities against any individual or entity by alleging that the Software or any Contribution in it infringes patents, then any patent license granted to you under this License for the Software shall terminate as of the date such litigation or activity is filed or taken.
3. No Trademark License
No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, except as required to fulfill notice requirements in section 4.
4. Distribution Restriction
You may distribute the Software in any medium with or without modification, whether in source or executable forms, provided that you provide recipients with a copy of this License and retain copyright, patent, trademark and disclaimer statements in the Software.
5. Disclaimer of Warranty and Limitation of Liability
The Software and Contribution in it are provided without warranties of any kind, either express or implied. In no event shall any Contributor or copyright holder be liable to you for any damages, including, but not limited to any direct, or indirect, special or consequential damages arising from your use or inability to use the Software or the Contribution in it, no matter how its caused or based on which legal theory, even if advised of the possibility of such damages.
End of the Terms and Conditions
How to apply the Mulan Permissive Software LicenseVersion 1 (Mulan PSL v1) to your software
To apply the Mulan PSL v1 to your work, for easy identification by recipients, you are suggested to complete following three steps:
Fill in the blanks in following statement, including insert your software name, the year of the first publication of your software, and your name identified as the copyright owner;
Create a file named “LICENSE” which contains the whole context of this License in the first directory of your software package;
Attach the statement to the appropriate annotated syntax at the beginning of each source file.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,18 +0,0 @@
#### RVTrans
RVTrans is a dynamic binary translation tool that translates x86 binaries to RISC-V.
#### 简介
RVTrans 是一个动态二进制翻译项目能够实现从X86指令到RISC-V指令的翻译。能够支持在RISC-V架构的linux系统平台上运行X86应用。
#### 编译和安装
git clone https://gitee.com/openkylin/rvtrans;
cd rvtrans;
mkdir build; cd build; cmake ..;
make -j`nproc`;
sudo make install;
#### 参与贡献

View File

@ -46,12 +46,119 @@ except ImportError:
print("It seems your Python version is quite broken...")
assert(False)
"""
Generates all files in src/wrapped/generated
===
TL;DR: Automagically creates type definitions (/.F.+/ functions/typedefs...).
All '//%' in the headers are used by the script.
Reads each lines of each "_private.h" headers.
For each of them:
- If it starts with a #ifdef, #else, #ifndef, #endif, it memorizes which definition is required
- If it starts with a "GO", it will do multiple things:
- It memorizes the type used by the function (second macro argument)
- It memorizes the type it is mapped to, if needed (eg, iFEvp is mapped to iFEp: the first "real" argument is dropped)
- It checks if the type given (both original and mapped to) are valid
- If the signature contains a 'E' but it is not a "GOM" command, it will throw an error
- If the line starts with a '//%S', it will memorize a structure declaration.
The structure of it is: "//%S <letter> <structure name> <signature equivalent>"
NOTE: Those structure letters are "fake types" that are accepted in the macros.
After sorting the data, it generates:
wrapper.c
---------
(Private) type definitions (/.F.+_t/)
Function definitions (/.F.+/ functions, that actually execute the function given as argument)
isSimpleWrapper definition
wrapper.h
---------
Generic "wrapper_t" type definition
Function declarations (/.F.+/ functions)
*types.h
--------
Local types definition, for the original signatures
The SUPER() macro definition, used to generate and initialize the `*_my_t` library structure
(TODO: also automate this declaration/definition? It would require more metadata,
and may break sometime in the future due to the system changing...)
*defs.h
-------
Local `#define`s, for signature mapping
*undefs.h
---------
Local `#undef`s, for signature mapping
Example:
========
In wrappedtest_private.h:
----------------------
//%S X TestLibStructure ppu
GO(superfunction, pFX)
GOM(superFunction2, pFpX)
Generated files:
wrapper.c: [snippet]
----------
typedef void *(*pFppu_t)(void*, void*, uint32_t);
typedef void *(*pFpppu_t)(void*, void*, void*, uint32_t);
void pFppu(x64emu_t *emu, uintptr_t fcn) { pFppu_t fn = (pFppu_t)fcn; R_RAX=...; }
void pFpppu(x64emu_t *emu, uintptr_t fcn) { pFpppu_t fn = (pFpppu_t)fcn; R_RAX=...; }
int isSimpleWrapper(wrapper_t fun) {
if (fun == pFppu) return 1;
if (fun == pFpppu) return 1;
return 0;
}
wrapper.h: [snippet]
----------
void pFppu(x64emu_t *emu, uintptr_t fcn);
void pFpppu(x64emu_t *emu, uintptr_t fcn);
int isSimpleWrapper(wrapper_t fun);
wrappedtesttypes.h:
-------------------
typedef void *(*pFpX_t)(void*, TestLibStructure);
#define SUPER() \\
GO(superFunction2, pFpX)
wrappedtestdefs.h:
------------------
#define pFX pFppu
#define pFpX pFpppu
wrappedtestundefs.h:
--------------------
#undef pFX
#undef pFpX
"""
# TODO: Add /.F.*A/ automatic generation (and suppression)
class FunctionConvention(object):
def __init__(self, ident: str, convname: str, valid_chars: List[str]) -> None:
self.ident = ident
self.name = convname
self.values = valid_chars
# Free letters: B FG J QR T XYZab e gh jk mno qrst xyz
class FunctionType(str):
values: List[str] = ['E', 'v', 'c', 'w', 'i', 'I', 'C', 'W', 'u', 'U', 'f', 'd', 'D', 'K', 'l', 'L', 'p', 'V', 'O', 'S', 'N', 'M', 'H', 'P', 'A']
conventions = {
'F': FunctionConvention('F', "System V", ['E', 'v', 'c', 'w', 'i', 'I', 'C', 'W', 'u', 'U', 'f', 'd', 'D', 'K', 'l', 'L', 'p', 'V', 'O', 'S', 'N', 'M', 'H', 'P', 'A']),
'W': FunctionConvention('W', "Windows", ['E', 'v', 'c', 'w', 'i', 'I', 'C', 'W', 'u', 'U', 'f', 'd', 'K', 'l', 'L', 'p', 'V', 'O', 'S', 'N', 'M', 'P', 'A'])
}
sortedvalues = ['E', 'v', 'c', 'w', 'i', 'I', 'C', 'W', 'u', 'U', 'f', 'd', 'D', 'K', 'l', 'L', 'p', 'V', 'O', 'S', 'N', 'M', 'H', 'P', 'A', '0', '1']
assert(all(all(c not in conv.values[:i] and c in sortedvalues for i, c in enumerate(conv.values)) for conv in conventions.values()))
class FunctionType(str):
@staticmethod
def validate(s: str, post: str) -> bool:
if len(s) < 3:
@ -66,10 +173,13 @@ class FunctionType(str):
# TODO: change *FEv into true functions (right now they are redirected to *FE)
#chk_type = s[0] + s[3:]
if s[1] not in ["F"]:
if s[1] not in conventions:
raise NotImplementedError("Bad middle letter {0}{1}".format(s[1], post))
return all(c in FunctionType.values for c in chk_type) and (('v' not in chk_type[1:]) or (len(chk_type) == 2))
return all(c in conventions[s[1]].values for c in chk_type) and (('v' not in chk_type[1:]) or (len(chk_type) == 2))
def get_convention(self) -> FunctionConvention:
return conventions[self[1]]
def splitchar(self) -> List[int]:
"""
@ -80,9 +190,9 @@ class FunctionType(str):
of `values.index`.
"""
try:
ret = [len(self), FunctionType.values.index(self[0])]
ret = [len(self), ord(self.get_convention().ident), self.get_convention().values.index(self[0])]
for c in self[2:]:
ret.append(FunctionType.values.index(c))
ret.append(self.get_convention().values.index(c))
return ret
except ValueError as e:
raise ValueError("Value is " + self) from e
@ -90,8 +200,6 @@ class FunctionType(str):
def __getitem__(self, i: Union[int, slice]) -> 'FunctionType':
return FunctionType(super().__getitem__(i))
assert(all(c not in FunctionType.values[:i] for i, c in enumerate(FunctionType.values)))
RedirectType = NewType('RedirectType', FunctionType)
DefineType = NewType('DefineType', str)
@ -292,7 +400,9 @@ def readFiles(files: Iterable[Filename]) -> Tuple[JumbledGlobals, JumbledRedirec
or match("sdl1net", "sdl2net") \
or match("sdl1ttf", "sdl2ttf") \
or match("libGL", "libEGL") \
or match("libc", "tcmallocminimal"):
or match("libc", "tcmallocminimal") \
or match("libc", "tbbmallocproxy") \
or match("tcmallocminimal", "tbbmallocproxy"):
continue
# Note: this test is very (too) simple. If it ever raises, comment
@ -363,7 +473,8 @@ def readFiles(files: Iterable[Filename]) -> Tuple[JumbledGlobals, JumbledRedirec
gotype = ln.split("(")[0].strip()
funname = ln.split(",")[0].split("(")[1].strip()
ln = ln.split(",")[1].split(")")[0].strip()
add_symbol_name(funname)
if not filename.endswith("_genvate.h"):
add_symbol_name(funname)
except IndexError:
raise NotImplementedError("Invalid GO command: {0}:{1}".format(
filename, line[:-1]
@ -372,26 +483,40 @@ def readFiles(files: Iterable[Filename]) -> Tuple[JumbledGlobals, JumbledRedirec
hasFlatStructure = False
origLine = ln
if not FunctionType.validate(ln, " ({0}:{1})".format(filename, line[:-1])):
if (ln[0] in FunctionType.values) \
# This needs more work
old = RedirectType(FunctionType(ln))
if (ln[0] in old.get_convention().values) \
and ('v' not in ln[2:]) \
and all((c in FunctionType.values) or (c in mystructs) for c in ln[2:]):
and all((c in old.get_convention().values) or (c in mystructs) for c in ln[2:]):
hasFlatStructure = True
for sn in mystructs:
ln = ln.replace(sn, mystructs[sn][1])
ln = ln[0] + 'F' + ln[2:] # In case a structure named 'F' is used
mystructuses[RedirectType(FunctionType(origLine))] = FunctionType(ln)
else:
# This needs more work
old = RedirectType(FunctionType(ln))
acceptables = ['0', '1'] + FunctionType.values
if any(c not in acceptables for c in ln[2:]):
raise NotImplementedError("{0} ({1}:{2})".format(ln[2:], filename, line[:-1]))
# Ok, this is acceptable: there is 0, 1 and/or void
ln = ln[:2] + (ln[2:]
.replace("v", "") # void -> nothing
.replace("0", "i") # 0 -> integer
.replace("1", "i")) # 1 -> integer
assert(len(ln) >= 3)
if old.get_convention().name == "System V":
acceptables = ['0', '1'] + old.get_convention().values
if any(c not in acceptables for c in ln[2:]):
raise NotImplementedError("{0} ({1}:{2})".format(ln[2:], filename, line[:-1]))
# Ok, this is acceptable: there is 0, 1 and/or void
ln = ln[:2] + (ln[2:]
.replace("v", "") # void -> nothing
.replace("0", "i") # 0 -> integer
.replace("1", "i")) # 1 -> integer
assert(len(ln) >= 3)
else:
acceptables = ['0', '1', 'D', 'H'] + old.get_convention().values
if any(c not in acceptables for c in ln[2:]):
raise NotImplementedError("{0} ({1}:{2})".format(ln[2:], filename, line[:-1]))
# Ok, this is acceptable: there is 0, 1 and/or void
ln = ln[:2] + (ln[2:]
.replace("v", "") # void -> nothing
.replace("D", "p") # long double -> pointer
.replace("H", "p") # unsigned __int128 -> pointer
.replace("0", "i") # 0 -> integer
.replace("1", "i")) # 1 -> integer
assert(len(ln) >= 3)
redirects.setdefault(str(dependants), {})
redirects[str(dependants)][old] = FunctionType(ln)
@ -415,6 +540,7 @@ def readFiles(files: Iterable[Filename]) -> Tuple[JumbledGlobals, JumbledRedirec
typedefs.setdefault(funtype, [])
typedefs[funtype].append(funname)
elif (gotype == "GOM") or (gotype == "GOWM"):
# OK on box64 for a GOM to not have emu...
funtype = RedirectType(FunctionType(origLine))
typedefs.setdefault(funtype, [])
typedefs[funtype].append(funname)
@ -425,6 +551,7 @@ def readFiles(files: Iterable[Filename]) -> Tuple[JumbledGlobals, JumbledRedirec
typedefs.setdefault(RedirectType(FunctionType(origLine)), [])
# If the line is a structure metadata information...
# FIXME: what happens with e.g. a Windows function?
elif ln.startswith("//%S"):
metadata = [e for e in ln.split() if e]
if len(metadata) != 4:
@ -438,12 +565,12 @@ def readFiles(files: Iterable[Filename]) -> Tuple[JumbledGlobals, JumbledRedirec
if metadata[3] == "":
# If you need this, please open an issue (this is never actually called, empty strings are removed)
raise NotImplementedError("Invalid structure metadata supply (empty replacement) ({0}:{1})".format(filename, line[:-1]))
if any(c not in FunctionType.values for c in metadata[3]):
if any(c not in conventions['F'].values for c in metadata[3]):
# Note that replacement cannot be another structure type
raise NotImplementedError("Invalid structure metadata supply (invalid replacement) ({0}:{1})".format(filename, line[:-1]))
if metadata[1] in mystructs:
raise NotImplementedError("Invalid structure nickname {0} (duplicate) ({1}/{2})".format(metadata[1], filename, line[:-1]))
if (metadata[1] in FunctionType.values) or (metadata[1] in ['0', '1']):
if (metadata[1] in conventions['F'].values) or (metadata[1] in ['0', '1']):
raise NotImplementedError("Invalid structure nickname {0} (reserved) ({1}/{2})".format(metadata[1], filename, line[:-1]))
# OK, add into the database
@ -578,17 +705,21 @@ def sortArrays(gbl_tmp : JumbledGlobals, red_tmp : JumbledRedirects, filespec: J
for k3 in gbl:
gbl[k3].sort(key=FunctionType.splitchar)
FunctionType.values = FunctionType.values + ['0', '1']
oldvals = { k: conventions[k].values for k in conventions }
for k in conventions:
conventions[k].values = sortedvalues
for k3 in redirects:
redirects[k3].sort(key=lambda v: v[0].splitchar() + v[1].splitchar())
FunctionType.values = FunctionType.values[:-2]
for k in conventions:
conventions[k].values = oldvals[k]
sortedfilespec: SortedFilesSpecific = {}
for fn in filespec:
# Maybe do better?
mystructs_vals: List[str] = sorted(filespec[fn][1].keys())
if mystructs_vals != []:
FunctionType.values = FunctionType.values + list(mystructs_vals)
for k in conventions:
conventions[k].values = conventions[k].values + list(mystructs_vals)
mytypedefs_vals: List[RedirectType] = sorted(filespec[fn][0].keys(), key=FunctionType.splitchar)
sortedfilespec[fn] = (
@ -597,7 +728,8 @@ def sortArrays(gbl_tmp : JumbledGlobals, red_tmp : JumbledRedirects, filespec: J
)
if mystructs_vals != []:
FunctionType.values = FunctionType.values[:-len(mystructs_vals)]
for k in conventions:
conventions[k].values = conventions[k].values[:-len(mystructs_vals)]
return CustOrderedDict(gbl, gbl_idxs), CustOrderedDict(redirects, redirects_idxs), sortedfilespec
@ -684,6 +816,8 @@ def main(root: str, files: Iterable[Filename], ver: str):
# Detect simple wrappings
simple_wraps: Dict[ClausesStr, List[Tuple[FunctionType, int]]] = {}
allowed_conv_ident = "F"
allowed_conv = conventions[allowed_conv_ident]
# H could be allowed maybe?
allowed_simply: str = "v"
@ -692,16 +826,18 @@ def main(root: str, files: Iterable[Filename], ver: str):
# Sanity checks
forbidden_simple: str = "EDKVOSNMHPA"
assert(len(allowed_simply) + len(allowed_regs) + len(allowed_fpr) + len(forbidden_simple) == len(FunctionType.values))
assert(len(allowed_simply) + len(allowed_regs) + len(allowed_fpr) + len(forbidden_simple) == len(allowed_conv.values))
assert(all(c not in allowed_regs for c in allowed_simply))
assert(all(c not in allowed_simply + allowed_regs for c in allowed_fpr))
assert(all(c not in allowed_simply + allowed_regs + allowed_fpr for c in forbidden_simple))
assert(all(c in allowed_simply + allowed_regs + allowed_fpr + forbidden_simple for c in FunctionType.values))
assert(all(c in allowed_simply + allowed_regs + allowed_fpr + forbidden_simple for c in allowed_conv.values))
def check_simple(v: FunctionType):
def check_simple(v: FunctionType) -> Optional[int]:
regs_count: int = 0
fpr_count : int = 0
if v.get_convention() is not allowed_conv:
return None
if v[0] in forbidden_simple:
return None
for c in v[2:]:
@ -857,15 +993,21 @@ def main(root: str, files: Iterable[Filename], ver: str):
# Rewrite the wrapper.c file:
# i and u should only be 32 bits
# E v c w i I C W u U f d D K l L p V O S N M H P A
td_types = ["x64emu_t*", "void", "int8_t", "int16_t", "int64_t", "int64_t", "uint8_t", "uint16_t", "uint64_t", "uint64_t", "float", "double", "long double", "double", "intptr_t", "uintptr_t", "void*", "void*", "int32_t", "void*", "...", "...", "unsigned __int128", "void*", "void*"]
if len(FunctionType.values) != len(td_types):
raise NotImplementedError("len(values) = {lenval} != len(td_types) = {lentypes}".format(lenval=len(FunctionType.values), lentypes=len(td_types)))
td_types = {
# E v c w i I C W u U f d D K l L p V O S N M H P A
'F': ["x64emu_t*", "void", "int8_t", "int16_t", "int32_t", "int64_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", "float", "double", "long double", "double", "intptr_t", "uintptr_t", "void*", "void*", "int32_t", "void*", "...", "...", "unsigned __int128", "void*", "void*"],
# E v c w i I C W u U f d K l L p V O S N M P A
'W': ["x64emu_t*", "void", "int8_t", "int16_t", "int32_t", "int64_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", "float", "double", "double", "intptr_t", "uintptr_t", "void*", "void*", "int32_t", "void*", "...", "...", "void*", "void*"]
}
assert(all(k in conventions for k in td_types))
for k in conventions:
if len(conventions[k].values) != len(td_types[k]):
raise NotImplementedError("len(values) = {lenval} != len(td_types) = {lentypes}".format(lenval=len(conventions[k].values), lentypes=len(td_types[k])))
def generate_typedefs(arr: Iterable[FunctionType], file) -> None:
for v in arr:
file.write("typedef " + td_types[FunctionType.values.index(v[0])] + " (*" + v + "_t)"
+ "(" + ', '.join(td_types[FunctionType.values.index(t)] for t in v[2:]) + ");\n")
file.write("typedef " + td_types[v.get_convention().ident][v.get_convention().values.index(v[0])] + " (*" + v + "_t)"
+ "(" + ', '.join(td_types[v.get_convention().ident][v.get_convention().values.index(t)] for t in v[2:]) + ");\n")
with open(os.path.join(root, "src", "wrapped", "generated", "wrapper.c"), 'w') as file:
file.write(files_header["wrapper.c"].format(lbr="{", rbr="}", version=ver))
@ -884,37 +1026,61 @@ def main(root: str, files: Iterable[Filename], ver: str):
# Helper variables
# Return type template
vals = [
"\n#error Invalid return type: emulator\n", # E
"fn({0});", # v
"R_RAX=fn({0});", # c
"R_RAX=fn({0});", # w
"R_RAX=(int64_t)fn({0});", # i should be int32_t
"R_RAX=(int64_t)fn({0});", # I
"R_RAX=(unsigned char)fn({0});", # C
"R_RAX=(unsigned short)fn({0});", # W
"R_RAX=(uint64_t)fn({0});", # u should be uint32_t
"R_RAX=fn({0});", # U
"emu->xmm[0].f[0]=fn({0});", # f
"emu->xmm[0].d[0]=fn({0});", # d
"long double ld=fn({0}); fpu_do_push(emu); ST0val = ld;", # D
"double db=fn({0}); fpu_do_push(emu); ST0val = db;", # K
"R_RAX=(intptr_t)fn({0});", # l
"R_RAX=(uintptr_t)fn({0});", # L
"R_RAX=(uintptr_t)fn({0});", # p
"\n#error Invalid return type: va_list\n", # V
"\n#error Invalid return type: at_flags\n", # O
"\n#error Invalid return type: _io_file*\n", # S
"\n#error Invalid return type: ... with 1 arg\n", # N
"\n#error Invalid return type: ... with 2 args\n", # M
"unsigned __int128 u128 = fn({0}); R_RAX=(u128&0xFFFFFFFFFFFFFFFFL); R_RDX=(u128>>64)&0xFFFFFFFFFFFFFFFFL;", # H
"\n#error Invalid return type: pointer in the stack\n", # P
"\n#error Invalid return type: va_list\n", # A
]
vals = {
conventions['F']: [
"\n#error Invalid return type: emulator\n", # E
"fn({0});", # v
"R_RAX=fn({0});", # c
"R_RAX=fn({0});", # w
"R_RAX=(int32_t)fn({0});", # i
"R_RAX=(int64_t)fn({0});", # I
"R_RAX=(unsigned char)fn({0});", # C
"R_RAX=(unsigned short)fn({0});", # W
"R_RAX=(uint32_t)fn({0});", # u
"R_RAX=fn({0});", # U
"emu->xmm[0].f[0]=fn({0});", # f
"emu->xmm[0].d[0]=fn({0});", # d
"long double ld=fn({0}); fpu_do_push(emu); ST0val = ld;", # D
"double db=fn({0}); fpu_do_push(emu); ST0val = db;", # K
"R_RAX=(intptr_t)fn({0});", # l
"R_RAX=(uintptr_t)fn({0});", # L
"R_RAX=(uintptr_t)fn({0});", # p
"\n#error Invalid return type: va_list\n", # V
"\n#error Invalid return type: at_flags\n", # O
"\n#error Invalid return type: _io_file*\n", # S
"\n#error Invalid return type: ... with 1 arg\n", # N
"\n#error Invalid return type: ... with 2 args\n", # M
"unsigned __int128 u128 = fn({0}); R_RAX=(u128&0xFFFFFFFFFFFFFFFFL); R_RDX=(u128>>64)&0xFFFFFFFFFFFFFFFFL;", # H
"\n#error Invalid return type: pointer in the stack\n", # P
"\n#error Invalid return type: va_list\n", # A
],
conventions['W']: [
"\n#error Invalid return type: emulator\n", # E
"fn({0});", # v
"R_RAX=fn({0});", # c
"R_RAX=fn({0});", # w
"R_RAX=(int32_t)fn({0});", # i
"R_RAX=(int64_t)fn({0});", # I
"R_RAX=(unsigned char)fn({0});", # C
"R_RAX=(unsigned short)fn({0});", # W
"R_RAX=(uint32_t)fn({0});", # u
"R_RAX=fn({0});", # U
"emu->xmm[0].f[0]=fn({0});", # f
"emu->xmm[0].d[0]=fn({0});", # d
"double db=fn({0}); fpu_do_push(emu); ST0val = db;", # K
"R_RAX=(intptr_t)fn({0});", # l
"R_RAX=(uintptr_t)fn({0});", # L
"R_RAX=(uintptr_t)fn({0});", # p
"\n#error Invalid return type: va_list\n", # V
"\n#error Invalid return type: at_flags\n", # O
"\n#error Invalid return type: _io_file*\n", # S
"\n#error Invalid return type: ... with 1 arg\n", # N
"\n#error Invalid return type: ... with 2 args\n", # M
"\n#error Invalid return type: pointer in the stack\n", # P
"\n#error Invalid return type: va_list\n", # A
]
}
# Name of the registers
reg_arg = ["R_RDI", "R_RSI", "R_RDX", "R_RCX", "R_R8", "R_R9"]
assert(len(reg_arg) == 6)
# vreg: value is in a general register
# E v c w i I C W u U f d D K l L p V O S N M H P A
vreg = [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 2, 2, 0, 1]
@ -932,11 +1098,11 @@ def main(root: str, files: Iterable[Filename], ver: str):
"", # v
"(int8_t){p}, ", # c
"(int16_t){p}, ", # w
"(int64_t){p}, ", # i should int32_t
"(int32_t){p}, ", # i
"(int64_t){p}, ", # I
"(uint8_t){p}, ", # C
"(uint16_t){p}, ", # W
"(uint64_t){p}, ", # u should uint32_t
"(uint32_t){p}, ", # u
"(uint64_t){p}, ", # U
"", # f
"", # d
@ -1013,11 +1179,11 @@ def main(root: str, files: Iterable[Filename], ver: str):
"", # v
"*(int8_t*)(R_RSP + {p}), ", # c
"*(int16_t*)(R_RSP + {p}), ", # w
"*(int64_t*)(R_RSP + {p}), ", # i should be int32_t
"*(int32_t*)(R_RSP + {p}), ", # i
"*(int64_t*)(R_RSP + {p}), ", # I
"*(uint8_t*)(R_RSP + {p}), ", # C
"*(uint16_t*)(R_RSP + {p}), ", # W
"*(uint64_t*)(R_RSP + {p}), ", # u should be uint32_t
"*(uint32_t*)(R_RSP + {p}), ", # u
"*(uint64_t*)(R_RSP + {p}), ", # U
"*(float*)(R_RSP + {p}), ", # f
"*(double*)(R_RSP + {p}), ", # d
@ -1037,24 +1203,33 @@ def main(root: str, files: Iterable[Filename], ver: str):
]
# Asserts
if len(FunctionType.values) != len(vstack):
raise NotImplementedError("len(values) = {lenval} != len(vstack) = {lenvstack}".format(lenval=len(FunctionType.values), lenvstack=len(vstack)))
if len(FunctionType.values) != len(vreg):
raise NotImplementedError("len(values) = {lenval} != len(vreg) = {lenvreg}".format(lenval=len(FunctionType.values), lenvreg=len(vreg)))
if len(FunctionType.values) != len(vxmm):
raise NotImplementedError("len(values) = {lenval} != len(vxmm) = {lenvxmm}".format(lenval=len(FunctionType.values), lenvxmm=len(vxmm)))
if len(FunctionType.values) != len(vother):
raise NotImplementedError("len(values) = {lenval} != len(vother) = {lenvother}".format(lenval=len(FunctionType.values), lenvother=len(vother)))
if len(FunctionType.values) != len(arg_s):
raise NotImplementedError("len(values) = {lenval} != len(arg_s) = {lenargs}".format(lenval=len(FunctionType.values), lenargs=len(arg_s)))
if len(FunctionType.values) != len(arg_r):
raise NotImplementedError("len(values) = {lenval} != len(arg_r) = {lenargr}".format(lenval=len(FunctionType.values), lenargr=len(arg_r)))
if len(FunctionType.values) != len(arg_x):
raise NotImplementedError("len(values) = {lenval} != len(arg_x) = {lenargx}".format(lenval=len(FunctionType.values), lenargx=len(arg_x)))
if len(FunctionType.values) != len(arg_o):
raise NotImplementedError("len(values) = {lenval} != len(arg_o) = {lenargo}".format(lenval=len(FunctionType.values), lenargo=len(arg_o)))
if len(FunctionType.values) != len(vals):
raise NotImplementedError("len(values) = {lenval} != len(vals) = {lenvals}".format(lenval=len(FunctionType.values), lenvals=len(vals)))
for k in conventions:
assert all(v in conventions['F'].values for v in conventions[k].values), "a convention is not a subset of System V"
assert all(vr == vs for (vr, vs) in zip(vreg, vstack) if vr != 0), "vreg and vstack are inconsistent"
assert all(vx == vs for (vx, vs) in zip(vxmm, vstack) if vx != 0), "vxmm and vstack are inconsistent"
assert all((vo == 0) == (vs != 0) for (vo, vs) in zip(vother, vstack)), "vother and vstack are inconsistent"
if len(conventions['F'].values) != len(vstack):
raise NotImplementedError("len(values) = {lenval} != len(vstack) = {lenvstack}".format(lenval=len(conventions['F'].values), lenvstack=len(vstack)))
if len(conventions['F'].values) != len(vreg):
raise NotImplementedError("len(values) = {lenval} != len(vreg) = {lenvreg}".format(lenval=len(conventions['F'].values), lenvreg=len(vreg)))
if len(conventions['F'].values) != len(vxmm):
raise NotImplementedError("len(values) = {lenval} != len(vxmm) = {lenvxmm}".format(lenval=len(conventions['F'].values), lenvxmm=len(vxmm)))
if len(conventions['F'].values) != len(vother):
raise NotImplementedError("len(values) = {lenval} != len(vother) = {lenvother}".format(lenval=len(conventions['F'].values), lenvother=len(vother)))
if len(conventions['F'].values) != len(arg_s):
raise NotImplementedError("len(values) = {lenval} != len(arg_s) = {lenargs}".format(lenval=len(conventions['F'].values), lenargs=len(arg_s)))
if len(conventions['F'].values) != len(arg_r):
raise NotImplementedError("len(values) = {lenval} != len(arg_r) = {lenargr}".format(lenval=len(conventions['F'].values), lenargr=len(arg_r)))
if len(conventions['F'].values) != len(arg_x):
raise NotImplementedError("len(values) = {lenval} != len(arg_x) = {lenargx}".format(lenval=len(conventions['F'].values), lenargx=len(arg_x)))
if len(conventions['F'].values) != len(arg_o):
raise NotImplementedError("len(values) = {lenval} != len(arg_o) = {lenargo}".format(lenval=len(conventions['F'].values), lenargo=len(arg_o)))
for k in conventions:
c = conventions[k]
if c not in vals:
raise NotImplementedError("[{k}]values not in vals".format(k=k, lenval=len(c.values), lenvals=len(vals[c])))
if len(c.values) != len(vals[c]):
raise NotImplementedError("len([{k}]values) = {lenval} != len(vals[...]) = {lenvals}".format(k=k, lenval=len(c.values), lenvals=len(vals[c])))
# When arg_* is not empty, v* should not be 0
if any(map(lambda v, a: (a != "") and (v == 0), vstack, arg_s)):
raise NotImplementedError("Something in the stack has a null offset and a non-empty arg string")
@ -1075,7 +1250,8 @@ def main(root: str, files: Iterable[Filename], ver: str):
raise NotImplementedError("Something can be in an XMM register but not in the stack")
# Helper functions to write the function definitions
def function_args(args: FunctionType, d: int = 8, r: int = 0, x: int = 0) -> str:
systemVconv = conventions['F']
def function_args_systemV(args: FunctionType, d: int = 8, r: int = 0, x: int = 0) -> str:
# args: string of argument types
# d: delta (in the stack)
# r: general register no
@ -1085,16 +1261,18 @@ def main(root: str, files: Iterable[Filename], ver: str):
# Redirections
if args[0] == "0":
return "0, " + function_args(args[1:], d, r, x)
return "0, " + function_args_systemV(args[1:], d, r, x)
elif args[0] == "1":
return "1, " + function_args(args[1:], d, r, x)
return "1, " + function_args_systemV(args[1:], d, r, x)
idx = FunctionType.values.index(args[0])
if (r < 6) and (vreg[idx] > 0):
idx = systemVconv.values.index(args[0])
# Name of the registers
reg_arg = ["R_RDI", "R_RSI", "R_RDX", "R_RCX", "R_R8", "R_R9"]
if (r < len(reg_arg)) and (vreg[idx] > 0):
ret = ""
for _ in range(vreg[idx]):
# There may be values in multiple registers
if r < 6:
if r < len(reg_arg):
# Value is in a general register
ret = ret + arg_r[idx].format(p=reg_arg[r])
r = r + 1
@ -1102,23 +1280,59 @@ def main(root: str, files: Iterable[Filename], ver: str):
# Remaining is in the stack
ret = ret + arg_s[idx].format(p=d)
d = d + 8
return ret + function_args(args[1:], d, r, x)
return ret + function_args_systemV(args[1:], d, r, x)
elif (x < 8) and (vxmm[idx] > 0):
# Value is in an XMM register
return arg_x[idx].format(p=x) + function_args(args[1:], d, r, x+1)
return arg_x[idx].format(p=x) + function_args_systemV(args[1:], d, r, x+1)
elif vstack[idx] > 0:
# Value is in the stack
return arg_s[idx].format(p=d) + function_args(args[1:], d+8*vstack[idx], r, x)
return arg_s[idx].format(p=d) + function_args_systemV(args[1:], d+8*vstack[idx], r, x)
else:
# Value is somewhere else
return arg_o[idx].format(p=d) + function_args(args[1:], d, r, x)
return arg_o[idx].format(p=d) + function_args_systemV(args[1:], d, r, x)
# windowsconv = conventions['W']
def function_args_windows(args: FunctionType, d: int = 40, r: int = 0) -> str:
# args: string of argument types
# d: delta (in the stack)
# r: general register no
# We can re-use vstack to know if we need to put a pointer or the value
if len(args) == 0:
return ""
# Redirections
if args[0] == "0":
return "0, " + function_args_windows(args[1:], d, r)
elif args[0] == "1":
return "1, " + function_args_windows(args[1:], d, r)
idx = systemVconv.values.index(args[0]) # Little hack to be able to re-use
# Name of the registers
reg_arg = ["R_RCX", "R_RDX", "R_R8", "R_R9"]
if (r < len(reg_arg)) and (vstack[idx] == 1):
# We use a register
if vreg[idx] == 1:
# Value is in a general register
return arg_r[idx].format(p=reg_arg[r]) + function_args_windows(args[1:], d, r+1)
else:
# Remaining is in an XMM register
return arg_x[idx].format(p=r) + function_args_windows(args[1:], d, r+1)
elif vstack[idx] > 0:
# Value is in the stack
return arg_s[idx].format(p=d) + function_args_windows(args[1:], d+8*vstack[idx], r)
else:
# Value is somewhere else
return arg_o[idx].format(p=d) + function_args_windows(args[1:], d, r)
def function_writer(f, N: FunctionType, W: str) -> None:
# Write to f the function type N (real type W)
f.write("void {0}(x64emu_t *emu, uintptr_t fcn) {2} {1} fn = ({1})fcn; ".format(N, W, "{"))
# Generic function
f.write(vals[FunctionType.values.index(N[0])].format(function_args(N[2:])[:-2]) + " }\n")
conv = N.get_convention()
if conv is systemVconv:
f.write(vals[conv][conv.values.index(N[0])].format(function_args_systemV(N[2:])[:-2]) + " }\n")
else:
f.write(vals[conv][conv.values.index(N[0])].format(function_args_windows(N[2:])[:-2]) + " }\n")
for k in gbls:
if k != str(Clauses()):
@ -1176,13 +1390,15 @@ def main(root: str, files: Iterable[Filename], ver: str):
file.write(files_guard["wrapper.h"].format(lbr="{", rbr="}", version=ver))
# Rewrite the *types.h files:
td_types[FunctionType.values.index('A')] = "va_list"
td_types[FunctionType.values.index('V')] = "..."
orig_val_len = len(FunctionType.values)
for k in conventions:
td_types[k][conventions[k].values.index('A')] = "va_list"
td_types[k][conventions[k].values.index('V')] = "..."
orig_val_len = {k: len(conventions[k].values) for k in conventions}
for fn in filesspec:
for strc in fsp_tmp[fn][1]:
FunctionType.values.append(strc)
td_types.append(fsp_tmp[fn][1][strc][0])
for k in conventions:
conventions[k].values.append(strc)
td_types[k].append(fsp_tmp[fn][1][strc][0])
with open(os.path.join(root, "src", "wrapped", "generated", fn + "types.h"), 'w') as file:
file.write(files_header["fntypes.h"].format(lbr="{", rbr="}", version=ver, filename=fn))
@ -1206,8 +1422,9 @@ def main(root: str, files: Iterable[Filename], ver: str):
file.write("#undef {defined}\n".format(defined=defined))
file.write(files_guard["fnundefs.h"].format(lbr="{", rbr="}", version=ver, filename=fn))
FunctionType.values = FunctionType.values[:orig_val_len]
td_types = td_types[:orig_val_len]
for k in conventions:
conventions[k].values = conventions[k].values[:orig_val_len[k]]
td_types[k] = td_types[k][:orig_val_len[k]]
# Save the string for the next iteration, writing was successful
with open(os.path.join(root, "src", "wrapped", "generated", "functions_list.txt"), 'w') as file:
@ -1221,6 +1438,6 @@ if __name__ == '__main__':
if v == "--":
limit.append(i)
Define.defines = list(map(DefineType, sys.argv[2:limit[0]]))
if main(sys.argv[1], sys.argv[limit[0]+1:], "2.1.0.16") != 0:
if main(sys.argv[1], sys.argv[limit[0]+1:], "2.2.0.16") != 0:
exit(2)
exit(0)

61
runTest.cmake Executable file
View File

@ -0,0 +1,61 @@
# arguments checking
if( NOT TEST_PROGRAM )
message( FATAL_ERROR "Require TEST_PROGRAM to be defined" )
endif( NOT TEST_PROGRAM )
if( NOT TEST_ARGS )
message( FATAL_ERROR "Require TEST_ARGS to be defined" )
endif( NOT TEST_ARGS )
if( NOT TEST_OUTPUT )
message( FATAL_ERROR "Require TEST_OUTPUT to be defined" )
endif( NOT TEST_OUTPUT )
if( NOT TEST_REFERENCE )
message( FATAL_ERROR "Require TEST_REFERENCE to be defined" )
endif( NOT TEST_REFERENCE )
set(ENV{BOX64_LOG} 0)
set(ENV{BOX64_NOBANNER} 1)
if( EXISTS ${CMAKE_SOURCE_DIR}/x64lib )
# we are inside box64 folder
set(ENV{LD_LIBRARY_PATH} ${CMAKE_SOURCE_DIR}/x64lib)
else()
# we are inside build folder
set(ENV{LD_LIBRARY_PATH} ${CMAKE_SOURCE_DIR}/../x64lib)
endif( EXISTS ${CMAKE_SOURCE_DIR}/x64lib )
# run the test program, capture the stdout/stderr and the result var
execute_process(
COMMAND ${TEST_PROGRAM} ${TEST_ARGS} ${TEST_ARGS2}
OUTPUT_FILE ${TEST_OUTPUT}
ERROR_VARIABLE TEST_ERROR
RESULT_VARIABLE TEST_RESULT
)
# if the return value is !=0 bail out
if( TEST_RESULT )
get_filename_component(TESTNAME "${TEST_ARGS}" NAME)
file(RENAME "${TEST_OUTPUT}" "${CMAKE_BINARY_DIR}/${TESTNAME}.out")
file(WRITE "${CMAKE_BINARY_DIR}/${TESTNAME}.err" ${TEST_ERROR})
message( FATAL_ERROR "Failed: Test program ${TEST_PROGRAM} exited != 0.\n${TEST_ERROR}" )
endif( TEST_RESULT )
# now compare the output with the reference
execute_process(
COMMAND ${CMAKE_COMMAND} -E compare_files ${TEST_OUTPUT} ${TEST_REFERENCE}
RESULT_VARIABLE TEST_RESULT
)
# again, if return value is !=0 scream and shout
if( TEST_RESULT )
get_filename_component(TESTNAME "${TEST_ARGS}" NAME)
file(RENAME "${TEST_OUTPUT}" "${CMAKE_BINARY_DIR}/${TESTNAME}.out")
file(WRITE "${CMAKE_BINARY_DIR}/${TESTNAME}.err" ${TEST_ERROR})
message( FATAL_ERROR "Failed: The output of ${TEST_PROGRAM} did not match ${TEST_REFERENCE}")
endif( TEST_RESULT )
# remove the temporary files if they exist
if( EXISTS ${TEST_OUTPUT} )
file(REMOVE "${TEST_OUTPUT}")
endif()
# everything went fine...
message( "Passed: The output of ${TEST_PROGRAM} matches ${TEST_REFERENCE}" )

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <stdio.h>
#include <stdlib.h>
@ -9,7 +6,8 @@
#include <signal.h>
#include <sys/mman.h>
#include <pthread.h>
#include "rvtranscontext.h"
#include "box64context.h"
#include "debug.h"
#include "elfloader.h"
#include "custommem.h"
@ -21,9 +19,11 @@
#include "wrapper.h"
#include "x64emu.h"
#include "signals.h"
#include "rcfile.h"
#include "gltools.h"
EXPORTDYN
void initAllHelpers(rvtranscontext_t* context)
void initAllHelpers(box64context_t* context)
{
static int inited = 0;
if(inited)
@ -36,11 +36,12 @@ void initAllHelpers(rvtranscontext_t* context)
}
EXPORTDYN
void finiAllHelpers(rvtranscontext_t* context)
void finiAllHelpers(box64context_t* context)
{
static int finied = 0;
if(finied)
return;
DeleteParams();
fini_pthread_helper(context);
fini_signal_helper();
fini_bridge_helper();
@ -75,16 +76,27 @@ int unlockMutex()
{
int ret = unlockCustommemMutex();
int i;
#ifdef DYNAREC
uint32_t tid = (uint32_t)GetTID();
#define GO(A, B) \
i = (native_lock_storeifref2_d(&A, 0, tid)==tid); \
if(i) { \
ret|=(1<<B); \
}
#else
#define GO(A, B) \
i = checkUnlockMutex(&A); \
if(i) { \
ret|=(1<<B); \
}
#endif
GO(my_context->mutex_once, 5)
GO(my_context->mutex_once2, 6)
GO(my_context->mutex_trace, 7)
#ifdef DYNAREC
GO(my_context->mutex_dyndump, 8)
#else
GO(my_context->mutex_lock, 8)
#endif
GO(my_context->mutex_tls, 9)
GO(my_context->mutex_thread, 10)
GO(my_context->mutex_bridge, 11)
@ -98,51 +110,86 @@ void relockMutex(int locks)
relockCustommemMutex(locks);
#define GO(A, B) \
if(locks&(1<<B)) \
pthread_mutex_lock(&A); \
mutex_trylock(&A); \
GO(my_context->mutex_once, 5)
GO(my_context->mutex_once2, 6)
GO(my_context->mutex_trace, 7)
#ifdef DYNAREC
GO(my_context->mutex_dyndump, 8)
#else
GO(my_context->mutex_lock, 8)
#endif
GO(my_context->mutex_tls, 9)
GO(my_context->mutex_thread, 10)
GO(my_context->mutex_bridge, 11)
#undef GO
}
static void init_mutexes(rvtranscontext_t* context)
static void init_mutexes(box64context_t* context)
{
#ifndef DYNAREC
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
pthread_mutex_init(&context->mutex_once, &attr);
pthread_mutex_init(&context->mutex_once2, &attr);
pthread_mutex_init(&context->mutex_trace, &attr);
pthread_mutex_init(&context->mutex_lock, &attr);
pthread_mutex_init(&context->mutex_trace, &attr);
pthread_mutex_init(&context->mutex_tls, &attr);
pthread_mutex_init(&context->mutex_thread, &attr);
pthread_mutex_init(&context->mutex_bridge, &attr);
pthread_mutexattr_destroy(&attr);
#else
native_lock_store(&context->mutex_trace, 0);
native_lock_store(&context->mutex_tls, 0);
native_lock_store(&context->mutex_thread, 0);
native_lock_store(&context->mutex_bridge, 0);
native_lock_store(&context->mutex_dyndump, 0);
#endif
}
static void atfork_child_rvtranscontext(void)
static void atfork_child_box64context(void)
{
// (re)init mutex if it was lock before the fork
init_mutexes(my_context);
}
EXPORTDYN
rvtranscontext_t *NewRVTransContext(int argc)
void freeCycleLog(box64context_t* ctx)
{
// init and put default values
rvtranscontext_t *context = my_context = (rvtranscontext_t*)box_calloc(1, sizeof(rvtranscontext_t));
if(cycle_log)
for(int i=0; i<CYCLE_LOG; ++i) {
if(cycle_log) {
for(int i=0; i<cycle_log; ++i) {
box_free(ctx->log_call[i]);
box_free(ctx->log_ret[i]);
}
box_free(ctx->log_call);
box_free(ctx->log_ret);
ctx->log_call = NULL;
ctx->log_ret = NULL;
}
}
void initCycleLog(box64context_t* context)
{
if(cycle_log) {
context->log_call = (char**)box_calloc(cycle_log, sizeof(char*));
context->log_ret = (char**)box_calloc(cycle_log, sizeof(char*));
for(int i=0; i<cycle_log; ++i) {
context->log_call[i] = (char*)box_calloc(256, 1);
context->log_ret[i] = (char*)box_calloc(128, 1);
}
}
}
EXPORTDYN
box64context_t *NewBox64Context(int argc)
{
#ifdef BUILD_DYNAMIC
if(my_context) {
++my_context->count;
return my_context;
}
#endif
// init and put default values
box64context_t *context = my_context = (box64context_t*)box_calloc(1, sizeof(box64context_t));
initCycleLog(context);
context->deferedInit = 1;
context->sel_serial = 1;
@ -153,6 +200,8 @@ rvtranscontext_t *NewRVTransContext(int argc)
context->local_maplib = NewLibrarian(context, 1);
context->versym = NewDictionnary();
context->system = NewBridge();
context->globaldefver = NewDefaultVersion();
context->weakdefver = NewDefaultVersion();
// create vsyscall
context->vsyscall = AddBridge(context->system, vFEv, x64Syscall, 0, NULL);
// create the vsyscalls
@ -163,15 +212,15 @@ rvtranscontext_t *NewRVTransContext(int argc)
addAlternate((void*)0xffffffffff600000, (void*)context->vsyscalls[0]);
addAlternate((void*)0xffffffffff600400, (void*)context->vsyscalls[1]);
addAlternate((void*)0xffffffffff600800, (void*)context->vsyscalls[2]);
// get handle to rvtrans itself
context->rvtranslib = dlopen(NULL, RTLD_NOW|RTLD_GLOBAL);
// get handle to box64 itself
context->box64lib = dlopen(NULL, RTLD_NOW|RTLD_GLOBAL);
context->dlprivate = NewDLPrivate();
context->argc = argc;
context->argv = (char**)box_calloc(context->argc+1, sizeof(char*));
init_mutexes(context);
pthread_atfork(NULL, NULL, atfork_child_rvtranscontext);
pthread_atfork(NULL, NULL, atfork_child_box64context);
pthread_key_create(&context->tlskey, free_tlsdatasize);
@ -185,8 +234,9 @@ rvtranscontext_t *NewRVTransContext(int argc)
return context;
}
void freeALProcWrapper(box64context_t* context);
EXPORTDYN
void FreeRVTransContext(rvtranscontext_t** context)
void FreeBox64Context(box64context_t** context)
{
if(!context)
return;
@ -194,22 +244,24 @@ void FreeRVTransContext(rvtranscontext_t** context)
if(--(*context)->forked >= 0)
return;
rvtranscontext_t* ctx = *context; // local copy to do the cleanning
box64context_t* ctx = *context; // local copy to do the cleanning
if(ctx->local_maplib)
FreeLibrarian(&ctx->local_maplib, NULL);
if(ctx->maplib)
FreeLibrarian(&ctx->maplib, NULL);
FreeDictionnary(&ctx->versym);
FreeDefaultVersion(&ctx->globaldefver);
FreeDefaultVersion(&ctx->weakdefver);
for(int i=0; i<ctx->elfsize; ++i) {
FreeElfHeader(&ctx->elfs[i]);
}
box_free(ctx->elfs);
FreeCollection(&ctx->rvtrans_path);
FreeCollection(&ctx->rvtrans_ld_lib);
FreeCollection(&ctx->rvtrans_emulated_libs);
FreeCollection(&ctx->box64_path);
FreeCollection(&ctx->box64_ld_lib);
FreeCollection(&ctx->box64_emulated_libs);
// stop trace now
if(ctx->dec)
DeleteX64TraceDecoder(&ctx->dec);
@ -243,7 +295,8 @@ void FreeRVTransContext(rvtranscontext_t** context)
FreeDLPrivate(&ctx->dlprivate);
box_free(ctx->fullpath);
box_free(ctx->rvtranspath);
box_free(ctx->box64path);
box_free(ctx->bashpath);
FreeBridge(&ctx->system);
@ -264,31 +317,28 @@ void FreeRVTransContext(rvtranscontext_t** context)
if(ctx->tlsdata)
box_free(ctx->tlsdata);
free_neededlib(&ctx->neededlibs);
free_neededlib(ctx->neededlibs);
ctx->neededlibs = NULL;
if(ctx->emu_sig)
FreeX64Emu(&ctx->emu_sig);
finiAllHelpers(ctx);
pthread_mutex_destroy(&ctx->mutex_once);
pthread_mutex_destroy(&ctx->mutex_once2);
#ifndef DYNAREC
pthread_mutex_destroy(&ctx->mutex_trace);
pthread_mutex_destroy(&ctx->mutex_lock);
pthread_mutex_destroy(&ctx->mutex_tls);
pthread_mutex_destroy(&ctx->mutex_thread);
pthread_mutex_destroy(&ctx->mutex_bridge);
#endif
if(cycle_log)
for(int i=0; i<CYCLE_LOG; ++i) {
box_free(ctx->log_call[i]);
box_free(ctx->log_ret[i]);
}
freeCycleLog(ctx);
box_free(ctx);
}
int AddElfHeader(rvtranscontext_t* ctx, elfheader_t* head) {
int AddElfHeader(box64context_t* ctx, elfheader_t* head) {
int idx = ctx->elfsize;
if(idx==ctx->elfcap) {
// resize...
@ -301,7 +351,7 @@ int AddElfHeader(rvtranscontext_t* ctx, elfheader_t* head) {
return idx;
}
int AddTLSPartition(rvtranscontext_t* context, int tlssize) {
int AddTLSPartition(box64context_t* context, int tlssize) {
int oldsize = context->tlssize;
context->tlssize += tlssize;
context->tlsdata = box_realloc(context->tlsdata, context->tlssize);
@ -317,52 +367,3 @@ int AddTLSPartition(rvtranscontext_t* context, int tlssize) {
return -context->tlssize; // negative offset
}
void add_neededlib(needed_libs_t* needed, library_t* lib)
{
if(!needed)
return;
for(int i=0; i<needed->size; ++i)
if(needed->libs[i] == lib)
return;
if(needed->size == needed->cap) {
needed->cap += 8;
needed->libs = (library_t**)box_realloc(needed->libs, needed->cap*sizeof(library_t*));
}
needed->libs[needed->size++] = lib;
}
void free_neededlib(needed_libs_t* needed)
{
if(!needed)
return;
needed->cap = 0;
needed->size = 0;
if(needed->libs)
box_free(needed->libs);
needed->libs = NULL;
}
void add_dependedlib(needed_libs_t* depended, library_t* lib)
{
if(!depended)
return;
for(int i=0; i<depended->size; ++i)
if(depended->libs[i] == lib)
return;
if(depended->size == depended->cap) {
depended->cap += 8;
depended->libs = (library_t**)box_realloc(depended->libs, depended->cap*sizeof(library_t*));
}
depended->libs[depended->size++] = lib;
}
void free_dependedlib(needed_libs_t* depended)
{
if(!depended)
return;
depended->cap = 0;
depended->size = 0;
if(depended->libs)
box_free(depended->libs);
depended->libs = NULL;
}

8
src/box64version.h Executable file
View File

@ -0,0 +1,8 @@
#ifndef __BOX64_VERSION_H_
#define __BOX64_VERSION_H_
#define BOX64_MAJOR 0
#define BOX64_MINOR 2
#define BOX64_REVISION 3
#endif //__BOX64_VERSION_H_

View File

@ -1,13 +1,27 @@
#include <stdio.h>
#include "debug.h"
#include "rvtransversion.h"
#include "box64version.h"
#ifdef NOGIT
#define GITREV "nogit"
#else
#include "git_head.h"
#endif
void PrintRVTransVersion()
void PrintBox64Version()
{
printf("RVTrans%s%s v%d.%d.%d built on %s %s\n",
printf("Box64%s%s v%d.%d.%d %s built on %s %s\n",
#ifdef HAVE_TRACE
" with trace",
#else
"",
#endif
#ifdef DYNAREC
" with Dynarec",
#else
"",
RVTRANS_MAJOR, RVTRANS_MINOR, RVTRANS_REVISION,
#endif
BOX64_MAJOR, BOX64_MINOR, BOX64_REVISION,
GITREV,
__DATE__, __TIME__);
}

View File

@ -1,6 +1,6 @@
#ifndef __BUILD_INFO_H__
#define __BUILD_INFO_H__
void PrintRVTransVersion();
void PrintBox64Version();
#endif //__BUILD_INFO_H__

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,18 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>
#include <setjmp.h>
#include <sys/mman.h>
#include "debug.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "tools/bridge_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
@ -22,6 +20,7 @@
#include "dynablock_private.h"
#include "dynarec_private.h"
#include "elfloader.h"
#include "bridge.h"
#include "dynarec_native.h"
#include "native_lock.h"
@ -29,8 +28,6 @@
#include "custommem.h"
#include "khash.h"
KHASH_MAP_INIT_INT(dynablocks, dynablock_t*)
uint32_t X31_hash_code(void* addr, int len)
{
if(!len) return 0;
@ -40,261 +37,92 @@ uint32_t X31_hash_code(void* addr, int len)
return (uint32_t)h;
}
dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct)
{
if(!textsz) {
printf_log(LOG_NONE, "Error, creating a NULL sized Dynablock\n");
return NULL;
}
dynablocklist_t* ret = (dynablocklist_t*)box_calloc(1, sizeof(dynablocklist_t));
ret->text = text;
ret->textsz = textsz;
ret->minstart = text;
ret->maxend = text+textsz-1;
if(direct && textsz) {
ret->direct = (dynablock_t**)box_calloc(textsz, sizeof(dynablock_t*));
if(!ret->direct) {printf_log(LOG_NONE, "Warning, fail to create direct block for dynablock @%p\n", (void*)text);}
}
dynarec_log(LOG_DEBUG, "New Dynablocklist %p, from %p->%p\n", ret, (void*)text, (void*)(text+textsz));
return ret;
}
void FreeDynablock(dynablock_t* db, int need_lock)
{
if(db) {
if(db->gone)
return; // already in the process of deletion!
dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p parent=%p, father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size-1, db->parent, db->father, db->sons_size, db->gone);
dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size-1, db->gone);
if(need_lock)
pthread_mutex_lock(&my_context->mutex_dyndump);
db->done = 0;
db->gone = 1;
// remove from direct if there
uintptr_t startdb = db->parent->text;
uintptr_t enddb = db->parent->text + db->parent->textsz;
if(db->parent->direct) {
uintptr_t addr = (uintptr_t)db->x64_addr;
if(addr>=startdb && addr<enddb)
native_lock_xchg(&db->parent->direct[addr-startdb], 0); // secured write
}
mutex_lock(&my_context->mutex_dyndump);
// remove jumptable
setJumpTableDefault64(db->x64_addr);
// remove and free the sons
for (int i=0; i<db->sons_size; ++i) {
dynablock_t *son = (dynablock_t*)native_lock_xchg(&db->sons[i], 0);
FreeDynablock(son, 0);
}
// only the father free the DynarecMap
if(!db->father) {
dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->block, db->size);
FreeDynarecMap(db, (uintptr_t)db->block, db->size);
box_free(db->sons);
box_free(db->instsize);
}
box_free(db);
dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->actual_block, db->size);
db->done = 0;
db->gone = 1;
FreeDynarecMap((uintptr_t)db->actual_block);
customFree(db);
if(need_lock)
pthread_mutex_unlock(&my_context->mutex_dyndump);
mutex_unlock(&my_context->mutex_dyndump);
}
}
void FreeDynablockList(dynablocklist_t** dynablocks)
{
if(!dynablocks)
return;
if(!*dynablocks)
return;
dynarec_log(LOG_DEBUG, "Free Dynablocklist %p, with Direct Blocks %p\n", *dynablocks, (*dynablocks)->direct);
if((*dynablocks)->direct) {
for (int i=0; i<(*dynablocks)->textsz; ++i) {
if((*dynablocks)->direct[i] && !(*dynablocks)->direct[i]->father)
FreeDynablock((*dynablocks)->direct[i], 1);
}
box_free((*dynablocks)->direct);
}
(*dynablocks)->direct = NULL;
box_free(*dynablocks);
*dynablocks = NULL;
}
void MarkDynablock(dynablock_t* db)
{
if(db) {
if(db->father)
db = db->father; // mark only father
if(db->need_test)
return; // already done
dynarec_log(LOG_DEBUG, "MarkDynablock %p with %d son(s) %p-%p\n", db, db->sons_size, db->x64_addr, db->x64_addr+db->x64_size-1);
dynarec_log(LOG_DEBUG, "MarkDynablock %p %p-%p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1);
db->need_test = 1;
setJumpTableDefault64(db->x64_addr);
for(int i=0; i<db->sons_size; ++i)
setJumpTableDefault64(db->sons[i]->x64_addr);
setJumpTableIfRef64(db->x64_addr, db->jmpnext, db->block);
}
}
uintptr_t StartDynablockList(dynablocklist_t* db)
{
if(db)
return db->text;
return 0;
}
uintptr_t EndDynablockList(dynablocklist_t* db)
{
if(db)
return db->text+db->textsz-1;
return 0;
}
int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2)
static int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2)
{
if(start1 > end2 || start2 > end1)
return 0;
return 1;
}
void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
static int MarkedDynablock(dynablock_t* db)
{
// Mark will try to find *any* blocks that intersect the range to mark
if(!dynablocks)
return;
if(!dynablocks->direct)
return;
uintptr_t startdb = dynablocks->text;
uintptr_t sizedb = dynablocks->textsz;
dynablock_t *db;
dynarec_log(LOG_DEBUG, "MarkDirectDynablock %p-%p .. startdb=%p, sizedb=%p\n", (void*)addr, (void*)addr+size-1, (void*)startdb, (void*)sizedb);
for(uintptr_t i = 0; i<sizedb; ++i)
if((db=dynablocks->direct[i]))
if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1))
MarkDynablock(db);
if(db) {
if(db->need_test)
return 1; // already done
}
return 0;
}
int FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
void MarkRangeDynablock(dynablock_t* db, uintptr_t addr, uintptr_t size)
{
if(!dynablocks)
// Mark will try to find *any* blocks that intersect the range to mark
if(!db)
return;
dynarec_log(LOG_DEBUG, "MarkRangeDynablock %p-%p .. startdb=%p, sizedb=%p\n", (void*)addr, (void*)addr+size-1, (void*)db->x64_addr, (void*)db->x64_size);
if(!MarkedDynablock(db))
if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1))
MarkDynablock(db);
}
int FreeRangeDynablock(dynablock_t* db, uintptr_t addr, uintptr_t size)
{
if(!db)
return 1;
if(dynablocks->direct) {
dynablock_t* db;
int ret;
khint_t k;
kh_dynablocks_t *blocks = kh_init(dynablocks);
// copy in a temporary list
uintptr_t startdb = dynablocks->text;
uintptr_t enddb = startdb + dynablocks->textsz;
uintptr_t start = addr;
uintptr_t end = addr+size;
if(start<startdb)
start = startdb;
if(end>enddb)
end = enddb;
if(end>startdb && start<enddb)
for(uintptr_t i = start; i<end; ++i) {
db = (dynablock_t*)native_lock_xchg(&dynablocks->direct[i-startdb], 0);
if(db) {
if(db->father)
db = db->father;
if(db->parent==dynablocks) {
k = kh_put(dynablocks, blocks, (uintptr_t)db, &ret);
kh_value(blocks, k) = db;
}
}
}
// purge the list
kh_foreach_value(blocks, db,
FreeDynablock(db, 1);
);
kh_destroy(dynablocks, blocks);
// check emptyness
for(uintptr_t i=0; i<dynablocks->textsz; ++i)
if(dynablocks->direct[i])
return 0;
return 1;
int need_lock = my_context?1:0;
if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1)) {
FreeDynablock(db, need_lock);
return 0;
}
return 1;
}
void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
{
if(!dynablocks)
return;
dynarec_log(LOG_DEBUG, "MarkRangeDynablock %p-%p\n", (void*)addr, (void*)addr+size-1);
if(dynablocks->direct) {
uintptr_t new_addr = dynablocks->minstart;
uintptr_t new_size = dynablocks->maxend - new_addr + 1;
MarkDirectDynablock(dynablocks, addr, size);
// the blocks check before
for(unsigned idx=(new_addr)>>DYNAMAP_SHIFT; idx<(addr>>DYNAMAP_SHIFT); ++idx)
MarkDirectDynablock(getDB(idx), addr, size);
}
}
dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks)
dynablock_t *AddNewDynablock(uintptr_t addr)
{
if(!dynablocks)
return NULL;
dynablock_t* db;
kh_foreach_value(dynablocks, db,
const uintptr_t s = (uintptr_t)db->block;
const uintptr_t e = (uintptr_t)db->block+db->size;
if((uintptr_t)addr>=s && (uintptr_t)addr<e)
return db->father?db->father:db;
)
return NULL;
}
static dynablocklist_t* getDBFromAddress(uintptr_t addr)
{
const uintptr_t idx = (addr>>DYNAMAP_SHIFT);
return getDB(idx);
}
dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created)
{
if(!dynablocks) {
dynarec_log(LOG_INFO, "Warning: Ask to create a dynablock with a NULL dynablocklist (addr=%p)\n", (void*)addr);
*created = 0;
dynablock_t* block;
#if 0
// check if memory as the correct flags
int prot = getProtection(addr);
if(!(prot&(PROT_EXEC|PROT_DYNAREC|PROT_DYNAREC_R))) {
dynarec_log(LOG_VERBOSE, "Block asked on a memory with no execution flags 0x%02X\n", prot);
return NULL;
}
if((addr<dynablocks->text) || (addr>=(dynablocks->text+dynablocks->textsz))) {
return AddNewDynablock(getDBFromAddress(addr), addr, created);
}
dynablock_t* block = NULL;
// first, check if it exist in direct access mode
if(dynablocks->direct) {
block = dynablocks->direct[addr-dynablocks->text];
if(block) {
dynarec_log(LOG_VERBOSE, "Block already exist in Direct Map\n");
*created = 0;
return block;
}
}
if (!*created)
return block;
pthread_mutex_lock(&my_context->mutex_dyndump);
if(!dynablocks->direct) {
dynablock_t** p = (dynablock_t**)box_calloc(dynablocks->textsz, sizeof(dynablock_t*));
if(native_lock_storeifnull(&dynablocks->direct, p)!=p)
box_free(p); // someone already create the direct array, too late...
}
#endif
// create and add new block
dynarec_log(LOG_VERBOSE, "Ask for DynaRec Block creation @%p\n", (void*)addr);
block = (dynablock_t*)box_calloc(1, sizeof(dynablock_t));
block->parent = dynablocks;
dynablock_t* tmp = (dynablock_t*)native_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block);
if(tmp != block) {
// a block appeard!
pthread_mutex_unlock(&my_context->mutex_dyndump);
box_free(block);
*created = 0;
return tmp;
}
*created = 1;
pthread_mutex_lock(&my_context->mutex_dyndump);
block = (dynablock_t*)customCalloc(1, sizeof(dynablock_t));
return block;
}
@ -310,251 +138,107 @@ void cancelFillBlock()
return NULL if block is not found / cannot be created.
Don't create if create==0
*/
static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, dynablock_t* current, int need_lock)
static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, int need_lock)
{
// try the quickest way first: get parent of current and check if ok!
dynablocklist_t *dynablocks = NULL;
dynablock_t* block = NULL;
if(current && current->done && !current->gone) {
dynablocks = current->parent;
if(dynablocks && !(addr>=dynablocks->text && addr<(dynablocks->text+dynablocks->textsz)))
dynablocks = NULL;
}
// nope, lets do the long way
if(!dynablocks) {
dynablocks = getDBFromAddress(addr);
if(!dynablocks) {
dynablocks = GetDynablocksFromAddress(emu->context, addr);
if(!dynablocks)
if(hasAlternate((void*)addr))
return NULL;
dynablock_t* block = getDB(addr);
if(block || !create)
return block;
if(need_lock) {
if(box64_dynarec_wait) {
mutex_lock(&my_context->mutex_dyndump);
} else {
if(mutex_trylock(&my_context->mutex_dyndump)) // FillBlock not available for now
return NULL;
}
}
// check direct first, without lock
if(dynablocks->direct/* && (addr>=dynablocks->text) && (addr<(dynablocks->text+dynablocks->textsz))*/)
if((block = dynablocks->direct[addr-dynablocks->text]))
return block;
int created = create;
block = AddNewDynablock(dynablocks, addr, &created);
if(!created)
return block; // existing block...
block = getDB(addr); // just in case
if(block) {
if(need_lock)
mutex_unlock(&my_context->mutex_dyndump);
return block;
}
block = AddNewDynablock(addr);
// fill the block
block->x64_addr = (void*)addr;
if(need_lock)
pthread_mutex_lock(&my_context->mutex_dyndump);
if(sigsetjmp(&dynarec_jmpbuf, 1)) {
printf_log(LOG_INFO, "FillBlock at %p triggered a segfault, cancelling\n", (void*)addr);
if(need_lock)
pthread_mutex_unlock(&my_context->mutex_dyndump);
mutex_unlock(&my_context->mutex_dyndump);
return NULL;
}
void* ret = FillBlock64(block, filladdr);
if(need_lock)
pthread_mutex_unlock(&my_context->mutex_dyndump);
if(!ret) {
dynarec_log(LOG_DEBUG, "Fillblock of block %p for %p returned an error\n", block, (void*)addr);
void* old = (void*)native_lock_storeifref(&dynablocks->direct[addr-dynablocks->text], 0, block);
if(old!=block && old) {// put it back in place, strange things are happening here!
dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old);
// doing nothing else, the block has not be writen
}
box_free(block);
customFree(block);
block = NULL;
}
// check size
if(block && block->x64_size) {
if(dynablocks->minstart>addr)
dynablocks->minstart = addr;
if(block && (block->x64_size || (!block->x64_size && !block->done))) {
int blocksz = block->x64_size;
if(dynablocks->maxend<addr+blocksz) {
dynablocks->maxend = addr+blocksz;
for(unsigned idx=(addr>>DYNAMAP_SHIFT)+1; idx<=((addr+blocksz-1)>>DYNAMAP_SHIFT); ++idx) {
dynablocklist_t* dblist;
if((dblist = getDB(idx)))
if(dblist->minstart>addr)
dblist->minstart = addr;
}
}
if(blocksz>my_context->max_db_size)
my_context->max_db_size = blocksz;
// fill-in jumptable
addJumpTableIfDefault64(block->x64_addr, block->block);
for(int i=0; i<block->sons_size; ++i) {
addJumpTableIfDefault64(block->sons[i]->x64_addr, block->sons[i]->block);
block->sons[i]->done = 1;
if(!addJumpTableIfDefault64(block->x64_addr, block->block)) {
FreeDynablock(block, 0);
block = getDB(addr);
} else {
if(block->x64_size)
block->done = 1; // don't validate the block if the size is null, but keep the block
}
block->done = 1;
}
if(need_lock)
mutex_unlock(&my_context->mutex_dyndump);
dynarec_log(LOG_DEBUG, "%04d| --- DynaRec Block %s @%p:%p (%p, 0x%x bytes, with %d son(s))\n", GetTID(), created?"created":"recycled", (void*)addr, (void*)(addr+((block)?block->x64_size:1)-1), (block)?block->block:0, (block)?block->size:0, (block)?block->sons_size:0);
dynarec_log(LOG_DEBUG, "%04d| --- DynaRec Block created @%p:%p (%p, 0x%x bytes)\n", GetTID(), (void*)addr, (void*)(addr+((block)?block->x64_size:1)-1), (block)?block->block:0, (block)?block->size:0);
return block;
}
#define MAX_HOTPAGE 64
#define HOTPAGE_STEP 64
static int volatile hotpage_count[MAX_HOTPAGE] = {0};
static uintptr_t volatile hotpage[MAX_HOTPAGE] = {0};
static uintptr_t volatile hotpage_size[MAX_HOTPAGE] = {0};
static volatile int hotpages = 0;
int IsInHotPage(uintptr_t addr) {
if(!hotpages)
return 0;
for(int i=0; i<MAX_HOTPAGE; ++i) {
if((hotpage_count[i]>0) && (addr>=hotpage[i]) && (addr<hotpage[i]+0x1000*(hotpage_size[i]+1))) {
--hotpage_count[i];
if(!hotpage_count[i]) {
--hotpages;
hotpage_size[i] = 0;
dynarec_log(LOG_DEBUG, "End of Hotpage %p\n", (void*)hotpage[i]);
}
__sync_synchronize();
return 1;
}
}
return 0;
}
// Check whether the byte range [start, end] (inclusive) overlaps any hot page.
// Like IsInHotPage(), a hit consumes one unit of the slot's countdown and
// deactivates the slot when the countdown reaches zero.
// Returns 1 on overlap, 0 otherwise.
int AreaInHotPage(uintptr_t start, uintptr_t end) {
    if(!hotpages)
        return 0;   // fast path: nothing is hot right now
    for(int i=0; i<MAX_HOTPAGE; ++i) {
        if(hotpage_count[i]>0)
            if(IntervalIntersects(start, end, hotpage[i], hotpage[i]+0x1000*(hotpage_size[i]+1)-1)) {
                --hotpage_count[i];
                if(!hotpage_count[i]) {
                    // slot cooled down completely: release it
                    --hotpages;
                    hotpage_size[i] = 0;
                    dynarec_log(LOG_DEBUG, "End of Hotpage %p\n", (void*)hotpage[i]);
                }
                // publish the counter update before returning, same as IsInHotPage()
                // (the barrier was missing here while present in IsInHotPage)
                __sync_synchronize();
                return 1;
            }
    }
    return 0;
}
void FuseHotPage(int idx) {
uintptr_t start = hotpage[idx];
uintptr_t end = start+0x1000*(hotpage_size[idx]+1);
for(int i=0; i<MAX_HOTPAGE; ++i)
if(i!=idx && hotpage_count[i]>0) {
if(IntervalIntersects(start, end, hotpage[i], hotpage[i]+0x1000*(hotpage_size[i]+1)-1)) {
if(hotpage_count[i]>hotpage_count[idx])
hotpage_count[idx] = hotpage_count[i];
if(hotpage[i]>hotpage[idx])
hotpage[idx]=hotpage[i];
if(hotpage[i]+0x1000*(hotpage_size[i]+1)>end)
hotpage_size[idx] = ((hotpage[i]+0x1000*(hotpage_size[i]+1))-hotpage[idx])/0x1000 - 1;
hotpage_count[i] = 0;
return;
}
}
}
// Mark the 4K page containing addr as "hot" (written to while dynablocks
// exist on it), throttling dynablock creation there for HOTPAGE_STEP hits.
// Tries to extend/merge an existing slot (same range, adjacent page, or a
// one-page gap) before claiming a new one; if every slot is busy, the one
// with the least remaining heat is evicted.
void AddHotPage(uintptr_t addr) {
    addr&=~0xfff;   // work at page granularity
    // look for an existing slot covering or near addr
    for(int i=0; i<MAX_HOTPAGE; ++i) {
        if(addr>=hotpage[i] && addr<hotpage[i]+0x1000*(hotpage_size[i]+1)) {
            // already covered: just re-heat (re-activating the slot if it had cooled)
            if(!hotpage_count[i])
                ++hotpages;
            hotpage_count[i] = HOTPAGE_STEP;
            __sync_synchronize();
            return;
        }
        if(addr==hotpage[i]+0x1000*(hotpage_size[i]+1)) {
            // page right above the slot: grow it upward
            ++hotpage_size[i];
            if(!hotpage_count[i])   // keep 'hotpages' accounting when reviving a cooled slot
                ++hotpages;
            hotpage_count[i] = HOTPAGE_STEP;
            FuseHotPage(i);
            __sync_synchronize();
            return;
        }
        if(addr+0x1000==hotpage[i]) {
            // page right below the slot: grow it downward
            ++hotpage_size[i];
            hotpage[i] = addr;
            if(!hotpage_count[i])
                ++hotpages;
            hotpage_count[i] = HOTPAGE_STEP;
            // merge with any now-overlapping slot (this call was missing here,
            // unlike the other grow paths)
            FuseHotPage(i);
            __sync_synchronize();
            return;
        }
        if(addr==hotpage[i]+0x1000*(hotpage_size[i]+2)) {
            // one-page gap above the slot: grow over the gap
            hotpage_size[i]+=2;
            if(!hotpage_count[i])
                ++hotpages;
            hotpage_count[i] = HOTPAGE_STEP;
            FuseHotPage(i);
            __sync_synchronize();
            return;
        }
        if(addr+0x2000==hotpage[i]) {
            // one-page gap below the slot: grow downward over the gap
            hotpage_size[i]+=2;
            hotpage[i] = addr;
            if(!hotpage_count[i])
                ++hotpages;
            hotpage_count[i] = HOTPAGE_STEP;
            FuseHotPage(i);
            __sync_synchronize();
            return;
        }
    }
    // no match: look for an empty slot, or the one with the least heat (minimum)
    int mincnt = hotpage_count[0]*(hotpage_size[0]+1);
    int minidx = 0;
    for(int i=1; i<MAX_HOTPAGE; ++i)
        if((hotpage_count[i]*(hotpage_size[i]+1))<mincnt) {
            mincnt = (hotpage_count[i]*(hotpage_size[i]+1));
            minidx = i;
        }
    if(hotpage_count[minidx]) {
        // no free slot: evict the coolest one (warn, but only a few times)
        static int cnt = 0;
        if(cnt<50) {
            dynarec_log(LOG_NONE, "Warning, not enough Hotpage, replacing %p(%p/%d) with %p\n", (void*)hotpage[minidx], (void*)(0x1000*(hotpage_size[minidx]+1)), hotpage_count[minidx], (void*)addr);
            ++cnt;
            if(cnt==50) // stop spamming console with message...
                dynarec_log(LOG_NONE, " will stop warning about not enough Hotpage now\n");
        }
        hotpage_size[minidx] = 0;
    } else
        ++hotpages;
    hotpage[minidx] = addr;
    hotpage_count[minidx] = HOTPAGE_STEP;
    __sync_synchronize();
}
dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current)
dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create)
{
dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, *current, 1);
dynablock_t *father = (db && db->father)?db->father:db;
if(father && father->done && db->block && father->need_test) {
if(pthread_mutex_trylock(&my_context->mutex_dyndump)) {
dynarec_log(LOG_DEBUG, "mutex_dyndump not available when trying to validate block %p from %p:%p (hash:%X) with %d son(s) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, db->sons_size, (void*)addr);
dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, 1);
if(db && db->done && db->block && db->need_test) {
if(AreaInHotPage((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr + db->x64_size - 1)) {
if(box64_dynarec_fastpage) {
uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size);
if(hash==db->hash) // seems ok, run it without reprotecting it
return db;
db->done = 0; // invalidating the block, it's already not good
dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, hash, db->hash, (void*)addr);
// Free db, it's now invalid!
FreeDynablock(db, 1);
return NULL; // not building a new one, it's still a hotpage
} else {
dynarec_log(LOG_INFO, "Not running block %p from %p:%p with for %p because it's in a hotpage\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, (void*)addr);
return NULL;
}
}
uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size);
if(mutex_trylock(&my_context->mutex_dyndump)) {
dynarec_log(LOG_DEBUG, "mutex_dyndump not available when trying to validate block %p from %p:%p (hash:%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, (void*)addr);
return NULL;
}
if(AreaInHotPage((uintptr_t)father->x64_addr, (uintptr_t)father->x64_addr + father->x64_size - 1)) {
dynarec_log(LOG_DEBUG, "Not running block %p from %p:%p with %d son(s) for %p because it's in a hotpage\n", father, father->x64_addr, father->x64_addr+father->x64_size-1, father->sons_size, (void*)addr);
pthread_mutex_unlock(&my_context->mutex_dyndump);
return NULL;
}
uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size);
if(hash!=father->hash) {
father->done = 0; // invalidating the block
dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size-1, hash, father->hash, father->sons_size, (void*)addr);
// no more current if it gets invalidated too
if(*current && IntervalIntersects(
(uintptr_t)father->x64_addr,
(uintptr_t)father->x64_addr+father->x64_size-1,
(uintptr_t)(*current)->x64_addr,
(uintptr_t)(*current)->x64_addr+(*current)->x64_size-1))
*current = NULL;
// Free father, it's now invalid!
FreeDynablock(father, 0);
if(hash!=db->hash) {
db->done = 0; // invalidating the block
dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, hash, db->hash, (void*)addr);
// Free db, it's now invalid!
FreeDynablock(db, 0);
// start again... (will create a new block)
db = internalDBGetBlock(emu, addr, addr, create, *current, 0);
db = internalDBGetBlock(emu, addr, addr, create, 0);
} else {
father->need_test = 0;
dynarec_log(LOG_DEBUG, "Validating block %p from %p:%p (hash:%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size-1, father->hash, father->sons_size, (void*)addr);
protectDB((uintptr_t)father->x64_addr, father->x64_size);
db->need_test = 0;
dynarec_log(LOG_DEBUG, "Validating block %p from %p:%p (hash:%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, (void*)addr);
protectDB((uintptr_t)db->x64_addr, db->x64_size);
// fill back jumptable
addJumpTableIfDefault64(father->x64_addr, father->block);
for(int i=0; i<father->sons_size; ++i)
addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
setJumpTableIfRef64(db->x64_addr, db->block, db->jmpnext);
}
pthread_mutex_unlock(&my_context->mutex_dyndump);
mutex_unlock(&my_context->mutex_dyndump);
}
return db;
}
@ -563,28 +247,25 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr)
{
dynarec_log(LOG_DEBUG, "Creating AlternateBlock at %p for %p\n", (void*)addr, (void*)filladdr);
int create = 1;
dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, NULL, 1);
dynablock_t *father = (db && db->father)?db->father:db;
if(father && father->done && db->block && father->need_test) {
if(pthread_mutex_trylock(&my_context->mutex_dyndump))
dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, 1);
if(db && db->done && db->block && db->need_test) {
if(mutex_trylock(&my_context->mutex_dyndump))
return NULL;
uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size);
if(hash!=father->hash) {
father->done = 0; // invalidating the block
dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr);
// Free father, it's now invalid!
FreeDynablock(father, 0);
uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size);
if(hash!=db->hash) {
db->done = 0; // invalidating the block
dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size, hash, db->hash, (void*)addr);
// Free db, it's now invalid!
FreeDynablock(db, 0);
// start again... (will create a new block)
db = internalDBGetBlock(emu, addr, filladdr, create, NULL, 0);
db = internalDBGetBlock(emu, addr, filladdr, create, 0);
} else {
father->need_test = 0;
protectDB((uintptr_t)father->x64_addr, father->x64_size);
db->need_test = 0;
protectDB((uintptr_t)db->x64_addr, db->x64_size);
// fill back jumptable
addJumpTableIfDefault64(father->x64_addr, father->block);
for(int i=0; i<father->sons_size; ++i)
addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
addJumpTableIfDefault64(db->x64_addr, db->block);
}
pthread_mutex_unlock(&my_context->mutex_dyndump);
mutex_unlock(&my_context->mutex_dyndump);
}
return db;
}

View File

@ -1,19 +1,14 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __DYNABLOCK_PRIVATE_H_
#define __DYNABLOCK_PRIVATE_H_
typedef struct dynablocklist_s dynablocklist_t;
typedef struct instsize_s {
unsigned int x64:4;
unsigned int nat:4;
unsigned char x64:4;
unsigned char nat:4;
} instsize_t;
typedef struct dynablock_s {
dynablocklist_t* parent;
void* block;
void* block; // block-sizeof(void*) == self
void* actual_block; // the actual start of the block (so block-sizeof(void*))
int size;
void* x64_addr;
uintptr_t x64_size;
@ -23,18 +18,8 @@ typedef struct dynablock_s {
uint8_t gone;
uint8_t dummy;
int isize;
dynablock_t** sons; // sons (kind-of dummy dynablock...)
int sons_size;
dynablock_t* father; // set only in the case of a son
instsize_t* instsize;
void* jmpnext; // a branch jmpnext code when block is marked
} dynablock_t;
typedef struct dynablocklist_s {
uintptr_t text;
int textsz;
uintptr_t maxend; // max address end for anyblock on this blocklist
uintptr_t minstart; // min start address for block overlapping this blocklist
dynablock_t** direct; // direct mapping (waste of space, so the array is created at first write)
} dynablocklist_t;
#endif //__DYNABLOCK_PRIVATE_H_

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
@ -8,17 +5,73 @@
#include <setjmp.h>
#include "debug.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "tools/bridge_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "threads.h"
#ifdef DYNAREC
#include "dynablock.h"
#include "dynablock_private.h"
#include "bridge.h"
#include "dynarec_next.h"
#endif
#ifdef HAVE_TRACE
#include "elfloader.h"
#endif
#ifdef DYNAREC
uintptr_t getX64Address(dynablock_t* db, uintptr_t arm_addr);
void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2, uintptr_t* x3)
{
#ifdef HAVE_TRACE
if(!addr) {
dynablock_t* db = FindDynablockFromNativeAddress(x2-4);
printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x64addr=%p/%s)\n", x2-4, db, db?(void*)getX64Address(db, (uintptr_t)x2-4):NULL, db?getAddrFunctionName(getX64Address(db, (uintptr_t)x2-4)):"(nil)");
}
#endif
void * jblock;
dynablock_t* block = DBGetBlock(emu, addr, 1);
if(!block) {
// no block, let link table as is...
if(hasAlternate((void*)addr)) {
printf_log(LOG_DEBUG, "Jmp address has alternate: %p", (void*)addr);
if(box64_log<LOG_DEBUG) dynarec_log(LOG_INFO, "Jmp address has alternate: %p", (void*)addr);
addr = (uintptr_t)getAlternate((void*)addr); // set new address
R_RIP = addr; // but also new RIP!
*x3 = addr; // and the RIP in x27 register
printf_log(LOG_DEBUG, " -> %p\n", (void*)addr);
block = DBGetBlock(emu, addr, 1);
}
if(!block) {
#ifdef HAVE_TRACE
dynablock_t* db = FindDynablockFromNativeAddress(x2-4);
elfheader_t* h = FindElfAddress(my_context, (uintptr_t)x2-4);
dynarec_log(LOG_INFO, "Warning, jumping to a no-block address %p from %p (db=%p, x64addr=%p(elf=%s))\n", (void*)addr, x2-4, db, db?(void*)getX64Address(db, (uintptr_t)x2-4):NULL, h?ElfName(h):"(none)");
#endif
//tableupdate(native_epilog, addr, table);
return native_epilog;
}
}
if(!block->done) {
// not finished yet... leave linker
return native_epilog;
}
if(!(jblock=block->block)) {
// null block, but done: go to epilog, no linker here
return native_epilog;
}
//dynablock_t *father = block->father?block->father:block;
return jblock;
}
#endif
#ifdef __GNUC__
// Disable "clobbered" warnings
@ -42,7 +95,66 @@ void DynaCall(x64emu_t* emu, uintptr_t addr)
}
}
}
EmuCall(emu, addr);
#ifdef DYNAREC
if(!box64_dynarec)
#endif
EmuCall(emu, addr);
#ifdef DYNAREC
else {
uint64_t old_rsp = R_RSP;
uint64_t old_rbx = R_RBX;
uint64_t old_rdi = R_RDI;
uint64_t old_rsi = R_RSI;
uint64_t old_rbp = R_RBP;
uint64_t old_rip = R_RIP;
PushExit(emu);
R_RIP = addr;
emu->df = d_none;
while(!emu->quit) {
dynablock_t* block = DBGetBlock(emu, R_RIP, 1);
if(!block || !block->block || !block->done) {
// no block, of block doesn't have DynaRec content (yet, temp is not null)
// Use interpreter (should use single instruction step...)
dynarec_log(LOG_DEBUG, "%04d|Calling Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu);
Run(emu, 1);
} else {
dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x64 instructions emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,emu);
CHECK_FLAGS(emu);
// block is here, let's run it!
native_prolog(emu, block->block);
}
if(emu->fork) {
int forktype = emu->fork;
emu->quit = 0;
emu->fork = 0;
emu = x64emu_fork(emu, forktype);
if(emu->type == EMUTYPE_MAIN) {
ejb = GetJmpBuf();
ejb->emu = emu;
ejb->jmpbuf_ok = 1;
jmpbuf_reset = 1;
if(sigsetjmp((struct __jmp_buf_tag*)ejb->jmpbuf, 1)) {
printf_log(LOG_DEBUG, "Setjmp inner DynaCall, fs=0x%x\n", ejb->emu->segs[_FS]);
addr = R_RIP;
}
}
}
}
emu->quit = 0; // reset Quit flags...
emu->df = d_none;
if(emu->quitonlongjmp && emu->longjmp) {
if(emu->quitonlongjmp==1)
emu->longjmp = 0; // don't change anything because of the longjmp
} else {
R_RBX = old_rbx;
R_RDI = old_rdi;
R_RSI = old_rsi;
R_RBP = old_rbp;
R_RSP = old_rsp;
R_RIP = old_rip; // and set back instruction pointer
}
}
#endif
// clear the setjmp
if(ejb && jmpbuf_reset)
ejb->jmpbuf_ok = 0;
@ -52,16 +164,60 @@ int DynaRun(x64emu_t* emu)
{
// prepare setjump for signal handling
emu_jmpbuf_t *ejb = NULL;
#ifdef DYNAREC
int jmpbuf_reset = 1;
#endif
if(emu->type == EMUTYPE_MAIN) {
ejb = GetJmpBuf();
if(!ejb->jmpbuf_ok) {
ejb->emu = emu;
ejb->jmpbuf_ok = 1;
#ifdef DYNAREC
jmpbuf_reset = 1;
#endif
if(sigsetjmp((struct __jmp_buf_tag*)ejb->jmpbuf, 1))
printf_log(LOG_DEBUG, "Setjmp DynaRun, fs=0x%x\n", ejb->emu->segs[_FS]);
}
}
return Run(emu, 0);
#ifdef DYNAREC
if(!box64_dynarec)
#endif
return Run(emu, 0);
#ifdef DYNAREC
else {
while(!emu->quit) {
dynablock_t* block = DBGetBlock(emu, R_RIP, 1);
if(!block || !block->block || !block->done) {
// no block, of block doesn't have DynaRec content (yet, temp is not null)
// Use interpreter (should use single instruction step...)
dynarec_log(LOG_DEBUG, "%04d|Running Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu);
Run(emu, 1);
} else {
dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x64 insts emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, emu);
// block is here, let's run it!
native_prolog(emu, block->block);
}
if(emu->fork) {
int forktype = emu->fork;
emu->quit = 0;
emu->fork = 0;
emu = x64emu_fork(emu, forktype);
if(emu->type == EMUTYPE_MAIN) {
ejb = GetJmpBuf();
ejb->emu = emu;
ejb->jmpbuf_ok = 1;
jmpbuf_reset = 1;
if(sigsetjmp((struct __jmp_buf_tag*)ejb->jmpbuf, 1))
printf_log(LOG_DEBUG, "Setjmp inner DynaRun, fs=0x%x\n", ejb->emu->segs[_FS]);
}
}
}
}
// clear the setjmp
if(ejb && jmpbuf_reset)
ejb->jmpbuf_ok = 0;
return 0;
#endif
}
#ifdef __GNUC__
#pragma GCC diagnostic pop

View File

@ -1,9 +1,22 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __DYNAREC_ARCH__H_
#define __DYNAREC_ARCH__H_
#ifdef RV64
#define instruction_native_t instruction_rv64_t
#define dynarec_native_t dynarec_rv64_t
#define ADDITIONNAL_DEFINITION() \
int fpuCacheNeedsTransform(dynarec_native_t* dyn, int ninst);
#define OTHER_CACHE() \
if (fpuCacheNeedsTransform(dyn, ninst)) ret|=2;
#include "rv64/rv64_printer.h"
#include "rv64/dynarec_rv64_private.h"
#include "rv64/dynarec_rv64_functions.h"
#else
#error Unsupported platform
#endif
#endif //__DYNAREC_ARCH__H_

View File

@ -1,9 +1,10 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __DYNAREC_HELPER__H_
#define __DYNAREC_HELPER__H_
#ifdef RV64
#include "rv64/dynarec_rv64_helper.h"
#else
#error Unsupported architecture
#endif
#endif //__DYNAREC_HELPER__H_

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
@ -9,14 +6,14 @@
#include <assert.h>
#include "debug.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "custommem.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "tools/bridge_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
@ -26,21 +23,22 @@
#include "dynarec_native.h"
#include "dynarec_arch.h"
#include "dynarec_next.h"
void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) {
uint8_t *ip = (uint8_t*)inst->addr;
if(ip[0]==0xcc && ip[1]=='S' && ip[2]=='C') {
uintptr_t a = *(uintptr_t*)(ip+3);
if(a==0) {
dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", (rvtrans_dynarec_dump>1)?"\e[01;33m":"", (void*)ip, (rvtrans_dynarec_dump>1)?"\e[m":"");
dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", (box64_dynarec_dump>1)?"\e[01;33m":"", (void*)ip, (box64_dynarec_dump>1)?"\e[m":"");
} else {
dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", (rvtrans_dynarec_dump>1)?"\e[01;33m":"", (void*)ip, (void*)a, (rvtrans_dynarec_dump>1)?"\e[m":"");
dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", (box64_dynarec_dump>1)?"\e[01;33m":"", (void*)ip, (void*)a, (box64_dynarec_dump>1)?"\e[m":"");
}
} else {
if(dec) {
dynarec_log(LOG_NONE, "%s%p: %s", (rvtrans_dynarec_dump>1)?"\e[01;33m":"", ip, DecodeX64Trace(dec, inst->addr));
dynarec_log(LOG_NONE, "%s%p: %s", (box64_dynarec_dump>1)?"\e[01;33m":"", ip, DecodeX64Trace(dec, inst->addr));
} else {
dynarec_log(LOG_NONE, "%s%p: ", (rvtrans_dynarec_dump>1)?"\e[01;33m":"", ip);
dynarec_log(LOG_NONE, "%s%p: ", (box64_dynarec_dump>1)?"\e[01;33m":"", ip);
for(int i=0; i<inst->size; ++i) {
dynarec_log(LOG_NONE, "%02X ", ip[i]);
}
@ -57,19 +55,27 @@ void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const cha
}
}
// end of line and colors
dynarec_log(LOG_NONE, "%s\n", (rvtrans_dynarec_dump>1)?"\e[m":"");
dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");
}
}
void add_next(dynarec_native_t *dyn, uintptr_t addr) {
if(!rvtrans_dynarec_bigblock)
if(!box64_dynarec_bigblock)
return;
// exist?
for(int i=0; i<dyn->next_sz; ++i)
if(dyn->next[i]==addr)
return;
// put in a free slot
for(int i=0; i<dyn->next_sz; ++i)
if(!dyn->next[i]) {
dyn->next[i] = addr;
return;
}
// add slots
if(dyn->next_sz == dyn->next_cap) {
dyn->next_cap += 16;
dyn->next = (uintptr_t*)box_realloc(dyn->next, dyn->next_cap*sizeof(uintptr_t));
dyn->next_cap += 64;
dyn->next = (uintptr_t*)customRealloc(dyn->next, dyn->next_cap*sizeof(uintptr_t));
}
dyn->next[dyn->next_sz++] = addr;
}
@ -78,14 +84,15 @@ uintptr_t get_closest_next(dynarec_native_t *dyn, uintptr_t addr) {
uintptr_t best = 0;
int i = 0;
while((i<dyn->next_sz) && (best!=addr)) {
if(dyn->next[i]<addr) { // remove the address, it's before current address
memmove(dyn->next+i, dyn->next+i+1, (dyn->next_sz-i-1)*sizeof(uintptr_t));
--dyn->next_sz;
} else {
if((dyn->next[i]<best) || !best)
best = dyn->next[i];
++i;
if(dyn->next[i]) {
if(dyn->next[i]<addr) { // remove the address, it's before current address
dyn->next[i] = 0;
} else {
if((dyn->next[i]<best) || !best)
best = dyn->next[i];
}
}
++i;
}
return best;
}
@ -235,7 +242,7 @@ int is_instructions(dynarec_native_t *dyn, uintptr_t addr, int n)
return (i==n)?1:0;
}
instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int native_size)
void addInst(instsize_t* insts, size_t* size, int x64_size, int native_size)
{
// x64 instruction is <16 bytes
int toadd;
@ -243,10 +250,6 @@ instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size,
toadd = 1 + x64_size/15;
else
toadd = 1 + native_size/15;
if((*size)+toadd>(*cap)) {
*cap = (*size)+toadd;
insts = (instsize_t*)box_realloc(insts, (*cap)*sizeof(instsize_t));
}
while(toadd) {
if(x64_size>15)
insts[*size].x64 = 15;
@ -261,7 +264,6 @@ instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size,
++(*size);
--toadd;
}
return insts;
}
// add a value to table64 (if needed) and gives back the imm19 to use in LDR_literal
@ -275,8 +277,8 @@ int Table64(dynarec_native_t *dyn, uint64_t val)
// not found, add it
if(idx==-1) {
if(dyn->table64size == dyn->table64cap) {
dyn->table64cap+=4;
dyn->table64 = (uint64_t*)box_realloc(dyn->table64, dyn->table64cap * sizeof(uint64_t));
dyn->table64cap+=16;
dyn->table64 = (uint64_t*)customRealloc(dyn->table64, dyn->table64cap * sizeof(uint64_t));
}
idx = dyn->table64size++;
dyn->table64[idx] = val;
@ -304,7 +306,7 @@ static void fillPredecessors(dynarec_native_t* dyn)
dyn->insts[i+1].pred_sz++;
}
}
dyn->predecessor = (int*)box_malloc(pred_sz*sizeof(int));
dyn->predecessor = (int*)customMalloc(pred_sz*sizeof(int));
// fill pred pointer
int* p = dyn->predecessor;
for(int i=0; i<dyn->size; ++i) {
@ -324,73 +326,76 @@ static void fillPredecessors(dynarec_native_t* dyn)
}
static void updateNeed(dynarec_native_t* dyn, int ninst, uint32_t need) {
uint32_t old_need = dyn->insts[ninst].x64.need_flags;
uint32_t new_need = old_need | need;
uint32_t new_use = dyn->insts[ninst].x64.use_flags;
uint32_t old_use = dyn->insts[ninst].x64.old_use;
if((new_need&X_PEND) && dyn->insts[ninst].x64.state_flags==SF_SUBSET) {
new_need &=~X_PEND;
new_need |= X_ALL;
// updateNeed goes backward, from last intruction to top
static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) {
while (ninst>=0) {
// need pending but instruction is only a subset: remove pend and use an X_ALL instead
need |= dyn->insts[ninst].x64.need_after;
if((need&X_PEND) && (dyn->insts[ninst].x64.state_flags==SF_SUBSET)) {
need &=~X_PEND;
need |= X_ALL;
}
if((need&X_PEND) && (dyn->insts[ninst].x64.state_flags==SF_SET)) {
need &=~X_PEND;
need |= dyn->insts[ninst].x64.set_flags; // SF_SET will compute all flags, it's not SUBSET!
}
if((need&X_PEND) && dyn->insts[ninst].x64.state_flags==SF_SUBSET_PENDING) {
need |= X_ALL&~(dyn->insts[ninst].x64.set_flags);
}
dyn->insts[ninst].x64.gen_flags = need&dyn->insts[ninst].x64.set_flags;
if((need&X_PEND) && (dyn->insts[ninst].x64.state_flags&SF_PENDING))
dyn->insts[ninst].x64.gen_flags |= X_PEND;
dyn->insts[ninst].x64.need_after = need;
need = dyn->insts[ninst].x64.need_after&~dyn->insts[ninst].x64.gen_flags;
if(dyn->insts[ninst].x64.may_set)
need |= dyn->insts[ninst].x64.gen_flags; // forward the flags
// Consume X_PEND if relevant
if((need&X_PEND) && (dyn->insts[ninst].x64.set_flags&SF_PENDING))
need &=~X_PEND;
need |= dyn->insts[ninst].x64.use_flags;
if(dyn->insts[ninst].x64.need_before == need)
return ninst - 1;
dyn->insts[ninst].x64.need_before = need;
if(dyn->insts[ninst].x64.barrier&BARRIER_FLAGS) {
need = need?X_PEND:0;
}
int ok = 0;
for(int i=0; i<dyn->insts[ninst].pred_sz; ++i) {
if(dyn->insts[ninst].pred[i] == ninst-1)
ok = 1;
else
updateNeed(dyn, dyn->insts[ninst].pred[i], need);
}
if(!ok)
return ninst - 1;
--ninst;
}
uint32_t new_set = 0;
if(dyn->insts[ninst].x64.state_flags & SF_SET)
new_set = dyn->insts[ninst].x64.set_flags;
if(dyn->insts[ninst].x64.state_flags & SF_PENDING)
new_set |= X_PEND;
if((new_need&X_PEND) && (
dyn->insts[ninst].x64.state_flags==SF_SET || dyn->insts[ninst].x64.state_flags==SF_SUBSET)) {
new_need &=~X_PEND;
new_need |=X_ALL;
}
dyn->insts[ninst].x64.need_flags = new_need;
dyn->insts[ninst].x64.old_use = new_use;
if(dyn->insts[ninst].x64.jmp_insts==-1)
new_need |= X_PEND;
if((new_need == old_need) && (new_use == old_use)) // no changes, bye
return;
new_need &=~new_set; // clean needed flag that were suplied
new_need |= new_use; // new need
// a Flag Barrier will change all need to "Pending", as it clear all flags optimisation
if(new_need && dyn->insts[ninst].x64.barrier&BARRIER_FLAGS)
new_need = X_PEND;
if((new_need == (X_ALL|X_PEND)) && (dyn->insts[ninst].x64.state_flags & SF_SET))
new_need = X_ALL;
//update need to new need on predecessor
for(int i=0; i<dyn->insts[ninst].pred_sz; ++i)
updateNeed(dyn, dyn->insts[ninst].pred[i], new_need);
return ninst;
}
// Reset the per-instruction flag-analysis state back to its defaults,
// so a fresh need_flags propagation pass can be run.
static void resetNeed(dynarec_native_t* dyn) {
    for(int ninst=0; ninst<dyn->size; ++ninst) {
        dyn->insts[ninst].x64.old_use = 0;
        dyn->insts[ninst].x64.need_flags = dyn->insts[ninst].x64.default_need;
    }
}
void* current_helper = NULL;
__thread void* current_helper = NULL;
void CancelBlock64()
void CancelBlock64(int need_lock)
{
if(need_lock)
mutex_lock(&my_context->mutex_dyndump);
dynarec_native_t* helper = (dynarec_native_t*)current_helper;
current_helper = NULL;
if(!helper)
if(!helper) {
if(need_lock)
mutex_unlock(&my_context->mutex_dyndump);
return;
box_free(helper->next);
box_free(helper->insts);
box_free(helper->table64);
box_free(helper->sons_x64);
box_free(helper->sons_native);
if(helper->dynablock && helper->dynablock->block)
FreeDynarecMap(helper->dynablock, (uintptr_t)helper->dynablock->block, helper->dynablock->size);
}
customFree(helper->next);
customFree(helper->insts);
customFree(helper->predecessor);
customFree(helper->table64);
if(helper->dynablock && helper->dynablock->actual_block)
FreeDynarecMap((uintptr_t)helper->dynablock->actual_block);
else if(helper->dynablock && helper->block)
FreeDynarecMap((uintptr_t)helper->block-sizeof(void*));
if(need_lock)
mutex_unlock(&my_context->mutex_dyndump);
}
uintptr_t native_pass0(dynarec_native_t* dyn, uintptr_t addr);
@ -398,14 +403,55 @@ uintptr_t native_pass1(dynarec_native_t* dyn, uintptr_t addr);
uintptr_t native_pass2(dynarec_native_t* dyn, uintptr_t addr);
uintptr_t native_pass3(dynarec_native_t* dyn, uintptr_t addr);
// Build a minimal "empty" dynablock for addr: no translated x64 code, just a
// self pointer and a jump-to-native_epilog stub, so the dispatcher always has
// something valid to run. Returns block on success, NULL if the executable
// map allocation failed (the in-progress block is cancelled in that case).
void* CreateEmptyBlock(dynablock_t* block, uintptr_t addr) {
    block->isize = 0;
    block->done = 0;
    size_t sz = 4*sizeof(void*);
    void* actual_p = (void*)AllocDynarecMap(sz);
    if(actual_p==NULL) {
        // bail out before doing any pointer arithmetic on a NULL pointer
        // (the check previously happened after computing actual_p + sizeof(void*))
        dynarec_log(LOG_INFO, "AllocDynarecMap(%p, %zu) failed, cancelling block\n", block, sz);
        CancelBlock64(0);
        return NULL;
    }
    void* p = actual_p + sizeof(void*); // payload starts right after the self pointer
    block->size = sz;
    block->actual_block = actual_p;
    block->block = p;
    block->jmpnext = p;
    *(dynablock_t**)actual_p = block;   // layout contract: block[-1] == self
    *(void**)(p+2*sizeof(void*)) = native_epilog;
    CreateJmpNext(block->jmpnext, p+2*sizeof(void*));
    block->need_test = 0;
    // all done...
    __clear_cache(actual_p, actual_p+sz); // need to clear the cache before execution...
    return block;
}
void* FillBlock64(dynablock_t* block, uintptr_t addr) {
/*
A Block must have this layout:
0x0000..0x0007 : dynablock_t* : self
0x0008..8+4*n : actual Native instructions, (n is the total number)
A .. A+8*n : Table64: n 64bits values
B .. B+7 : dynablock_t* : self (as part of JmpNext, that simulate another block)
B+8 .. B+15 : 2 Native code for jmpnext (or jmp epilog in case of empty block)
B+16 .. B+23 : jmpnext (or jmp_epilog) address
B+24 .. B+31 : empty (in case an architecture needs more than 2 opcodes)
B+32 .. B+32+sz : instsize (compressed array with each instruction lenght on x64 and native side)
*/
if(IsInHotPage(addr)) {
dynarec_log(LOG_DEBUG, "Cancelling dynarec FillBlock on hotpage for %p\n", (void*)addr);
return NULL;
}
if(addr>=rvtrans_nodynarec_start && addr<rvtrans_nodynarec_end) {
block->done = 1;
return (void*)block;
if(addr>=box64_nodynarec_start && addr<box64_nodynarec_end) {
dynarec_log(LOG_INFO, "Create empty block in no-dynarec zone\n");
return CreateEmptyBlock(block, addr);
}
if(current_helper) {
dynarec_log(LOG_DEBUG, "Cancelling dynarec FillBlock at %p as anothor one is going on\n", (void*)addr);
return NULL;
}
// protect the 1st page
protectDB(addr, 1);
@ -415,41 +461,38 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
helper.dynablock = block;
helper.start = addr;
uintptr_t start = addr;
helper.cap = 64; // needs epilog handling
helper.insts = (instruction_native_t*)box_calloc(helper.cap, sizeof(instruction_native_t));
helper.cap = 128;
helper.insts = (instruction_native_t*)customCalloc(helper.cap, sizeof(instruction_native_t));
// pass 0, addresses, x64 jump addresses, overall size of the block
uintptr_t end = native_pass0(&helper, addr);
// no need for next anymore
box_free(helper.next);
customFree(helper.next);
helper.next_sz = helper.next_cap = 0;
helper.next = NULL;
// basic checks
if(!helper.size) {
dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
CancelBlock64();
return (void*)block;
CancelBlock64(0);
return CreateEmptyBlock(block, addr);;
}
if(!isprotectedDB(addr, 1)) {
dynarec_log(LOG_INFO, "Warning, write on current page on pass0, aborting dynablock creation (%p)\n", (void*)addr);
CancelBlock64();
AddHotPage(addr);
CancelBlock64(0);
return NULL;
}
// protect the block of it goes over the 1st page
if((addr&~0xfff)!=(end&~0xfff)) // need to protect some other pages too
if((addr&~box64_pagesize)!=(end&~box64_pagesize)) // need to protect some other pages too
protectDB(addr, end-addr); //end is 1byte after actual end
// compute hash signature
uint32_t hash = X31_hash_code((void*)addr, end-addr);
// Compute flag_need, without current barriers
resetNeed(&helper);
for(int i = helper.size; i-- > 0;)
updateNeed(&helper, i, 0);
// calculate barriers
for(int i=0; i<helper.size; ++i)
if(helper.insts[i].x64.jmp) {
uintptr_t j = helper.insts[i].x64.jmp;
if(j<start || j>=end) {
if(j<start || j>=end || j==helper.insts[i].x64.addr) {
helper.insts[i].x64.jmp_insts = -1;
helper.insts[i].x64.use_flags |= X_PEND;
helper.insts[i].x64.need_after |= X_PEND;
} else {
// find jump address instruction
int k=-1;
@ -464,75 +507,42 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
}
// fill predecessors with the jump address
fillPredecessors(&helper);
// check for the optionnal barriers now
for(int i=helper.size-1; i>=0; --i) {
if(helper.insts[i].barrier_maybe) {
// out-of-block jump
if(helper.insts[i].x64.jmp_insts == -1) {
// nothing for now
} else {
// inside block jump
int k = helper.insts[i].x64.jmp_insts;
if(k>i) {
// jump in the future
if(helper.insts[k].pred_sz>1) {
// with multiple flow, put a barrier
helper.insts[k].x64.barrier|=BARRIER_FLAGS;
}
} else {
// jump back
helper.insts[k].x64.barrier|=BARRIER_FLAGS;
}
}
}
}
// reset need_flags and compute again, now taking barrier into account (because barrier change use_flags)
for(int i = helper.size; i-- > 0;) {
int k;
if(helper.insts[i].x64.jmp
&& ((k=helper.insts[i].x64.jmp_insts)>=0)
) {
if(helper.insts[k].x64.barrier&BARRIER_FLAGS)
// jumpto barrier
helper.insts[i].x64.use_flags |= X_PEND;
if(helper.insts[i].x64.barrier&BARRIER_FLAGS && (helper.insts[k].x64.need_flags | helper.insts[k].x64.use_flags))
helper.insts[k].x64.barrier|=BARRIER_FLAGS;
else
helper.insts[i].x64.use_flags |= (helper.insts[k].x64.need_flags | helper.insts[k].x64.use_flags);
}
if(helper.insts[i].x64.barrier&BARRIER_FLAGS && !(helper.insts[i].x64.set_flags&SF_PENDING))
// immediate barrier
helper.insts[i].x64.use_flags |= X_PEND;
}
resetNeed(&helper);
for(int i = helper.size; i-- > 0;)
updateNeed(&helper, i, 0);
int pos = helper.size;
while (pos>=0)
pos = updateNeed(&helper, pos, 0);
// pass 1, float optimisations, first pass for flags
native_pass1(&helper, addr);
// pass 2, instruction size
native_pass2(&helper, addr);
// keep size of instructions for signal handling
size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t);
insts_rsize = (insts_rsize+7)&~7; // round the size...
// ok, now allocate mapped memory, with executable flag on
size_t sz = helper.native_size + helper.table64size*sizeof(uint64_t);
void* p = (void*)AllocDynarecMap(block, sz);
if(p==NULL) {
size_t sz = sizeof(void*) + helper.native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize;
// dynablock_t* block (arm insts) table64 jmpnext code instsize
void* actual_p = (void*)AllocDynarecMap(sz);
void* p = actual_p + sizeof(void*);
void* next = p + helper.native_size + helper.table64size*sizeof(uint64_t);
void* instsize = next + 4*sizeof(void*);
if(actual_p==NULL) {
dynarec_log(LOG_INFO, "AllocDynarecMap(%p, %zu) failed, cancelling block\n", block, sz);
CancelBlock64();
CancelBlock64(0);
return NULL;
}
helper.block = p;
helper.native_start = (uintptr_t)p;
helper.tablestart = helper.native_start + helper.native_size;
if(helper.sons_size) {
helper.sons_x64 = (uintptr_t*)box_calloc(helper.sons_size, sizeof(uintptr_t));
helper.sons_native = (void**)box_calloc(helper.sons_size, sizeof(void*));
}
helper.insts_size = 0; // reset
helper.instsize = (instsize_t*)instsize;
*(dynablock_t**)actual_p = block;
// pass 3, emit (log emit native opcode)
if(rvtrans_dynarec_dump) {
dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u x64 bytes", (rvtrans_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize);
if(box64_dynarec_dump) {
dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u x64 bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize);
printFunctionAddr(helper.start, " => ");
dynarec_log(LOG_NONE, "%s\n", (rvtrans_dynarec_dump>1)?"\e[m":"");
dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");
}
int oldtable64size = helper.table64size;
size_t oldnativesize = helper.native_size;
@ -540,7 +550,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
helper.table64size = 0; // reset table64 (but not the cap)
native_pass3(&helper, addr);
if((oldnativesize!=helper.native_size) || (oldtable64size<helper.table64size)) {
printf_log(LOG_NONE, "RVTRANS: Warning, size difference in block between pass2 (%zu) & pass3 (%zu)!\n", sz, helper.native_size+helper.table64size*8);
printf_log(LOG_NONE, "BOX64: Warning, size difference in block between pass2 (%zu) & pass3 (%zu)!\n", sz, helper.native_size+helper.table64size*8);
uint8_t *dump = (uint8_t*)helper.start;
printf_log(LOG_NONE, "Dump of %d x64 opcodes:\n", helper.size);
for(int i=0; i<helper.size; ++i) {
@ -557,75 +567,43 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
if(helper.table64size) {
memcpy((void*)helper.tablestart, helper.table64, helper.table64size*8);
}
// all done...
__clear_cache(p, p+sz); // need to clear the cache before execution...
// keep size of instructions for signal handling
{
size_t cap = 1;
for(int i=0; i<helper.size; ++i)
cap += 1 + ((helper.insts[i].x64.size>helper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15;
size_t size = 0;
block->instsize = (instsize_t*)box_calloc(cap, sizeof(instsize_t));
for(int i=0; i<helper.size; ++i)
block->instsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4);
block->instsize = addInst(block->instsize, &size, &cap, 0, 0); // add a "end of block" mark, just in case
}
block->instsize = instsize;
// ok, free the helper now
box_free(helper.insts);
customFree(helper.insts);
helper.insts = NULL;
box_free(helper.table64);
customFree(helper.table64);
helper.table64 = NULL;
helper.instsize = NULL;
customFree(helper.predecessor);
helper.predecessor = NULL;
block->size = sz;
block->isize = helper.size;
block->actual_block = actual_p;
block->block = p;
block->jmpnext = next+sizeof(void*);
*(dynablock_t**)next = block;
*(void**)(next+2*sizeof(void*)) = native_next;
CreateJmpNext(block->jmpnext, next+2*sizeof(void*));
block->need_test = 0;
//block->x64_addr = (void*)start;
block->x64_size = end-start;
// all done...
__clear_cache(actual_p, actual_p+sz); // need to clear the cache before execution...
block->hash = X31_hash_code(block->x64_addr, block->x64_size);
// Check if something changed, to abbort if it as
if((block->hash != hash)) {
dynarec_log(LOG_INFO, "Warning, a block changed while beeing processed hash(%p:%ld)=%x/%x\n", block->x64_addr, block->x64_size, block->hash, hash);
CancelBlock64();
dynarec_log(LOG_DEBUG, "Warning, a block changed while beeing processed hash(%p:%ld)=%x/%x\n", block->x64_addr, block->x64_size, block->hash, hash);
AddHotPage(addr);
CancelBlock64(0);
return NULL;
}
if(!isprotectedDB(addr, end-addr)) {
dynarec_log(LOG_INFO, "Warning, block unprotected while beeing processed %p:%ld, cancelling\n", block->x64_addr, block->x64_size);
CancelBlock64();
return NULL;
dynarec_log(LOG_DEBUG, "Warning, block unprotected while beeing processed %p:%ld, cancelling\n", block->x64_addr, block->x64_size);
AddHotPage(addr);
block->need_test = 1;
//protectDB(addr, end-addr);
}
// fill sons if any
dynablock_t** sons = NULL;
int sons_size = 0;
if(helper.sons_size) {
sons = (dynablock_t**)box_calloc(helper.sons_size, sizeof(dynablock_t*));
for (int i=0; i<helper.sons_size; ++i) {
int created = 1;
dynablock_t *son = AddNewDynablock(block->parent, helper.sons_x64[i], &created);
if(created) { // avoid breaking a working block!
son->block = helper.sons_native[i];
son->x64_addr = (void*)helper.sons_x64[i];
son->x64_size = end-helper.sons_x64[i];
if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! (@%p / Native=%p)", son->x64_addr, son->block);}
son->father = block;
son->size = sz + son->block - block->block; // update size count, for debugging
//son->done = 1;
if(!son->parent)
son->parent = block->parent;
sons[sons_size] = son;
++sons_size;
}
}
if(sons_size) {
block->sons = sons;
block->sons_size = sons_size;
} else
box_free(sons);
}
box_free(helper.sons_x64);
helper.sons_x64 = NULL;
box_free(helper.sons_native);
helper.sons_native = NULL;
current_helper = NULL;
//block->done = 1;
return (void*)block;

View File

@ -0,0 +1,468 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>
#include <string.h>
#include <math.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "tools/bridge_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "emu/x87emu_private.h"
#include "x64trace.h"
#include "signals.h"
#include "dynarec_native.h"
#include "custommem.h"
#include "bridge.h"
#include "dynarec_native_functions.h"
void native_fstp(x64emu_t* emu, void* p)
{
    // Store ST0 to memory as an 80-bit long double. When the cached
    // long-double mirror still matches ST0 exactly, emit the original
    // 80-bit bytes verbatim; otherwise convert the 64-bit double.
    if(ST0.q==STld(0).uref)
        memcpy(p, &STld(0).ld, 10);
    else
        D2LD(&ST0.d, p);
}
// Debug helper called from generated code: log the value of a native
// register (printed as hex and decimal, with its index n).
void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n)
{
    (void)emu;  // unused, kept for the common native-call signature
    dynarec_log(LOG_DEBUG, "R%lu=0x%lx (%lu)\n", n, reg, reg);
}
void native_f2xm1(x64emu_t* emu)
{
    // x87 F2XM1: ST0 = 2^ST0 - 1
    double st = ST0.d;
    ST0.d = exp2(st) - 1.0;
}
void native_fyl2x(x64emu_t* emu)
{
    // x87 FYL2X: ST1 = ST1 * log2(ST0) (the pop is emitted by the dynarec)
    ST(1).d *= log2(ST0.d);
}
void native_ftan(x64emu_t* emu)
{
    // x87 FPTAN core: replace ST0 with tan(ST0); C2=0 flags the argument
    // as in range (no partial computation).
    double angle = ST0.d;
    ST0.d = tan(angle);
    emu->sw.f.F87_C2 = 0;
}
void native_fpatan(x64emu_t* emu)
{
    // x87 FPATAN: ST1 = atan2(ST1, ST0); the dynarec pops afterwards.
    double y = ST1.d;
    double x = ST0.d;
    ST1.d = atan2(y, x);
}
void native_fxtract(x64emu_t* emu)
{
    // x87 FXTRACT (stack already pushed by the emitted code): split the
    // value now in ST1 into its significand (left in ST1) and its unbiased
    // binary exponent (written to ST0).
    int32_t exponent = (int32_t)((ST1.q & 0x7ff0000000000000LL) >> 52) - 1023;
    ST1.d /= exp2(exponent);
    ST0.d = exponent;
}
void native_fprem(x64emu_t* emu)
{
    // x87 FPREM: partial remainder with a truncated quotient. The low three
    // quotient bits land in C0/C3/C1; C2=0 claims the reduction is complete.
    int32_t quotient = (int32_t)(ST0.d / ST1.d);
    ST0.d -= ST1.d * quotient;
    emu->sw.f.F87_C2 = 0;
    emu->sw.f.F87_C0 = quotient & 1;
    emu->sw.f.F87_C3 = (quotient >> 1) & 1;
    emu->sw.f.F87_C1 = (quotient >> 2) & 1;
}
void native_fyl2xp1(x64emu_t* emu)
{
    // x87 FYL2XP1: ST1 = ST1 * log2(ST0 + 1)
    ST(1).d *= log2(ST0.d + 1.0);
}
void native_fsincos(x64emu_t* emu)
{
    // x87 FSINCOS (stack already pushed): the old ST0 is now ST1; its sine
    // replaces ST1 and its cosine goes into the freshly pushed ST0.
    double angle = ST1.d;
    sincos(angle, &ST1.d, &ST0.d);
    emu->sw.f.F87_C2 = 0;   // argument treated as in range
}
// x87 FRNDINT: round ST0 to an integer using the emu's current rounding mode
// (fpu_round honors the control-word RC field).
void native_frndint(x64emu_t* emu)
{
    ST0.d = fpu_round(emu, ST0.d);
}
void native_fscale(x64emu_t* emu)
{
    // x87 FSCALE: ST0 *= 2^trunc(ST1); leave an exact zero untouched.
    double power = trunc(ST1.d);
    if (ST0.d != 0.0)
        ST0.d *= exp2(power);
}
void native_fsin(x64emu_t* emu)
{
    // x87 FSIN: ST0 = sin(ST0); C2=0 claims the argument was in range.
    double angle = ST0.d;
    ST0.d = sin(angle);
    emu->sw.f.F87_C2 = 0;
}
void native_fcos(x64emu_t* emu)
{
    // x87 FCOS: ST0 = cos(ST0); C2=0 claims the argument was in range.
    double angle = ST0.d;
    ST0.d = cos(angle);
    emu->sw.f.F87_C2 = 0;
}
// x87 FBLD: load an 80-bit packed BCD value from ed onto the FPU stack
// (delegates to the interpreter's BCD decoder).
void native_fbld(x64emu_t* emu, uint8_t* ed)
{
    fpu_fbld(emu, ed);
}
void native_fild64(x64emu_t* emu, int64_t* ed)
{
    // x87 FILD m64: load a 64-bit integer (memcpy avoids alignment traps)
    // and cache the exact integer plus the resulting double bit-pattern so
    // a later FISTP can round-trip the value losslessly.
    int64_t value;
    memcpy(&value, ed, sizeof(value));
    ST0.d = value;
    STll(0).sq = value;
    STll(0).sref = ST0.sq;
}
// x87 FBSTP: store ST0 to ed as 80-bit packed BCD (delegates to the
// interpreter's BCD encoder; the pop is emitted by the dynarec).
void native_fbstp(x64emu_t* emu, uint8_t* ed)
{
    fpu_fbst(emu, ed);
}
void native_fistp64(x64emu_t* emu, int64_t* ed)
{
    // x87 FISTP m64. If the value came from FILD64 and was never modified
    // (cached double image still matches), write the original integer back
    // verbatim. Otherwise round with the current mode, mapping out-of-range
    // and non-finite inputs to the x86 "integer indefinite" value.
    // memcpy is used throughout to sidestep alignment issues.
    if(STll(0).sref==ST(0).sq) {
        memcpy(ed, &STll(0).sq, sizeof(int64_t));
        return;
    }
    int64_t out;
    if(!isfinite(ST0.d)
       || isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL)
       || isless(ST0.d, (double)(int64_t)0x8000000000000000LL))
        out = 0x8000000000000000LL;     // integer indefinite
    else
        out = fpu_round(emu, ST0.d);
    memcpy(ed, &out, sizeof(out));
}
void native_fistt64(x64emu_t* emu, int64_t* ed)
{
    // x87 FISTTP m64: truncate toward zero regardless of rounding mode.
    // memcpy avoids alignment issues on the destination.
    int64_t truncated = ST0.d;
    memcpy(ed, &truncated, sizeof(truncated));
}
// x87 FLD m80: keep the raw 80-bit bytes in the long-double cache, convert
// them to a 64-bit double for ST0, then remember ST0's bit pattern so a
// later FSTP can detect the value is unmodified and emit the exact bytes.
void native_fld(x64emu_t* emu, uint8_t* ed)
{
    memcpy(&STld(0).ld, ed, 10);
    LD2D(&STld(0), &ST(0).d);
    STld(0).uref = ST0.q;
}
// Raise SIGILL at the current RIP (used for UD/UD2 and invalid encodings).
void native_ud(x64emu_t* emu)
{
    emit_signal(emu, SIGILL, (void*)R_RIP, 0);
}
// Raise SIGSEGV at the current RIP (privileged instruction in user mode).
void native_priv(x64emu_t* emu)
{
    emit_signal(emu, SIGSEGV, (void*)R_RIP, 0);
}
void native_fsave(x64emu_t* emu, uint8_t* ed)
{
    // x87 FSAVE: write the 28-byte FPU environment, then the 8 stack
    // registers as 80-bit long doubles (10 bytes each) starting at offset 28.
    // BUGFIX: the store loop called LD2D(p, &ST(i).d), which is a LOAD
    // (memory -> double) and would clobber the FPU registers from whatever
    // bytes happen to be at ed. Saving must convert double -> 80-bit memory,
    // i.e. D2LD(&ST(i).d, p), matching the D2LD(&ST0.d, p) store convention
    // used by native_fstp().
    fpu_savenv(emu, (char*)ed, 0);
    uint8_t* p = ed + 28;
    for (int i=0; i<8; ++i) {
        D2LD(&ST(i).d, p);
        p += 10;
    }
}
void native_frstor(x64emu_t* emu, uint8_t* ed)
{
    // x87 FRSTOR: reload the 28-byte FPU environment, then the 8 stack
    // registers from their 80-bit images (10 bytes each) at offset 28.
    // BUGFIX: the reload loop called D2LD(&ST(i).d, p), which is a STORE
    // (double -> memory) and overwrote the saved image with the stale
    // register contents instead of restoring them. Loading must convert
    // 80-bit memory -> double, i.e. LD2D(p, &ST(i).d), matching the
    // LD2D(...) load convention used by native_fld().
    fpu_loadenv(emu, (char*)ed, 0);
    uint8_t* p = ed + 28;
    for (int i=0; i<8; ++i) {
        LD2D(p, &ST(i).d);
        p += 10;
    }
}
void native_fprem1(x64emu_t* emu)
{
    // x87 FPREM1, simplified: IEEE-style remainder using a round-to-nearest
    // quotient; low three quotient bits go to C0/C3/C1, C2=0 marks "done".
    int32_t quotient = (int32_t)round(ST0.d / ST1.d);
    ST0.d -= ST1.d * quotient;
    emu->sw.f.F87_C2 = 0;
    emu->sw.f.F87_C0 = quotient & 1;
    emu->sw.f.F87_C3 = (quotient >> 1) & 1;
    emu->sw.f.F87_C1 = (quotient >> 2) & 1;
}
// Multiply two elements of GF(2^8) using the AES reduction polynomial
// x^8 + x^4 + x^3 + x + 1 (0x11b), per FIPS-197 section 4.2.
static uint8_t ff_mult(uint8_t a, uint8_t b)
{
    uint8_t product = 0;
    for (int bit = 0; bit < 8; ++bit) {
        if (b & 1)
            product ^= a;
        uint8_t carry = a & 0x80;   // will x^8 appear after doubling?
        a <<= 1;
        if (carry)
            a ^= 0x1b;              // reduce modulo the AES polynomial
        b >>= 1;
    }
    return product;
}
void native_aesimc(x64emu_t* emu, int xmm)
{
    // AESIMC: InvMixColumns on the given xmm register, column by column,
    // as a GF(2^8) matrix product (FIPS-197 section 5.3.3).
    static const uint8_t inv_mix[4][4] = {
        {0x0E, 0x0B, 0x0D, 0x09},
        {0x09, 0x0E, 0x0B, 0x0D},
        {0x0D, 0x09, 0x0E, 0x0B},
        {0x0B, 0x0D, 0x09, 0x0E},
    };
    sse_regs_t src = emu->xmm[xmm];
    for(int col=0; col<4; ++col)
        for(int row=0; row<4; ++row) {
            uint8_t acc = 0;
            for(int k=0; k<4; ++k)
                acc ^= ff_mult(inv_mix[row][k], src.ub[k+col*4]);
            emu->xmm[xmm].ub[row+col*4] = acc;
        }
}
void native_aesmc(x64emu_t* emu, int xmm)
{
    // AESMC: MixColumns on the given xmm register, column by column, as a
    // GF(2^8) matrix product (0x01 coefficients are the GF identity, so this
    // matches the original's plain xors for those terms).
    static const uint8_t mix[4][4] = {
        {0x02, 0x03, 0x01, 0x01},
        {0x01, 0x02, 0x03, 0x01},
        {0x01, 0x01, 0x02, 0x03},
        {0x03, 0x01, 0x01, 0x02},
    };
    sse_regs_t src = emu->xmm[xmm];
    for(int col=0; col<4; ++col)
        for(int row=0; row<4; ++row) {
            uint8_t acc = 0;
            for(int k=0; k<4; ++k)
                acc ^= ff_mult(mix[row][k], src.ub[k+col*4]);
            emu->xmm[xmm].ub[row+col*4] = acc;
        }
}
// AESDECLAST core: InvShiftRows then InvSubBytes on xmm (the round-key xor
// is emitted separately by the dynarec).
void native_aesdlast(x64emu_t* emu, int xmm)
{
// A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf
// A N K H E B O L I F C P M J G D
// InvShiftRows permutation: output byte i comes from input byte invshiftrows[i].
const uint8_t invshiftrows[] = {0,13,10, 7, 4, 1,14,11, 8, 5, 2,15,12, 9, 6, 3};
// Inverse AES S-box, per FIPS-197.
const uint8_t invsubbytes[256] = {
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
sse_regs_t eax1;
// STATE <- InvShiftRows( STATE )
for(int i=0; i<16; ++i)
eax1.ub[i] = emu->xmm[xmm].ub[invshiftrows[i]];
//STATE ← InvSubBytes( STATE );
for(int i=0; i<16; ++i)
emu->xmm[xmm].ub[i] = invsubbytes[eax1.ub[i]];
}
// AES ShiftRows permutation: output byte i comes from input byte shiftrows[i].
static const uint8_t shiftrows[] = {0, 5,10,15, 4, 9,14, 3, 8,13, 2, 7,12, 1, 6,11};
// AES S-box (SubBytes lookup table), per FIPS-197. Also used by
// native_aeskeygenassist below.
static const uint8_t subbytes[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
void native_aeselast(x64emu_t* emu, int xmm)
{
    // AESENCLAST core: ShiftRows then SubBytes on xmm (no MixColumns; the
    // round-key xor is emitted separately by the dynarec).
    // A0 B1 C2 D3 E4 F5 G6 H7 I8 J9 Ka Lb Mc Nd Oe Pf
    // A F K P E J O D I N C H M B G L
    sse_regs_t state = emu->xmm[xmm];   // snapshot, since we write in place
    for(int i=0; i<16; ++i)
        emu->xmm[xmm].ub[i] = subbytes[state.ub[shiftrows[i]]];
}
// AESDEC core: one full decryption round = InvShiftRows+InvSubBytes
// followed by InvMixColumns (round-key xor emitted separately).
void native_aesd(x64emu_t* emu, int xmm)
{
    native_aesdlast(emu, xmm);
    native_aesimc(emu, xmm);
}
// AESENC core: one full encryption round = ShiftRows+SubBytes followed by
// MixColumns (round-key xor emitted separately).
void native_aese(x64emu_t* emu, int xmm)
{
    native_aeselast(emu, xmm);
    native_aesmc(emu, xmm);
}
// AESKEYGENASSIST: apply SubWord to source dwords 1 and 3, then build the
// rotated+rcon variants, per the SDM: dword0 = SubWord(X1),
// dword1 = RotWord(SubWord(X1)) ^ rcon, and likewise for X3 in dwords 2/3.
void native_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8)
{
    sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex];  // memory operand if p, else register
    sse_regs_t *GX = &emu->xmm[gx];
    // SubBytes on source dword 1 (bytes 4..7) and dword 3 (bytes 12..15)
    for (int i = 4; i < 8; ++i)
        GX->ub[i] = subbytes[EX->ub[i]];
    for (int i = 12; i < 16; ++i)
        GX->ub[i] = subbytes[EX->ub[i]];
    GX->ud[0] = GX->ud[1];          // dword0 = SubWord(X1)
    uint8_t tmp8u = GX->ub[4];      // save low byte before the rotate
    GX->ud[1] = GX->ud[1] >> 8;     // RotWord: rotate dword1 right by 8 bits...
    GX->ub[7] = tmp8u;              // ...reinserting the saved byte at the top
    GX->ud[1] ^= u8;                // xor the round constant
    GX->ud[2] = GX->ud[3];          // same sequence for X3
    tmp8u = GX->ub[12];
    GX->ud[3] = GX->ud[3] >> 8;
    GX->ub[15] = tmp8u;
    GX->ud[3] ^= u8;
}
void native_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8)
{
    // PCLMULQDQ: carry-less (GF(2)) multiply of one 64-bit lane of GX by one
    // 64-bit lane of EX; imm8 bit 0 selects the GX lane, bit 4 the EX lane.
    // The 128-bit product replaces GX.
    sse_regs_t *EX = p?((sse_regs_t*)p):&emu->xmm[ex];
    sse_regs_t *GX = &emu->xmm[gx];
    const int glane = u8 & 1;
    const int elane = (u8 >> 4) & 1;
    const __int128 multiplicand = EX->q[elane];
    const uint64_t mask = GX->q[glane];
    __int128 acc = 0;
    for (int bit = 0; bit < 64; ++bit)
        if (mask & (1ULL << bit))
            acc ^= multiplicand << bit;
    GX->q[0] = (uint64_t)acc;
    GX->q[1] = (uint64_t)(acc >> 64);
}
// CLFLUSH: invalidate any dynarec blocks built over the flushed address
// range (self-modifying-code hygiene; no actual cache flush is needed).
void native_clflush(x64emu_t* emu, void* p)
{
    cleanDBFromAddressRange((uintptr_t)p, 8, 0);
}
// Decide whether the flags state at the exit of instruction ninst must be
// transformed to match what its in-block jump target expects on entry.
// Returns 1 when a conversion is needed, 0 when the states are compatible.
static int flagsCacheNeedsTransform(dynarec_native_t* dyn, int ninst) {
    int jmp = dyn->insts[ninst].x64.jmp_insts;
    if(jmp<0)   // jump leaves the block (or not a jump): nothing to match
        return 0;
    if(dyn->insts[ninst].f_exit.dfnone)  // flags are fully known, nothing we can do more
        return 0;
/* if((dyn->f.pending!=SF_SET)
    && (dyn->f.pending!=SF_SET_PENDING)) {
        if(dyn->f.pending!=SF_PENDING) {*/
    // Compare target entry state vs our exit state.
    switch (dyn->insts[jmp].f_entry.pending) {
        case SF_UNKNOWN: return 0;  // target accepts any state
        case SF_SET:    // target wants flags materialized
            if(dyn->insts[ninst].f_exit.pending!=SF_SET && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING)
                return 1;
            else
                return 0;
        case SF_SET_PENDING:    // target tolerates set or deferred-pending
            if(dyn->insts[ninst].f_exit.pending!=SF_SET
            && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING
            && dyn->insts[ninst].f_exit.pending!=SF_PENDING)
                return 1;
            else
                return 0;
        case SF_PENDING:
            if(dyn->insts[ninst].f_exit.pending!=SF_SET
            && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING
            && dyn->insts[ninst].f_exit.pending!=SF_PENDING)
                return 1;
            else
                // compatible pending states still differ if only one side
                // has "deferred flags: none" recorded
                return (dyn->insts[jmp].f_entry.dfnone == dyn->insts[ninst].f_exit.dfnone)?0:1;
    }
    // Other pending values (e.g. SF_SUB* combinations) fall through here.
    if(dyn->insts[jmp].f_entry.dfnone && !dyn->insts[ninst].f_exit.dfnone)
        return 1;
    return 0;
}
// Aggregate per-cache "needs transform" checks for instruction ninst.
// Bit 0 flags the x86 flags cache; the backend's OTHER_CACHE() macro may OR
// in additional bits (e.g. FPU/NEON cache state).
int CacheNeedsTransform(dynarec_native_t* dyn, int ninst) {
    int ret = 0;
    if (flagsCacheNeedsTransform(dyn, ninst)) ret|=1;
    OTHER_CACHE()
    return ret;
}
// Return pred if it is among instruction ninst's predecessors, else -1.
int isPred(dynarec_native_t* dyn, int ninst, int pred) {
    for(int idx=0; idx<dyn->insts[ninst].pred_sz; ++idx)
        if(dyn->insts[ninst].pred[idx]==pred)
            return pred;
    return -1;
}
// Pick the "nominal" predecessor of ninst: prefer the fall-through one
// (ninst-1) when present, otherwise the first recorded predecessor;
// -1 when there is none.
int getNominalPred(dynarec_native_t* dyn, int ninst) {
    if((ninst<=0) || !dyn->insts[ninst].pred_sz)
        return -1;
    if(isPred(dyn, ninst, ninst-1)!=-1)
        return ninst-1;
    return dyn->insts[ninst].pred[0];
}
#define F8 *(uint8_t*)(addr++)
// Do the GETED, but don't emit anything: advance addr past the operand
// bytes (SIB + displacement) implied by the already-consumed ModRM byte.
uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)
{
    (void)dyn; (void)addr; (void)ninst;
    const uint8_t mod = nextop>>6;
    const uint8_t rm = nextop&7;
    switch(mod) {
        case 3:
            // register operand: nothing follows
            break;
        case 0:
            // no displacement, except SIB with base==5 or rm==5 => disp32
            if(rm==4) {
                uint8_t sib = F8;
                if((sib&0x7)==5)
                    addr += 4;
            } else if(rm==5) {
                addr += 4;
            }
            break;
        default:
            // mod 1 or 2: optional SIB byte, then disp8 or disp32
            if(rm==4)
                ++addr;
            addr += (mod==2) ? 4 : 1;
            break;
    }
    return addr;
}
#undef F8
// Is what's pointed at addr a native (bridged) call? If yes, return 1 and
// report the bridge's trampoline address (*calladdress) and the callee's
// stack-cleanup byte count (*retn, non-zero for RET imm16 bridges).
int isNativeCall(dynarec_native_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)
{
    (void)dyn;
    #define PK(a) *(uint8_t*)(addr+a)
    #define PK32(a) *(int32_t*)(addr+a)
    // make sure the target is mapped before peeking at its bytes
    if(!addr || !getProtection(addr))
        return 0;
    if(PK(0)==0xff && PK(1)==0x25) { // "absolute" jump, maybe the GOT (well, RIP relative in fact)
        uintptr_t a1 = addr + 6 + (PK32(2)); // need to add a check to see if the address is from the GOT !
        addr = (uintptr_t)getAlternate(*(void**)a1);    // follow the indirection
    }
    // re-check: the followed pointer may be unmapped or NULL
    if(!addr || !getProtection(addr))
        return 0;
    onebridge_t *b = (onebridge_t*)(addr);
    // a bridge starts with INT3 ('\xCC') then the "SC" signature, has a
    // wrapper attached, and must not be the lazy PLT resolver itself
    if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) {
        // found !
        if(retn) *retn = (b->C3==0xC2)?b->N:0;  // RET imm16 => callee pops N bytes
        if(calladdress) *calladdress = addr+1;  // skip the leading INT3
        return 1;
    }
    return 0;
    #undef PK32
    #undef PK
}

View File

@ -0,0 +1,64 @@
#ifndef __DYNAREC_NATIVE_FUNCTIONS_H__
#define __DYNAREC_NATIVE_FUNCTIONS_H__
#include <stdint.h>
#include "dynarec_arch.h"
typedef struct x64emu_s x64emu_t;
// Helpers called from dynarec-generated code to emulate x87 instructions.
void native_fstp(x64emu_t* emu, void* p);
// Debug helper: log a native register value.
void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n);
void native_f2xm1(x64emu_t* emu);
void native_fyl2x(x64emu_t* emu);
void native_ftan(x64emu_t* emu);
void native_fpatan(x64emu_t* emu);
void native_fxtract(x64emu_t* emu);
void native_fprem(x64emu_t* emu);
void native_fyl2xp1(x64emu_t* emu);
void native_fsincos(x64emu_t* emu);
void native_frndint(x64emu_t* emu);
void native_fscale(x64emu_t* emu);
void native_fsin(x64emu_t* emu);
void native_fcos(x64emu_t* emu);
void native_fbld(x64emu_t* emu, uint8_t* ed);
void native_fild64(x64emu_t* emu, int64_t* ed);
void native_fbstp(x64emu_t* emu, uint8_t* ed);
void native_fistp64(x64emu_t* emu, int64_t* ed);
void native_fistt64(x64emu_t* emu, int64_t* ed);
void native_fld(x64emu_t* emu, uint8_t* ed);
void native_fsave(x64emu_t* emu, uint8_t* ed);
void native_frstor(x64emu_t* emu, uint8_t* ed);
void native_fprem1(x64emu_t* emu);
// AES-NI and PCLMULQDQ software fallbacks.
void native_aesd(x64emu_t* emu, int xmm);
void native_aese(x64emu_t* emu, int xmm);
void native_aesdlast(x64emu_t* emu, int xmm);
void native_aeselast(x64emu_t* emu, int xmm);
void native_aesimc(x64emu_t* emu, int xmm);
void native_aesmc(x64emu_t* emu, int xmm);
void native_aeskeygenassist(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8);
void native_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8);
void native_clflush(x64emu_t* emu, void* p);
// Signal raisers for invalid/privileged opcodes.
void native_ud(x64emu_t* emu);
void native_priv(x64emu_t* emu);
// Caches transformation (for loops) // Specific, needs to be written per backend
int CacheNeedsTransform(dynarec_native_t* dyn, int i1);
// predecessor access
int isPred(dynarec_native_t* dyn, int ninst, int pred);
int getNominalPred(dynarec_native_t* dyn, int ninst);
// Do the GETED, but don't emit anything...
uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
// Is what pointed at addr a native call? And if yes, to what function?
int isNativeCall(dynarec_native_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn);
ADDITIONNAL_DEFINITION()
#endif //__DYNAREC_NATIVE_FUNCTIONS_H__

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
@ -9,18 +6,19 @@
#include <string.h>
#include "debug.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynablock.h"
#include "dynarec_native.h"
#include "custommem.h"
#include "elfloader.h"
#include "dynarec_arch.h"
#include "dynarec_helper.h"
@ -33,18 +31,24 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
{
int ok = 1;
int ninst = 0;
int j64;
uintptr_t ip = addr;
uintptr_t init_addr = addr;
rex_t rex;
int rep; // 0 none, 1=F2 prefix, 2=F3 prefix
int need_epilog = 1;
dyn->sons_size = 0;
// Clean up (because there are multiple passes)
dyn->f.pending = 0;
dyn->f.dfnone = 0;
dyn->forward = 0;
dyn->forward_to = 0;
dyn->forward_size = 0;
dyn->forward_ninst = 0;
fpu_reset(dyn);
ARCH_INIT();
int reset_n = -1;
dyn->last_ip = (dyn->insts && dyn->insts[0].pred_sz)?0:ip; // RIP is always set at start of block unless there is a predecessor!
int stopblock = 2+(FindElfAddress(my_context, addr)?0:1); // if block is in elf_memory, it can be extended with bligblocks==2, else it needs 3
// ok, go now
INIT;
while(ok) {
@ -57,14 +61,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
dyn->f.pending = 0;
fpu_reset(dyn);
} else {
MESSAGE(LOG_DEBUG, "Reset Caches with %d\n",reset_n);
#if STEP > 1
// for STEP 2 & 3, just need to refrest with current, and undo the changes (push & swap)
dyn->n = dyn->insts[ninst].n;
neoncacheUnwind(&dyn->n);
#else
dyn->n = dyn->insts[reset_n].n;
#endif
fpu_reset_cache(dyn, ninst, reset_n);
dyn->f = dyn->insts[reset_n].f_exit;
if(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT) {
MESSAGE(LOG_DEBUG, "Apply Barrier Float\n");
@ -77,25 +74,27 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
}
}
reset_n = -1;
} else if(ninst && (dyn->insts[ninst].pred_sz!=1 || dyn->insts[ninst].pred[0]!=ninst-1))
} else if(ninst && (dyn->insts[ninst].pred_sz>1 || (dyn->insts[ninst].pred_sz==1 && dyn->insts[ninst].pred[0]!=ninst-1)))
dyn->last_ip = 0; // reset IP if some jump are comming here
// propagate ST stack state, especial stack pop that are defered
if(dyn->n.stack_pop) {
for(int j=0; j<24; ++j)
if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D || dyn->n.neoncache[j].t == NEON_CACHE_ST_F)) {
if(dyn->n.neoncache[j].n<dyn->n.stack_pop)
dyn->n.neoncache[j].v = 0;
else
dyn->n.neoncache[j].n-=dyn->n.stack_pop;
}
dyn->n.stack_pop = 0;
}
dyn->n.stack = dyn->n.stack_next;
dyn->n.news = 0;
dyn->n.stack_push = 0;
dyn->n.swapped = 0;
fpu_propagate_stack(dyn, ninst);
NEW_INST;
if(dyn->insts[ninst].pred_sz>1) {SMSTART();}
fpu_reset_scratch(dyn);
if((dyn->insts[ninst].x64.need_before&~X_PEND) && !dyn->insts[ninst].pred_sz) {
READFLAGS(dyn->insts[ninst].x64.need_before&~X_PEND);
}
#ifdef HAVE_TRACE
if(my_context->dec && box64_dynarec_trace) {
if((trace_end == 0)
|| ((ip >= trace_start) && (ip < trace_end))) {
MESSAGE(LOG_DUMP, "TRACE ----\n");
fpu_reflectcache(dyn, ninst, x1, x2, x3);
GO_TRACE();
MESSAGE(LOG_DUMP, "----------\n");
}
}
#endif
rep = 0;
uint8_t pk = PK(0);
while((pk==0xF2) || (pk==0xF3)) {
@ -132,6 +131,9 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
dyn->last_ip = 0;
}
}
#ifndef PROT_READ
#define PROT_READ 1
#endif
#if STEP != 0
if(!ok && !need_epilog && (addr < (dyn->start+dyn->isize))) {
ok = 1;
@ -150,7 +152,29 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
}
}
#else
if(!ok && !need_epilog && rvtrans_dynarec_bigblock && getProtection(addr+3)&~PROT_CUSTOM)
if(dyn->forward) {
if(dyn->forward_to == addr && !need_epilog) {
// we made it!
if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %p -> %p\n", dyn->forward_to-dyn->forward, (void*)dyn->forward, (void*)dyn->forward_to);
dyn->forward = 0;
dyn->forward_to = 0;
dyn->forward_size = 0;
dyn->forward_ninst = 0;
ok = 1; // in case it was 0
} else if ((dyn->forward_to < addr) || !ok) {
// something when wrong! rollback
if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Could not forward extend block for %d bytes %p -> %p\n", dyn->forward_to-dyn->forward, (void*)dyn->forward, (void*)dyn->forward_to);
ok = 0;
dyn->size = dyn->forward_size;
ninst = dyn->forward_ninst;
addr = dyn->forward;
dyn->forward = 0;
dyn->forward_to = 0;
dyn->forward_size = 0;
dyn->forward_ninst = 0;
}
// else just continue
} else if(!ok && !need_epilog && box64_dynarec_bigblock && (getProtection(addr+3)&~PROT_READ))
if(*(uint32_t*)addr!=0) { // check if need to continue (but is next 4 bytes are 0, stop)
uintptr_t next = get_closest_next(dyn, addr);
if(next && (
@ -165,16 +189,34 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
reset_n = ii;
ii=ninst;
}
if(rvtrans_dynarec_dump) dynarec_log(LOG_NONE, "Extend block %p, %p -> %p (ninst=%d, jump from %d)\n", dyn, (void*)addr, (void*)next, ninst, reset_n);
} else if(next && (next-addr)<30) {
if(rvtrans_dynarec_dump) dynarec_log(LOG_NONE, "Cannot extend block %p -> %p (%02X %02X %02X %02X %02X %02X %02X %02x)\n", (void*)addr, (void*)next, PK(0), PK(1), PK(2), PK(3), PK(4), PK(5), PK(6), PK(7));
if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Extend block %p, %p -> %p (ninst=%d, jump from %d)\n", dyn, (void*)addr, (void*)next, ninst, reset_n);
} else if(next && (next-addr)<box64_dynarec_forward && (getProtection(next)&PROT_READ)/*box64_dynarec_bigblock>=stopblock*/) {
dyn->forward = addr;
dyn->forward_to = next;
dyn->forward_size = dyn->size;
dyn->forward_ninst = ninst;
reset_n = -2;
ok = 1;
}
}
#endif
if(ok<0) {ok = 0; need_epilog=1;}
if(ok<0) {
ok = 0; need_epilog=1;
#if STEP == 0
if(ninst) {
--ninst;
if(!dyn->insts[ninst].x64.barrier) {
BARRIER(BARRIER_FLOAT);
}
dyn->insts[ninst].x64.need_after |= X_PEND;
++ninst;
}
#endif
}
++ninst;
#if STEP == 0
if(ok && !isJumpTableDefault64((void*)addr) && (rvtrans_dynarec_bigblock<2))
if(ok && (((box64_dynarec_bigblock<stopblock) && !isJumpTableDefault64((void*)addr))
|| (addr>=box64_nodynarec_start && addr<box64_nodynarec_end)))
#else
if(ok && (ninst==dyn->size))
#endif
@ -182,15 +224,14 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
int j32;
MAYUSE(j32);
MESSAGE(LOG_DEBUG, "Stopping block %p (%d / %d)\n",(void*)init_addr, ninst, dyn->size);
if(!box64_dynarec_dump && addr>=box64_nodynarec_start && addr<box64_nodynarec_end)
dynarec_log(LOG_INFO, "Stopping block in no-dynarec zone\n");
--ninst;
if(!dyn->insts[ninst].x64.barrier) {
BARRIER(BARRIER_FLOAT);
}
#if STEP == 0
if(dyn->insts[ninst].x64.set_flags)
dyn->insts[ninst].x64.default_need |= X_PEND;
else
dyn->insts[ninst].x64.use_flags |= X_PEND;
dyn->insts[ninst].x64.need_after |= X_PEND;
#endif
++ninst;
fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
@ -203,6 +244,6 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
jump_to_epilog(dyn, ip, 0, ninst); // no linker here, it's an unknow instruction
}
FINI;
MESSAGE(LOG_DUMP, "---- END OF BLOCK ---- (%d, %d sons)\n", dyn->size, dyn->sons_size);
MESSAGE(LOG_DUMP, "---- END OF BLOCK ---- (%d)\n", dyn->size);
return addr;
}

View File

@ -0,0 +1,15 @@
#ifndef __DYNAREC_NEXT_H__
#define __DYNAREC_NEXT_H__
// Map the generic native_next/prolog/epilog entry points onto the
// backend-specific assembly stubs for the selected architecture.
#ifdef RV64
void rv64_next(void) EXPORTDYN;
void rv64_prolog(x64emu_t* emu, void* addr) EXPORTDYN;
void rv64_epilog() EXPORTDYN;
#define native_next rv64_next
#define native_prolog rv64_prolog
#define native_epilog rv64_epilog
#else
#error Unsupported architecture
#endif
#endif //__DYNAREC_NEXT_H__

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __DYNAREC_PRIVATE_H_
#define __DYNAREC_PRIVATE_H_
@ -27,7 +24,6 @@
#define SF_SUB 4
#define SF_SUBSET (SF_SUB|SF_SET)
#define SF_SUBSET_PENDING (SF_SUBSET|SF_PENDING)
#define SF_MAYSET 8
typedef struct instruction_x64_s {
uintptr_t addr; //address of the instruction
@ -40,9 +36,10 @@ typedef struct instruction_x64_s {
uint8_t state_flags;// One of SF_XXX state
uint8_t use_flags; // 0 or combination of X_?F
uint8_t set_flags; // 0 or combination of X_?F
uint8_t default_need;// 0 or X_PEND basically
uint8_t need_flags; // calculated
uint8_t old_use; // calculated
uint8_t may_set; // 1 if the flags may not be set
uint8_t gen_flags; // calculated
uint8_t need_before;// calculated
uint8_t need_after; // calculated
} instruction_x64_t;
void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name);

View File

@ -1,9 +1,48 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __NATIVE_LOCK__H__
#define __NATIVE_LOCK__H__
// Generic atomic-primitive macros, mapped onto the backend's lock helpers.
#ifdef RV64
#include "rv64/rv64_lock.h"
#define USE_CAS
// RV64 is quite strict (or at least strongly recommends) about what you can
// do between an LR and an SC: calling a function in between is effectively
// forbidden, so the read and write halves cannot be separated, and a
// Compare-And-Swap mechanism is used instead.
// There is no byte or 2-byte atomic access on RISC-V.
#define native_lock_xchg_dd(A, B) rv64_lock_xchg_dd(A, B)
#define native_lock_xchg_d(A, B) rv64_lock_xchg_d(A, B)
#define native_lock_storeifref(A, B, C) rv64_lock_storeifref(A, B, C)
#define native_lock_storeifref_d(A, B, C) rv64_lock_storeifref_d(A, B, C)
#define native_lock_storeifref2_d(A, B, C) rv64_lock_storeifref2_d(A, B, C)
#define native_lock_storeifnull(A, B) rv64_lock_storeifnull(A, B)
#define native_lock_storeifnull_d(A, B) rv64_lock_storeifnull_d(A, B)
#define native_lock_decifnot0b(A) rv64_lock_decifnot0b(A)
#define native_lock_storeb(A, B) rv64_lock_storeb(A, B)
#define native_lock_incif0(A) rv64_lock_incif0(A)
#define native_lock_decifnot0(A) rv64_lock_decifnot0(A)
#define native_lock_store(A, B) rv64_lock_store(A, B)
#define native_lock_cas_d(A, B, C) rv64_lock_cas_d(A, B, C)
#define native_lock_cas_dd(A, B, C) rv64_lock_cas_dd(A, B, C)
#define native_lock_xchg_b(A, B) rv64_lock_xchg_b(A, B)
#define native_lock_cas_b(A, B, C) rv64_lock_cas_b(A, B, C)
#define native_lock_cas_h(A, B, C) rv64_lock_cas_h(A, B, C)
// read/write pairs implemented as plain read + CAS against the read value
// (tmpcas must be declared by the using code)
#define native_lock_read_b(A) tmpcas=*(uint8_t*)(A)
#define native_lock_write_b(A, B) rv64_lock_cas_b(A, tmpcas, B)
#define native_lock_read_h(A) tmpcas=*(uint16_t*)(A)
#define native_lock_write_h(A, B) rv64_lock_cas_h(A, tmpcas, B)
#define native_lock_read_d(A) tmpcas=*(uint32_t*)(A)
#define native_lock_write_d(A, B) rv64_lock_cas_d(A, tmpcas, B)
#define native_lock_read_dd(A) tmpcas=*(uint64_t*)(A)
#define native_lock_write_dd(A, B) rv64_lock_cas_dd(A, tmpcas, B)
// There is no atomic move on 16 bytes, so fake it with a double-word CAS.
#define native_lock_read_dq(A, B, C) *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1];
#define native_lock_write_dq(A, B, C) rv64_lock_cas_dq(C, A, tmpcas, B);
#else
#error Unsupported architecture
#endif
#endif //#define __NATIVE_LOCK__H__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,394 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "my_cpuid.h"
#include "emu/x87emu_private.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"
// Decodes the 0F-escaped (two-byte) x86-64 opcodes at 'addr' and emits the
// corresponding RV64 code into 'dyn'. Returns the address just past the
// decoded instruction. '*ok' is cleared to stop the block, '*need_epilog'
// signals whether a generic epilog is still required.
uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
{
    (void)ip; (void)need_epilog;

    uint8_t opcode = F8;
    uint8_t nextop, u8;
    uint8_t gd, ed;
    uint8_t wback, wb2;
    uint8_t eb1, eb2;
    int32_t i32, i32_;
    int cacheupd = 0;
    int v0, v1;
    int q0, q1;
    int d0, d1;
    int s0;
    uint64_t tmp64u;
    int64_t j64;
    int64_t fixedaddress;
    int unscaled;
    MAYUSE(wb2);
    MAYUSE(eb1);
    MAYUSE(eb2);
    MAYUSE(q0);
    MAYUSE(q1);
    MAYUSE(d0);
    MAYUSE(d1);
    MAYUSE(s0);
    MAYUSE(j64);
    MAYUSE(cacheupd);

    switch(opcode) {

        case 0x01:
            // 0F 01 group (xgetbv & friends) is not implemented: trap to native_ud
            INST_NAME("FAKE xgetbv");
            nextop = F8;
            addr = fakeed(dyn, addr, ninst, nextop);
            SETFLAGS(X_ALL, SF_SET);    // Hack to put flags in "don't care" state
            GETIP(ip);
            STORE_XEMU_CALL();
            CALL(native_ud, -1);
            LOAD_XEMU_CALL();
            jump_to_epilog(dyn, 0, xRIP, ninst);
            *need_epilog = 0;
            *ok = 0;
            break;

        case 0x05:
            INST_NAME("SYSCALL");
            SMEND();
            GETIP(addr);
            STORE_XEMU_CALL();
            CALL_S(x64Syscall, -1);
            LOAD_XEMU_CALL();
            TABLE64(x3, addr); // expected return address
            BNE_MARK(xRIP, x3);
            LW(w1, xEmu, offsetof(x64emu_t, quit));
            CBZ_NEXT(w1);
            MARK;
            // xRIP changed or emu->quit set: leave the block
            LOAD_XEMU_REM();
            jump_to_epilog(dyn, 0, xRIP, ninst);
            break;

        case 0x09:
            // privileged instruction: trap to native_ud
            INST_NAME("WBINVD");
            SETFLAGS(X_ALL, SF_SET);    // Hack to put flags in "don't care" state
            GETIP(ip);
            STORE_XEMU_CALL();
            CALL(native_ud, -1);
            LOAD_XEMU_CALL();
            jump_to_epilog(dyn, 0, xRIP, ninst);
            *need_epilog = 0;
            *ok = 0;
            break;

        case 0x0B:
            INST_NAME("UD2");
            SETFLAGS(X_ALL, SF_SET);    // Hack to put flags in "don't care" state
            GETIP(ip);
            STORE_XEMU_CALL();
            CALL(native_ud, -1);
            LOAD_XEMU_CALL();
            jump_to_epilog(dyn, 0, xRIP, ninst);
            *need_epilog = 0;
            *ok = 0;
            break;

        case 0x18:
            // 0F 18 /0../3 are PREFETCH hints, /4../7 (and reg forms) are NOPs
            nextop = F8;
            if((nextop&0xC0)==0xC0) {
                INST_NAME("NOP (multibyte)");
            } else
            switch((nextop>>3)&7) {
                case 0:
                    DEFAULT;
                    break;
                case 1:
                    DEFAULT;
                    break;
                case 2:
                    DEFAULT;
                    break;
                case 3:
                    DEFAULT;
                    break;
                default:
                    INST_NAME("NOP (multibyte)");
                    FAKEED;
            }
            break;

        case 0x1F:
            INST_NAME("NOP (multibyte)");
            nextop = F8;
            FAKEED;
            break;

        case 0x31:
            INST_NAME("RDTSC");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            CALL(ReadTSC, xRAX);    // will return the u64 in xEAX
            SRLI(xRDX, xRAX, 32);   // EDX:EAX <- tsc
            ZEROUP(xRAX);           // wipe upper part
            break;

        // CMOVcc Gd, Ed: conditionally skip the move with a single branch
        #define GO(GETFLAGS, NO, YES, F)    \
            READFLAGS(F);                   \
            GETFLAGS;                       \
            nextop=F8;                      \
            GETGD;                          \
            if(MODREG) {                    \
                ed = xRAX+(nextop&7)+(rex.b<<3);    \
                B##NO(x1, 8);               \
                MV(gd, ed);                 \
            } else {                        \
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \
                B##NO(x1, 8);               \
                LDxw(gd, ed, fixedaddress); \
            }                               \
            if(!rex.w) ZEROUP(gd);
        GOCOND(0x40, "CMOV", "Gd, Ed");
        #undef GO

        case 0x77:
            INST_NAME("EMMS");
            // empty MMX, FPU now usable
            mmx_purgecache(dyn, ninst, 0, x1);
            /*emu->top = 0;
            emu->fpu_stack = 0;*/ //TODO: Check if something is needed here?
            break;

        // Jcc Id: conditional jump, either within the block or to the next one
        #define GO(GETFLAGS, NO, YES, F)    \
            READFLAGS(F);                   \
            i32_ = F32S;                    \
            BARRIER(BARRIER_MAYBE);         \
            JUMP(addr+i32_, 1);             \
            GETFLAGS;                       \
            if(dyn->insts[ninst].x64.jmp_insts==-1 ||   \
                CHECK_CACHE()) {            \
                /* out of the block */      \
                i32 = dyn->insts[ninst].epilog-(dyn->native_size);  \
                B##NO##_safe(x1, i32);      \
                if(dyn->insts[ninst].x64.jmp_insts==-1) {   \
                    if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))  \
                        fpu_purgecache(dyn, ninst, 1, x1, x2, x3);      \
                    jump_to_next(dyn, addr+i32_, 0, ninst);             \
                } else {                    \
                    CacheTransform(dyn, ninst, cacheupd, x1, x2, x3);   \
                    i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);   \
                    B(i32);                 \
                }                           \
            } else {                        \
                /* inside the block */      \
                i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);       \
                B##YES##_safe(x1, i32);     \
            }
        GOCOND(0x80, "J", "Id");
        #undef GO

        // SETcc Eb: x3 holds the 0/1 condition result
        #define GO(GETFLAGS, NO, YES, F)    \
            READFLAGS(F);                   \
            GETFLAGS;                       \
            nextop=F8;                      \
            S##YES(x3, x1);                 \
            if(MODREG) {                    \
                if(rex.rex) {               \
                    eb1= xRAX+(nextop&7)+(rex.b<<3);    \
                    eb2 = 0;                \
                } else {                    \
                    ed = (nextop&7);        \
                    eb2 = (ed>>2)*8;        \
                    eb1 = xRAX+(ed&3);      \
                }                           \
                if (eb2) {                  \
                    /* high-byte target (AH..BH): mask = 0xFFFFFFFFFFFF00FF */  \
                    LUI(x1, 0xffffffffffff0);   \
                    ORI(x1, x1, 0xff);      \
                    AND(eb1, eb1, x1);      \
                    SLLI(x3, x3, 8);        /* move the 0/1 result to bits 8-15 */  \
                } else {                    \
                    /* 0xf00 sign-extends to 0x...FFFFFF00: clears the low byte */  \
                    ANDI(eb1, eb1, 0xf00);  \
                }                           \
                OR(eb1, eb1, x3);           \
            } else {                        \
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress,rex, NULL, 1, 0); \
                SB(x3, ed, fixedaddress);   \
                SMWRITE();                  \
            }
        GOCOND(0x90, "SET", "Eb");
        #undef GO

        case 0xA2:
            INST_NAME("CPUID");
            MV(A1, xRAX);
            CALL_(my_cpuid, -1, 0);
            // BX and DX are not synchronized during the call, so need to force the update
            LD(xRDX, xEmu, offsetof(x64emu_t, regs[_DX]));
            LD(xRBX, xEmu, offsetof(x64emu_t, regs[_BX]));
            break;

        case 0xAE:
            nextop = F8;
            if((nextop&0xF8)==0xE8) {
                INST_NAME("LFENCE");
                SMDMB();
            } else
            if((nextop&0xF8)==0xF0) {
                INST_NAME("MFENCE");
                SMDMB();
            } else
            if((nextop&0xF8)==0xF8) {
                INST_NAME("SFENCE");
                SMDMB();
            } else {
                switch((nextop>>3)&7) {
                    case 7:
                        INST_NAME("CLFLUSH Ed");
                        MESSAGE(LOG_DUMP, "Need Optimization?\n");
                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                        if(wback!=A1) {
                            MV(A1, wback);
                        }
                        CALL_(native_clflush, -1, 0);
                        break;
                    default:
                        DEFAULT;
                }
            }
            break;

        case 0xAF:
            INST_NAME("IMUL Gd, Ed");
            SETFLAGS(X_ALL, SF_PENDING);
            nextop = F8;
            GETGD;
            GETED(0);
            if(rex.w) {
                // 64bits imul: keep the high half for the deferred flags computation
                UFLAG_IF {
                    MULH(x3, gd, ed);
                    MUL(gd, gd, ed);
                    UFLAG_OP1(x3);
                    UFLAG_RES(gd);
                    UFLAG_DF(x3, d_imul64);
                } else {
                    MULxw(gd, gd, ed);
                }
            } else {
                // 32bits imul: full 64-bit product, high half goes to op1 for flags
                UFLAG_IF {
                    MUL(gd, gd, ed);
                    UFLAG_RES(gd);
                    SRLI(x3, gd, 32);
                    UFLAG_OP1(x3);
                    UFLAG_DF(x3, d_imul32);
                    SEXT_W(gd, gd);
                } else {
                    MULxw(gd, gd, ed);
                }
            }
            break;

        case 0xB6:
            INST_NAME("MOVZX Gd, Eb");
            nextop = F8;
            GETGD;
            if(MODREG) {
                if(rex.rex) {
                    eb1 = xRAX+(nextop&7)+(rex.b<<3);
                    eb2 = 0;
                } else {
                    ed = (nextop&7);
                    eb1 = xRAX+(ed&3);  // Ax, Cx, Dx or Bx
                    eb2 = (ed&4)>>2;    // L or H
                }
                if (eb2) {
                    // high-byte source: shift down before masking
                    SRLI(gd, eb1, 8);
                    ANDI(gd, gd, 0xff);
                } else {
                    ANDI(gd, eb1, 0xff);
                }
            } else {
                SMREAD();
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                LBU(gd, ed, fixedaddress);
            }
            break;

        case 0xB7:
            INST_NAME("MOVZX Gd, Ew");
            nextop = F8;
            GETGD;
            if(MODREG) {
                ed = xRAX+(nextop&7)+(rex.b<<3);
                // zero-extend the low 16 bits
                SLLI(gd, ed, 48);
                SRLI(gd, gd, 48);
            } else {
                SMREAD();
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                LHU(gd, ed, fixedaddress);
            }
            break;

        case 0xBE:
            INST_NAME("MOVSX Gd, Eb");
            nextop = F8;
            GETGD;
            if(MODREG) {
                if(rex.rex) {
                    wback = xRAX+(nextop&7)+(rex.b<<3);
                    wb2 = 0;
                } else {
                    wback = (nextop&7);
                    wb2 = (wback>>2)*8;     // 8 if high-byte register
                    wback = xRAX+(wback&3);
                }
                // sign-extend byte (low or high) into gd
                SLLI(gd, wback, 56-wb2);
                SRAI(gd, gd, 56);
            } else {
                SMREAD();
                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, NULL, 1, 0);
                LB(gd, ed, fixedaddress);
            }
            if(!rex.w)
                ZEROUP(gd);
            break;

        case 0xBF:
            INST_NAME("MOVSX Gd, Ew");
            nextop = F8;
            GETGD;
            if(MODREG) {
                ed = xRAX+(nextop&7)+(rex.b<<3);
                // sign-extend the low 16 bits
                SLLI(gd, ed, 48);
                SRAI(gd, gd, 48);
            } else {
                SMREAD();
                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, NULL, 1, 0);
                LH(gd, ed, fixedaddress);
            }
            if(!rex.w)
                ZEROUP(gd);
            break;

        default:
            DEFAULT;
    }
    return addr;
}

View File

@ -0,0 +1,85 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "custommem.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_helper.h"
#include "dynarec_rv64_functions.h"
// Extract the reg field of the ModRM byte (plus the REX.R extension) into gd.
#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3)

// Decodes opcodes carrying a FS:/GS: segment-override prefix; 'seg' selects
// the segment, whose base is fetched into a scratch register at runtime and
// added to the computed effective address.
// Returns the address just past the decoded instruction.
uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog)
{
    (void)ip; (void)rep; (void)need_epilog;

    uint8_t opcode = F8;
    uint8_t nextop;
    uint8_t u8;
    uint8_t gd, ed, eb1, eb2, gb1, gb2;
    uint8_t wback, wb1, wb2, wb;
    int64_t i64, j64;
    int v0, v1;
    int q0;
    int d0;
    int64_t fixedaddress;
    int unscaled;
    MAYUSE(eb1);
    MAYUSE(eb2);
    MAYUSE(wb1);
    MAYUSE(wb2);
    MAYUSE(gb1);
    MAYUSE(gb2);
    MAYUSE(j64);
    MAYUSE(d0);
    MAYUSE(q0);
    MAYUSE(v0);
    MAYUSE(v1);

    // consume REP/REPNE prefixes that may follow the segment override
    while((opcode==0xF2) || (opcode==0xF3)) {
        rep = opcode-0xF1;
        opcode = F8;
    }
    // only the last REX prefix before the opcode counts; earlier ones are ignored
    rex.rex = 0;
    while(opcode>=0x40 && opcode<=0x4f) {
        rex.rex = opcode;
        opcode = F8;
    }

    switch(opcode) {
        case 0x8B:
            INST_NAME("MOV Gd, Seg:Ed");
            grab_segdata(dyn, addr, ninst, x4, seg);    // x4 <- segment base
            nextop=F8;
            GETGD;
            if(MODREG) {    // reg <= reg (segment base is irrelevant for register operands)
                MVxw(gd, xRAX+(nextop&7)+(rex.b<<3));
            } else {        // reg <= seg:mem
                SMREAD();
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
                ADD(x4, ed, x4);    // final address = effective address + segment base
                LDxw(gd, x4, 0);
            }
            break;
        default:
            DEFAULT;
    }
    return addr;
}

View File

@ -0,0 +1,259 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_helper.h"
#include "dynarec_rv64_functions.h"
// Decodes opcodes carrying the 0x66 operand-size prefix (16-bit operations)
// and emits the corresponding RV64 code. Returns the address just past the
// decoded instruction.
uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
{
    uint8_t opcode = F8;
    uint8_t nextop, u8;
    int16_t i16;
    uint16_t u16;
    uint64_t u64;
    int32_t i32;
    int64_t j64;
    uint8_t gd, ed;
    uint8_t wback, wb1;
    int64_t fixedaddress;
    int unscaled;
    int lock;
    MAYUSE(u8);
    MAYUSE(u16);
    MAYUSE(u64);
    MAYUSE(j64);
    MAYUSE(lock);

    while((opcode==0x2E) || (opcode==0x36) || (opcode==0x66))   // ignoring CS:, SS: or multiple 0x66
        opcode = F8;
    while((opcode==0xF2) || (opcode==0xF3)) {
        rep = opcode-0xF1;
        opcode = F8;
    }
    // only the last REX prefix before the opcode counts; earlier ones are ignored
    rex.rex = 0;
    while(opcode>=0x40 && opcode<=0x4f) {
        rex.rex = opcode;
        opcode = F8;
    }
    if(rex.w && opcode!=0x0f)   // rex.w cancels "66", but not for 66 0f type of prefix
        return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog);     // addr-1, to "put back" opcode

    switch(opcode) {
        case 0x0F:
            addr = dynarec64_660F(dyn, addr, ip, ninst, rex, ok, need_epilog);
            break;

        case 0x3D:
            INST_NAME("CMP AX, Iw");
            SETFLAGS(X_ALL, SF_SET_PENDING);
            i32 = F16;
            // x1 <- zero-extended AX
            SLLI(x1, xRAX, 48);
            SRLI(x1, x1, 48);
            if(i32) {
                MOV32w(x2, i32);
                emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6);
            } else {
                emit_cmp16_0(dyn, ninst, x1, x3, x4);
            }
            break;

        case 0x70:
        case 0x71:
        case 0x72:
        case 0x73:
        case 0x74:
        case 0x75:
        case 0x76:
        case 0x77:
        case 0x78:
        case 0x79:
        case 0x7a:
        case 0x7b:
        case 0x7c:
        case 0x7d:
        case 0x7e:
        case 0x7f:
            // just use regular conditional jump
            return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog);

        case 0x81:
        case 0x83:
            nextop = F8;
            switch((nextop>>3)&7) {
                case 1: // OR
                    if(opcode==0x81) {INST_NAME("OR Ew, Iw");} else {INST_NAME("OR Ew, Ib");}
                    SETFLAGS(X_ALL, SF_SET_PENDING);
                    GETEW(x1, (opcode==0x81)?2:1);
                    if(opcode==0x81) i16 = F16S; else i16 = F8S;
                    MOV64x(x5, i16);
                    emit_or16(dyn, ninst, x1, x5, x2, x4);
                    EWBACK;
                    break;
                default:
                    DEFAULT;
            }
            break;

        case 0x89:
            INST_NAME("MOV Ew, Gw");
            nextop = F8;
            GETGD;
            if(MODREG) {
                ed = xRAX+(nextop&7)+(rex.b<<3);
                if(ed!=gd) {
                    // we don't use GETGW above, so insert gd's low 16 bits by hand:
                    // LUI sign-extends, so x1 = 0xFFFFFFFFFFFF0000
                    LUI(x1, 0xffff0);
                    AND(ed, ed, x1);    // clear low 16 bits, keep upper 48
                    SLLI(x2, gd, 48);
                    SRLI(x2, x2, 48);   // x2 = zero-extended gw
                    OR(ed, ed, x2);
                }
            } else {
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 0, 0);
                SH(gd, ed, fixedaddress);
                SMWRITELOCK(lock);
            }
            break;

        case 0xC1:
            nextop = F8;
            switch((nextop>>3)&7) {
                case 0:
                    INST_NAME("ROL Ew, Ib");
                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                    SETFLAGS(X_OF|X_CF, SF_SET);
                    GETEW(x1, 1);
                    u8 = F8;
                    MOV32w(x2, u8);
                    CALL_(rol16, x1, x3);
                    EWBACK;
                    break;
                case 1:
                    INST_NAME("ROR Ew, Ib");
                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                    SETFLAGS(X_OF|X_CF, SF_SET);
                    GETEW(x1, 1);
                    u8 = F8;
                    MOV32w(x2, u8);
                    CALL_(ror16, x1, x3);
                    EWBACK;
                    break;
                case 2:
                    INST_NAME("RCL Ew, Ib");
                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                    READFLAGS(X_CF);
                    SETFLAGS(X_OF|X_CF, SF_SET);
                    GETEW(x1, 1);
                    u8 = F8;
                    MOV32w(x2, u8);
                    CALL_(rcl16, x1, x3);
                    EWBACK;
                    break;
                case 3:
                    INST_NAME("RCR Ew, Ib");
                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                    READFLAGS(X_CF);
                    SETFLAGS(X_OF|X_CF, SF_SET);
                    GETEW(x1, 1);
                    u8 = F8;
                    MOV32w(x2, u8);
                    CALL_(rcr16, x1, x3);
                    EWBACK;
                    break;
                case 4:
                case 6:
                    INST_NAME("SHL Ew, Ib");
                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
                    SETFLAGS(X_ALL, SF_PENDING);
                    GETEW(x1, 1);
                    u8 = F8;
                    UFLAG_IF {MOV32w(x2, (u8&0x1f));}
                    UFLAG_OP12(ed, x2)
                    if(MODREG) {
                        // NOTE(review): 48+(u8&0x1f) exceeds 63 when the masked
                        // count is >15 -- invalid RV64 shamt; confirm such counts
                        // cannot reach here or handle them as a zero result.
                        SLLI(ed, ed, 48+(u8&0x1f));
                        SRLI(ed, ed, 48);
                    } else {
                        SLLI(ed, ed, u8&0x1f);
                    }
                    EWBACK;
                    UFLAG_RES(ed);
                    UFLAG_DF(x3, d_shl16);
                    break;
                case 5:
                    INST_NAME("SHR Ed, Ib");
                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
                    SETFLAGS(X_ALL, SF_PENDING);
                    GETEW(x1, 1);
                    u8 = F8;
                    UFLAG_IF {MOV32w(x2, (u8&0x1f));}
                    UFLAG_OP12(ed, x2)
                    SRLI(ed, ed, u8&0x1f);
                    EWBACK;
                    UFLAG_RES(ed);
                    UFLAG_DF(x3, d_shr16);
                    break;
                case 7:
                    INST_NAME("SAR Ed, Ib");
                    SETFLAGS(X_ALL, SF_PENDING);
                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
                    GETSEW(x1, 1);
                    u8 = F8;
                    UFLAG_IF {MOV32w(x2, (u8&0x1f));}
                    UFLAG_OP12(ed, x2)
                    SRAI(ed, ed, u8&0x1f);
                    if(MODREG) {
                        SLLI(ed, ed, 48);
                        SRLI(ed, ed, 48);
                    }
                    EWBACK;
                    UFLAG_RES(ed);
                    UFLAG_DF(x3, d_sar16);
                    break;
            }
            break;

        case 0xC7:
            INST_NAME("MOV Ew, Iw");
            nextop = F8;
            if(MODREG) {
                ed = xRAX+(nextop&7)+(rex.b<<3);
                // Replace the low 16 bits of ed, keeping the upper 48.
                // (was: mask -1>>48 which kept the OLD low 16 bits and wiped the
                // upper 48, then ORI with a register operand -- both wrong)
                ADDI(x1, xZR, -1);
                SLLI(x1, x1, 16);   // x1 = 0xFFFFFFFFFFFF0000
                AND(ed, ed, x1);    // clear low 16 bits
                u16 = F16;
                MOV32w(x1, u16);
                OR(ed, ed, x1);     // insert the immediate
            } else {
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 2);
                u16 = F16;
                MOV32w(x1, u16);
                SH(x1, ed, fixedaddress);
                SMWRITELOCK(lock);
            }
            break;

        default:
            DEFAULT;
    }
    return addr;
}

View File

@ -0,0 +1,87 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"
// Decodes the 66 0F prefixed opcodes (16-bit forms of the two-byte map) and
// emits the corresponding RV64 code. Returns the address just past the
// decoded instruction.
uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
{
    (void)ip; (void)need_epilog;

    uint8_t opcode = F8;
    uint8_t nextop, u8;
    int32_t i32;
    uint8_t gd, ed;
    uint8_t wback, wb1, wb2;
    uint8_t eb1, eb2;
    int64_t j64;
    uint64_t tmp64u, tmp64u2;
    int v0, v1;
    int q0, q1;
    int d0, d1;
    int64_t fixedaddress;
    int unscaled;
    MAYUSE(d0);
    MAYUSE(d1);
    MAYUSE(q0);
    MAYUSE(q1);
    MAYUSE(eb1);
    MAYUSE(eb2);
    MAYUSE(j64);

    switch(opcode) {

        case 0x1F:
            INST_NAME("NOP (multibyte)");
            nextop = F8;
            FAKEED;
            break;

        // CMOVcc Gw, Ew: x4 holds the zero-extended 16-bit source; on the taken
        // condition the low 16 bits of gd are replaced, upper 48 preserved.
        #define GO(GETFLAGS, NO, YES, F)            \
            READFLAGS(F);                           \
            GETFLAGS;                               \
            nextop=F8;                              \
            GETGD;                                  \
            if(MODREG) {                            \
                ed = xRAX+(nextop&7)+(rex.b<<3);    \
                SLLI(x4, ed, 48);                   \
                SRLI(x4, x4, 48);                   \
            } else {                                \
                SMREAD();                           \
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \
                LHU(x4, ed, fixedaddress);          \
                ed = x4;                            \
            }                                       \
            B##NO(x1, 4+4*4);       /* skip the 4-instruction move */   \
            ADDI(x3, xZR, -1);                      \
            SLLI(x3, x3, 16);       /* x3 = 0xFFFFFFFFFFFF0000 (was -1>>48, which wiped gd's upper bits) */ \
            AND(gd, gd, x3);                        \
            OR(gd, gd, x4);         /* use the zero-extended value, not the raw register */
        GOCOND(0x40, "CMOV", "Gw, Ew");
        #undef GO

        default:
            DEFAULT;
    }
    return addr;
}

View File

@ -0,0 +1,383 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "emu/x87emu_private.h"
#include "dynarec_native.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_helper.h"
#include "dynarec_rv64_functions.h"
// Decodes the x87 escape opcode 0xD9 (FLD/FST/FXCH, constants, transcendental
// and environment operations) and emits the corresponding RV64 code, using the
// dynarec's x87 register-cache helpers. Returns the address just past the
// decoded instruction.
uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
{
    (void)ip; (void)rep; (void)need_epilog;

    uint8_t nextop = F8;
    uint8_t ed;
    uint8_t wback, wb1;
    uint8_t u8;
    int64_t fixedaddress;
    int unscaled;
    int v1, v2;
    int s0;
    int i1, i2, i3;

    MAYUSE(s0);
    MAYUSE(v2);
    MAYUSE(v1);

    switch(nextop) {
        case 0xC0:
        case 0xC1:
        case 0xC2:
        case 0xC3:
        case 0xC4:
        case 0xC5:
        case 0xC6:
        case 0xC7:
            INST_NAME("FLD STx");
            // push a new ST0; the old ST(x) is then found at index x+1
            v2 = x87_do_push(dyn, ninst, x1, X87_ST(nextop&7));
            v1 = x87_get_st(dyn, ninst, x1, x2, (nextop&7)+1, X87_COMBINE(0, (nextop&7)+1));
            if(ST_IS_F(0)) {
                FMVS(v2, v1);
            } else {
                FMVD(v2, v1);
            }
            break;
        case 0xC8:
            // exchanging ST0 with itself: nothing to do
            INST_NAME("FXCH ST0");
            break;
        case 0xC9:
        case 0xCA:
        case 0xCB:
        case 0xCC:
        case 0xCD:
        case 0xCE:
        case 0xCF:
            INST_NAME("FXCH STx");
            // swap the cache value, not the double value itself :p
            x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_ST(nextop&7));
            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
            x87_swapreg(dyn, ninst, x1, x2, 0, nextop&7);
            // should set C1 to 0
            break;
        case 0xD0:
            INST_NAME("FNOP");
            break;
        case 0xD8:
            // store ST0 into itself then pop: equivalent to a plain pop
            INST_NAME("FSTPNCE ST0, ST0");
            x87_do_pop(dyn, ninst, x3);
            break;
        case 0xD9:
        case 0xDA:
        case 0xDB:
        case 0xDC:
        case 0xDD:
        case 0xDE:
        case 0xDF:
            INST_NAME("FSTPNCE ST0, STx");
            // copy the cache value for st0 to stx
            x87_get_st_empty(dyn, ninst, x1, x2, nextop&7, X87_ST(nextop&7));
            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
            x87_swapreg(dyn, ninst, x1, x2, 0, nextop&7);
            x87_do_pop(dyn, ninst, x3);
            break;
        case 0xE0:
            INST_NAME("FCHS");
            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
            if(ST_IS_F(0)) {
                FNEGS(v1, v1);
            } else {
                FNEGD(v1, v1);
            }
            break;
        case 0xE1:
            INST_NAME("FABS");
            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
            if(ST_IS_F(0)) {
                FABSS(v1, v1);
            } else {
                FABSD(v1, v1);
            }
            break;
        case 0xE4:
            INST_NAME("FTST");
            DEFAULT
            break;
        case 0xE5:
            INST_NAME("FXAM");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_refresh(dyn, ninst, x1, x2, 0);
            CALL(fpu_fxam, -1);     // should be possible inline, but is it worth it?
            break;
        case 0xE8:
            INST_NAME("FLD1");
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_F);
            if(ST_IS_F(0)) {
                MOV32w(x1, 0x3f800000);             // 1.0f bit pattern
                FMVWX(v1, x1);
            } else {
                MOV64x(x1, 0x3FF0000000000000);     // 1.0 bit pattern
                FMVDX(v1, x1);
            }
            break;
        case 0xE9:
            INST_NAME("FLDL2T");
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D);
            FTABLE64(v1, L2T);      // log2(10)
            break;
        case 0xEA:
            INST_NAME("FLDL2E");
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D);
            FTABLE64(v1, L2E);      // log2(e)
            break;
        case 0xEB:
            INST_NAME("FLDPI");
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D);
            FTABLE64(v1, PI);
            break;
        case 0xEC:
            INST_NAME("FLDLG2");
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D);
            FTABLE64(v1, LG2);      // log10(2)
            break;
        case 0xED:
            INST_NAME("FLDLN2");
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D);
            FTABLE64(v1, LN2);      // ln(2)
            break;
        case 0xEE:
            INST_NAME("FLDZ");
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_F);
            if(ST_IS_F(0)) {
                FMVWX(v1, xZR);
            } else {
                FMVDX(v1, xZR);
            }
            break;
        case 0xF0:
            INST_NAME("F2XM1");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);      // sync ST0 to memory for the helper
            CALL(native_f2xm1, -1);
            break;
        case 0xF1:
            INST_NAME("FYL2X");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fyl2x, -1);
            x87_do_pop(dyn, ninst, x3);
            break;
        case 0xF2:
            INST_NAME("FPTAN");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(native_ftan, -1);
            // FPTAN pushes 1.0 after computing tan(ST0)
            v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_F);
            if(ST_IS_F(0)) {
                MOV32w(x1, 0x3f800000);
                FMVWX(v1, x1);
            } else {
                MOV64x(x1, 0x3FF0000000000000);
                FMVDX(v1, x1);
            }
            break;
        case 0xF3:
            INST_NAME("FPATAN");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fpatan, -1);
            x87_do_pop(dyn, ninst, x3);
            break;
        case 0xF4:
            INST_NAME("FXTRACT");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_do_push_empty(dyn, ninst, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fxtract, -1);
            break;
        case 0xF5:
            INST_NAME("FPREM1");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fprem1, -1);
            break;
        case 0xF6:
            INST_NAME("FDECSTP");
            // rotating the hardware stack invalidates the register cache
            fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
            LW(x2, xEmu, offsetof(x64emu_t, top));
            ADDI(x2, x2, -1);
            ANDI(x2, x2, 7);    // top is modulo 8
            SW(x2, xEmu, offsetof(x64emu_t, top));
            break;
        case 0xF7:
            INST_NAME("FINCSTP");
            fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
            LW(x2, xEmu, offsetof(x64emu_t, top));
            ADDI(x2, x2, 1);
            ANDI(x2, x2, 7);    // top is modulo 8
            SW(x2, xEmu, offsetof(x64emu_t, top));
            break;
        case 0xF8:
            INST_NAME("FPREM");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fprem, -1);
            break;
        case 0xF9:
            INST_NAME("FYL2XP1");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fyl2xp1, -1);
            x87_do_pop(dyn, ninst, x3);
            break;
        case 0xFA:
            INST_NAME("FSQRT");
            DEFAULT;
            break;
        case 0xFB:
            INST_NAME("FSINCOS");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_do_push_empty(dyn, ninst, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fsincos, -1);
            break;
        case 0xFC:
            INST_NAME("FRNDINT");
            DEFAULT;
            break;
        case 0xFD:
            INST_NAME("FSCALE");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            x87_forget(dyn, ninst, x1, x2, 1);
            CALL(native_fscale, -1);
            break;
        case 0xFE:
            INST_NAME("FSIN");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(native_fsin, -1);
            break;
        case 0xFF:
            INST_NAME("FCOS");
            MESSAGE(LOG_DUMP, "Need Optimization\n");
            x87_forget(dyn, ninst, x1, x2, 0);
            CALL(native_fcos, -1);
            break;
        case 0xD1:
        case 0xD4:
        case 0xD5:
        case 0xD6:
        case 0xD7:
        case 0xE2:
        case 0xE3:
        case 0xE6:
        case 0xE7:
        case 0xEF:
            // unimplemented / invalid encodings
            DEFAULT;
            break;

        default:
            // memory operand forms, selected by the reg field of the ModRM byte
            switch((nextop>>3)&7) {
                case 0:
                    INST_NAME("FLD ST0, float[ED]");
                    v1 = x87_do_push(dyn, ninst, x1, box64_dynarec_x87double?EXT_CACHE_ST_D:EXT_CACHE_ST_F);
                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                    FLW(v1, ed, fixedaddress);
                    if(!ST_IS_F(0)) {
                        FCVTDS(v1, v1);     // widen to double when caching as double
                    }
                    break;
                case 2:
                    INST_NAME("FST float[ED], ST0");
                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
                    if(ST_IS_F(0))
                        s0 = v1;
                    else {
                        // narrow a double-cached ST0 into a scratch single
                        s0 = fpu_get_scratch(dyn);
                        FCVTSD(s0, v1);
                    }
                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                    FSW(s0, ed, fixedaddress);
                    break;
                case 3:
                    INST_NAME("FSTP float[ED], ST0");
                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                    if(!ST_IS_F(0)) {
                        FCVTSD(v1, v1);     // in-place narrow is fine: ST0 is popped next
                    }
                    FSW(v1, ed, fixedaddress);
                    x87_do_pop(dyn, ninst, x3);
                    break;
                case 4:
                    INST_NAME("FLDENV Ed");
                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3);  // maybe only x87, not SSE?
                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                    if(ed!=x1) {
                        MV(x1, ed);
                    }
                    MOV32w(x2, 0);
                    CALL(fpu_loadenv, -1);
                    break;
                case 5:
                    INST_NAME("FLDCW Ew");
                    GETEW(x1, 0);
                    SH(x1, xEmu, offsetof(x64emu_t, cw));   // hopefully cw is not too far for an imm8
                    break;
                case 6:
                    INST_NAME("FNSTENV Ed");
                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3);  // maybe only x87, not SSE?
                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                    if(ed!=x1) {
                        MV(x1, ed);
                    }
                    MOV32w(x2, 0);
                    CALL(fpu_savenv, -1);
                    break;
                case 7:
                    INST_NAME("FNSTCW Ew");
                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 0, 0);
                    ed = x1;
                    wb1 = 1;
                    LH(x1, xEmu, offsetof(x64emu_t, cw));
                    EWBACK;     // store the control word to the Ew destination
                    break;
                default:
                    DEFAULT;
            }
    }
    return addr;
}

View File

@ -0,0 +1,324 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "../tools/bridge_private.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"
// emit XOR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s4, rex.w?d_xor64:d_xor32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }

    XOR(s1, s1, s2);

    // test sign bit before zeroup.
    IFX(X_SF) {
        // 32-bit operands are kept zero-extended, so bit 63 is always 0: sign-extend
        // first so BGE actually sees bit 31 (same pattern as emit_or32).
        if (!rex.w) SEXT_W(s1, s1);
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w && s1!=s2) {     // s1==s2 means the result is 0: already zero-extended
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit XOR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch
void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s4, rex.w?d_xor64:d_xor32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }

    if(c>=-2048 && c<=2047) {
        XORI(s1, s1, c);    // fits the 12-bit signed immediate
    } else {
        MOV64xw(s3, c);
        XOR(s1, s1, s3);
    }

    // test sign bit before zeroup.
    IFX(X_SF) {
        // 32-bit operands are kept zero-extended, so bit 63 is always 0: sign-extend
        // first so BGE actually sees bit 31 (same pattern as emit_or32).
        if (!rex.w) SEXT_W(s1, s1);
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit OR16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) {
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s3, d_or16);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    OR(s1, s1, s2);
    // keep only the low 16 bits of the result
    SLLI(s1, s1, 48);
    SRLI(s1, s1, 48);
    IFX(X_PEND) {
        SD(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        SRLI(s3, s1, 15);   // bit 15 is the 16-bit sign bit
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit OR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s4, rex.w?d_or64:d_or32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    OR(s1, s1, s2);
    // test sign bit before zeroup.
    IFX(X_SF) {
        // for 32-bit ops bit 63 is 0 (operands zero-extended): sign-extend so
        // BGE observes bit 31; ZEROUP below restores the zero-extension
        if (!rex.w) SEXT_W(s1, s1);
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit OR32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch
void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s4, rex.w?d_or64:d_or32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }

    if(c>=-2048 && c<=2047) {
        ORI(s1, s1, c);     // fits the 12-bit signed immediate
    } else {
        MOV64xw(s3, c);
        OR(s1, s1, s3);
    }

    // test sign bit before zeroup.
    IFX(X_SF) {
        // 32-bit operands are kept zero-extended, so bit 63 is always 0: sign-extend
        // first so BGE actually sees bit 31 (same pattern as emit_or32).
        if (!rex.w) SEXT_W(s1, s1);
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit AND8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s3, d_and8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    ANDI(s1, s1, c&0xff);   // c&0xff always fits the positive imm12 range
    IFX(X_PEND) {
        SD(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        SRLI(s3, s1, 7);    // bit 7 is the 8-bit sign bit
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit AND32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s3, rex.w?d_tst64:d_tst32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    AND(s1, s1, s2);    // res = s1 & s2
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        if (!rex.w) ZEROUP(s1);
        SRLI(s3, s1, rex.w?63:31);  // isolate the sign bit of the operand width
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit AND32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch
void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s3, rex.w?d_tst64:d_tst32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if(c>=-2048 && c<=2047) {
        ANDI(s1, s1, c);    // fits the 12-bit signed immediate
    } else {
        MOV64xw(s3, c);
        AND(s1, s1, s3);    // res = s1 & s2
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        if (!rex.w) ZEROUP(s1);
        SRLI(s3, s1, rex.w?63:31);  // isolate the sign bit of the operand width
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit OR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SET_DF(s3, d_or8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    OR(s1, s1, s2);
    IFX(X_PEND) {
        SB(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        SRLI(s3, s1, 7);    // bit 7 is the 8-bit sign bit
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

View File

@ -0,0 +1,442 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "../tools/bridge_private.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"
// emit ADD32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w?d_add64:d_add32b);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_CF) {
        if (rex.w) {
            // 64-bit carry: add the 32-bit halves separately and check if the
            // high-half sum (plus the low-half carry) overflows 32 bits
            AND(s5, xMASK, s1);
            AND(s4, xMASK, s2);
            ADD(s5, s5, s4);        // lo
            SRLI(s3, s1, 0x20);
            SRLI(s4, s2, 0x20);
            ADD(s4, s4, s3);
            SRLI(s5, s5, 0x20);     // carry out of the low half
            ADD(s5, s5, s4);        // hi
            SRAI(s5, s5, 0x20);
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        } else {
            // 32-bit carry: full 64-bit add, carry is bit 32 of the sum
            ADD(s5, s1, s2);
            SRLI(s5, s5, 0x20);
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        }
    }
    IFX(X_AF | X_OF) {
        OR(s3, s1, s2);     // s3 = op1 | op2
        AND(s4, s1, s2);    // s4 = op1 & op2
    }
    ADDxw(s1, s1, s2);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_AF | X_OF) {
        // carry-chain trick: cc = (~res & (op1 | op2)) | (op1 & op2) holds the
        // carry out of every bit position
        NOT(s2, s1);        // s2 = ~res
        AND(s3, s2, s3);    // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);     // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08);     // AF: cc & 0x08
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI(s3, s3, rex.w?62:30);
            SRLI(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1);        // OF: xor of two MSB's of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF2);
        }
    }
    IFX(X_SF) {
        // sign test works before ZEROUP here: the 32-bit result is presumably
        // sign-extended by ADDxw (ADDW) -- TODO confirm against the emitter macro
        BGE(s1, xZR, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
// emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
// s2 holds the materialized constant when it is needed; s5 is used for the carry computation.
void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
{
    CLEAR_FLAGS();
    if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags==X_PEND))
    {
        // special case when doing math on ESP and only PEND is needed: ignoring it!
        if(c >= -2048 && c < 2048) {
            ADDIxw(s1, s1, c);
        } else {
            MOV64xw(s2, c);
            ADDxw(s1, s1, s2);
        }
        return;
    }
    IFX(X_PEND | X_AF | X_CF | X_OF) {
        // these paths need the constant in a register (as op2)
        MOV64xw(s2, c);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w?d_add64:d_add32b);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_CF) {
        if (rex.w) {
            // 64-bit carry: add low and high 32-bit halves separately
            AND(s5, xMASK, s1);
            AND(s4, xMASK, s2);
            ADD(s5, s5, s4);    // lo
            SRLI(s3, s1, 0x20);
            SRLI(s4, s2, 0x20);
            ADD(s4, s4, s3);
            SRLI(s5, s5, 0x20); // carry out of the low halves
            ADD(s5, s5, s4);    // hi
            SRAI(s5, s5, 0x20); // non-zero iff the full 64-bit add carried out
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        } else {
            // 32-bit carry: do the add in 64 bits and test bit 32
            ADD(s5, s1, s2);
            SRLI(s5, s5, 0x20);
            BEQZ(s5, 8);
            ORI(xFlags, xFlags, 1 << F_CF);
        }
    }
    IFX(X_AF | X_OF) {
        OR(s3, s1, s2);     // s3 = op1 | op2
        AND(s4, s1, s2);    // s4 = op1 & op2
    }
    if(c >= -2048 && c < 2048) {
        ADDIxw(s1, s1, c);
    } else {
        // s2 was already loaded above when PEND/AF/CF/OF were requested
        IFX(X_PEND | X_AF | X_CF | X_OF) {} else {MOV64xw(s2, c);}
        ADDxw(s1, s1, s2);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_AF | X_OF) {
        // carry-chain trick: cc = (~res & (op1|op2)) | (op1&op2)
        NOT(s2, s1);        // s2 = ~res
        AND(s3, s2, s3);    // s3 = ~res & (op1 | op2)
        OR(s3, s3, s4);     // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s4, s3, 0x08);     // AF: cc & 0x08 (carry out of bit 3)
            BEQZ(s4, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI(s3, s3, rex.w?62:30);
            SRLI(s4, s3, 1);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1);        // OF: xor of two MSB's of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF2);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);    // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
// emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
// s5 receives ~op1 before the subtract, for the CALC_SUB_FLAGS borrow computation.
void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        // deferred-flags mode: save both operands and the op kind for later evaluation
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w?d_sub64:d_sub32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOT(s5, s1);
    }
    SUBxw(s1, s1, s2);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);    // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    // computes AF/CF/OF from ~op1 (s5), op2 (s2) and the result (s1)
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit SUB32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
// s2 ends up holding the constant (as op2) whenever CALC_SUB_FLAGS needs it.
void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
{
    CLEAR_FLAGS();
    if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags==X_PEND))
    {
        // special case when doing math on RSP and only PEND is needed: ignoring it!
        if (c > -2048 && c <= 2048) {
            ADDI(s1, s1, -c);   // subtract by adding the negated constant
        } else {
            MOV64xw(s2, c);
            SUBxw(s1, s1, s2);
        }
        return;
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        MOV64xw(s2, c);
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w?d_sub64:d_sub32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOT(s5, s1);
    }
    if (c > -2048 && c <= 2048) {
        ADDIxw(s1, s1, -c);     // subtract by adding the negated constant
    } else {
        IFX(X_PEND) {} else {MOV64xw(s2, c);}
        SUBxw(s1, s1, s2);
    }
    IFX(X_AF | X_CF | X_OF) {
        // the immediate path skipped loading s2; CALC_SUB_FLAGS below needs op2 in a register
        IFX(X_PEND) {}
        else if (c > -2048 && c <= 2048) {
            MOV64xw(s2, c);
        }
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);    // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    // computes AF/CF/OF from ~op1 (s5), op2 (s2) and the result (s1)
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit INC32 instruction, from s1, store result in s1 using s3 and s4 as scratch
// x86 INC does not touch CF, so only AF/OF/ZF/SF/PF are cleared here (no CLEAR_FLAGS).
void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_ALL) {
        // clear every flag INC may set, but deliberately keep CF
        ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF)));
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s3, rex.w?d_inc64:d_inc32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_OF) {
        // op2 is the constant 1, so op1|op2 and op1&op2 reduce to ORI/ANDI
        ORI(s3, s1, 1);     // s3 = op1 | op2
        ANDI(s5, s1, 1);    // s5 = op1 & op2
    }
    ADDIxw(s1, s1, 1);
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_AF | X_OF) {
        // carry-chain trick: cc = (~res & (op1|op2)) | (op1&op2)
        NOT(s2, s1);        // s2 = ~res
        AND(s3, s2, s3);    // s3 = ~res & (op1 | op2)
        OR(s3, s3, s5);     // cc = (~res & (op1 | op2)) | (op1 & op2)
        IFX(X_AF) {
            ANDI(s2, s3, 0x08);     // AF: cc & 0x08 (carry out of bit 3)
            BEQZ(s2, 8);
            ORI(xFlags, xFlags, 1 << F_AF);
        }
        IFX(X_OF) {
            SRLI(s3, s3, rex.w?62:30);
            SRLI(s2, s3, 1);
            XOR(s3, s3, s2);
            ANDI(s3, s3, 1);        // OF: xor of two MSB's of cc
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF2);
        }
    }
    IFX(X_SF) {
        BGE(s1, xZR, 8);    // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s2);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
}
// emit SBB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
// Computes s1 = (s1 - s2 - CF) & 0xff and updates the x86 flags for an 8-bit SBB.
void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        // deferred-flags mode: save both operands and the op kind for later evaluation
        SB(s1, xEmu, offsetof(x64emu_t, op1));
        SB(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, d_sbb8);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOT(s5, s1);
    }
    SUBW(s1, s1, s2);
    ANDI(s1, s1, 0xff);             // keep the intermediate difference to 8 bits
    ANDI(s3, xFlags, 1 << F_CF);    // incoming carry (borrow) flag
    SUBW(s1, s1, s3);
    ANDI(s1, s1, 0xff);             // final 8-bit result
    IFX(X_PEND) {
        SB(s1, xEmu, offsetof(x64emu_t, res));
    }
    // computes AF/CF/OF from ~op1 (s5), op2 (s2) and the result (s1)
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
    IFX(X_SF) {
        SRLI(s3, s1, 7);    // sign bit of the 8-bit result
        BEQZ(s3, 8);        // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit SBB8 instruction, from s1, constant c, store result in s1 using s3, s4, s5 and s6 as scratch
// Materializes the 8-bit constant into s6, then defers to the register form.
void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5, int s6)
{
    MOV32w(s6, c&0xff);
    emit_sbb8(dyn, ninst, s1, s6, s3, s4, s5);
}
// emit SBB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
// Computes s1 = s1 - s2 - CF (32- or 64-bit depending on rex.w) and updates the flags.
void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    IFX(X_PEND) {
        // deferred-flags mode: save both operands and the op kind for later evaluation
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s3, rex.w?d_sbb64:d_sbb32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOT(s5, s1);
    }
    SUBxw(s1, s1, s2);
    ANDI(s3, xFlags, 1 << F_CF);    // incoming carry (borrow) flag
    SUBxw(s1, s1, s3);
    IFX(X_SF) {
        BGE(s1, xZR, 8);    // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    // computes AF/CF/OF from ~op1 (s5), op2 (s2) and the result (s1)
    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

View File

@ -0,0 +1,252 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "../tools/bridge_private.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"
// emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
// CF gets the last bit shifted out; OF (only defined for shift==1 on x86) is CF xor new MSB.
void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    // s2 is not 0 here and is 1..1f/3f
    CLEAR_FLAGS();
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s4, rex.w?d_shl64:d_shl32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    IFX(X_CF) {
        // CF = bit (width - s2) of the original value, i.e. the last bit shifted out
        SUBI(s5, s2, rex.w?64:32);
        NEG(s5, s5);            // s5 = width - s2
        SRL(s3, s1, s5);
        ANDI(s5, s3, 1);        // LSB
        BEQZ(s5, 8);            // branch over the next instruction when the bit is clear
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    SLL(s1, s1, s2);
    IFX(X_SF) {
        BGE(s1, xZR, 8);        // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);            // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_OF) {
        // OF = CF (still in s5 from the X_CF block) xor result MSB
        // NOTE(review): s5 is only loaded when X_CF is also requested — confirm callers never ask for OF alone
        SRLIxw(s3, s1, rex.w?63:31);
        XOR(s3, s3, s5);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_OF2);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit SHL32 instruction, from s1 , constant c, store result in s1 using s3, s4 and s5 as scratch
// A zero shift count leaves all flags untouched on x86; handled by the early return below.
void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        if (c) {
            MOV64x(s3, c);
            SDxw(s3, xEmu, offsetof(x64emu_t, op2));
        } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s4, rex.w?d_shl64:d_shl32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if(!c) {
        // shift by 0: value unchanged, only the deferred result needs storing
        IFX(X_PEND) {
            SDxw(s1, xEmu, offsetof(x64emu_t, res));
        }
        return;
    }
    IFX(X_CF) {
        if (c > 0) {
            // CF = last bit shifted out = bit (width - c) of the original value
            SRLI(s3, s1, (rex.w?64:32)-c);
            ANDI(s5, s3, 1);    // LSB
            BEQZ(s5, 8);        // branch over the next instruction when the bit is clear
            ORI(xFlags, xFlags, 1 << F_CF);
        } else {
            // unreachable (c!=0 was handled above); kept so s5 is defined for the OF block
            IFX(X_OF) MOV64x(s5, 0);
        }
    }
    SLLIxw(s1, s1, c);
    IFX(X_SF) {
        BGE(s1, xZR, 8);        // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);            // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_OF) {
        // OF = CF (in s5 from the X_CF block) xor result MSB
        // NOTE(review): s5 is only loaded when X_CF is also requested — confirm callers never ask for OF alone
        SRLIxw(s3, s1, rex.w?63:31);
        XOR(s3, s3, s5);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_OF2);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
// CF = last bit shifted out; OF is only computed for c==1 (x86 leaves it undefined otherwise).
void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        if (c) {
            MOV64x(s3, c);
            SDxw(s3, xEmu, offsetof(x64emu_t, op2));
        } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s4, rex.w?d_shr64:d_shr32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if(!c) {
        // shift by 0: value unchanged, only the deferred result needs storing
        IFX(X_PEND) {
            SDxw(s1, xEmu, offsetof(x64emu_t, res));
        }
        return;
    }
    IFX(X_CF) {
        // CF = bit (c-1) of the original value
        if (c > 1) {
            SRAI(s3, s1, c-1);
            ANDI(s3, s3, 1);    // LSB
            BEQZ(s3, 8);        // branch over the ORI below when the bit is clear
        } else {
            // no need to shift
            ANDI(s3, s1, 1);
            BEQZ(s3, 8);
        }
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    SRLIxw(s1, s1, c);
    IFX(X_SF) {
        BGE(s1, xZR, 8);        // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);            // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_OF) {
        if(c==1) {
            // after a 1-bit logical shift, the original MSB sits one below the (now zero) top bit
            SRLI(s3, s1, rex.w?62:30);
            SRLI(s4, s1, rex.w?63:31);
            XOR(s3, s3, s4);
            ANDI(s3, s3, 1);
            BEQZ(s3, 8);
            ORI(xFlags, xFlags, 1 << F_OF2);
        }
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
// Arithmetic right shift: CF = last bit shifted out; OF is always 0 for SAR (covered by CLEAR_FLAGS).
void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX(X_PEND) {
        if (c) {
            MOV64x(s3, c);
            SDxw(s3, xEmu, offsetof(x64emu_t, op2));
        } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s4, rex.w?d_sar64:d_sar32);
    } else IFX(X_ALL) {
        SET_DFNONE();
    }
    if(!c) {
        // shift by 0: value unchanged, only the deferred result needs storing
        IFX(X_PEND) {
            SDxw(s1, xEmu, offsetof(x64emu_t, res));
        }
        return;
    }
    IFX(X_CF) {
        // CF = bit (c-1) of the original value
        if (c > 1) {
            SRAI(s3, s1, c-1);
            ANDI(s3, s3, 1);    // LSB
            BEQZ(s3, 8);        // branch over the ORI below when the bit is clear
        } else {
            // no need to shift
            ANDI(s3, s1, 1);
            BEQZ(s3, 8);
        }
        ORI(xFlags, xFlags, 1 << F_CF);
    }
    SRAIxw(s1, s1, c);
    // SRAIW sign-extends, so test sign bit before clearing upper bits
    IFX(X_SF) {
        BGE(s1, xZR, 8);        // branch over the next instruction when result >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s1);
    }
    IFX(X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);            // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}

View File

@ -0,0 +1,317 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "../tools/bridge_private.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"
// emit CMP8 instruction, from cmp s1, s2, using s3, s4, s5 and s6 as scratch
// CMP is SUB without writeback: the 8-bit difference goes into s6, s1/s2 are preserved.
void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SB(s1, xEmu, offsetof(x64emu_t, op1));
        SB(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s4, d_cmp8);
    } else {
        SET_DFNONE();
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOT(s5, s1);
    }
    // It's a cmp, we can't store the result back to s1.
    SUB(s6, s1, s2);
    ANDI(s6, s6, 0xff);     // keep the difference to 8 bits
    IFX_PENDOR0 {
        SB(s6, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        SRLI(s3, s6, 7);    // sign bit of the 8-bit difference
        BEQZ(s3, 8);        // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    // computes AF/CF/OF from ~op1 (s5), op2 (s2) and the result (s6)
    CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 8);
    IFX(X_ZF) {
        BNEZ(s6, 8);        // branch over the next instruction when difference is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s6, s3, s4);
    }
}
// emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch
// Comparing against 0: the "difference" is s1 itself, and CF/AF/OF are always clear.
void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SB(s1, xEmu, offsetof(x64emu_t, op1));
        SB(xZR, xEmu, offsetof(x64emu_t, op2));
        SB(s1, xEmu, offsetof(x64emu_t, res));
        SET_DF(s3, d_cmp8);
    } else {
        SET_DFNONE();
    }
    IFX(X_SF) {
        SRLI(s3, s1, 7);    // sign bit of the 8-bit operand
        BEQZ(s3, 8);        // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when operand is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit CMP16 instruction, from cmp s1, s2, using s3 and s4 as scratch
// CMP is SUB without writeback: the 16-bit difference goes into s6, s1/s2 are preserved.
void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SH(s1, xEmu, offsetof(x64emu_t, op1));
        SH(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s4, d_cmp16);
    } else {
        SET_DFNONE();
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOT(s5, s1);
    }
    // It's a cmp, we can't store the result back to s1.
    SUB(s6, s1, s2);
    SLLI(s6, s6, 48);   // truncate the difference to 16 bits (shift up then back down)
    SRLI(s6, s6, 48);
    IFX_PENDOR0 {
        SH(s6, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        SRLI(s3, s6, 15);   // sign bit of the 16-bit difference
        BEQZ(s3, 8);        // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    // computes AF/CF/OF from ~op1 (s5), op2 (s2) and the result (s6)
    CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 16);
    IFX(X_ZF) {
        BNEZ(s6, 8);        // branch over the next instruction when difference is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s6, s3, s4);
    }
}
// emit CMP16 instruction, from cmp s1 , #0, using s3 and s4 as scratch
// Comparing against 0: the "difference" is s1 itself, and CF/AF/OF are always clear.
void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SH(s1, xEmu, offsetof(x64emu_t, op1));
        SH(xZR, xEmu, offsetof(x64emu_t, op2));
        SH(s1, xEmu, offsetof(x64emu_t, res));
        SET_DF(s3, d_cmp16);
    } else {
        SET_DFNONE();
    }
    IFX(X_SF) {
        SRLI(s3, s1, 15);   // sign bit of the 16-bit operand
        BEQZ(s3, 8);        // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);        // branch over the next instruction when operand is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit CMP32 instruction, from cmp s1, s2, using s3 and s4 as scratch
// CMP is SUB without writeback: the difference goes into s6, s1/s2 are preserved.
void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
        SET_DF(s4, rex.w?d_cmp64:d_cmp32);
    } else {
        SET_DFNONE();
    }
    IFX(X_AF | X_CF | X_OF) {
        // for later flag calculation
        NOT(s5, s1);
    }
    // It's a cmp, we can't store the result back to s1.
    SUBxw(s6, s1, s2);
    IFX_PENDOR0 {
        SDxw(s6, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        BGE(s6, xZR, 8);    // branch over the next instruction when difference >= 0
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    if (!rex.w) {
        ZEROUP(s6);
    }
    // computes AF/CF/OF from ~op1 (s5), op2 (s2) and the result (s6)
    CALC_SUB_FLAGS(s5, s2, s6, s3, s4, rex.w?64:32);
    IFX(X_ZF) {
        BNEZ(s6, 8);        // branch over the next instruction when difference is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s6, s3, s4);
    }
}
// emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch
// Comparing against 0: the "difference" is s1 itself, and CF/AF/OF are always clear.
void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SD(s1, xEmu, offsetof(x64emu_t, op1));
        SD(xZR, xEmu, offsetof(x64emu_t, op2));
        SD(s1, xEmu, offsetof(x64emu_t, res));
        SET_DF(s4, rex.w?d_cmp64:d_cmp32);
    } else {
        SET_DFNONE();
    }
    IFX(X_SF) {
        if (rex.w) {
            BGE(s1, xZR, 8);    // 64-bit: sign is the native sign of s1
        } else {
            SRLI(s3, s1, 31);   // 32-bit: isolate bit 31 explicitly
            BEQZ(s3, 8);
        }
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s1, 8);            // branch over the next instruction when operand is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }
}
// emit TEST8 instruction, from test s1, s2, using s3, s4 and s5 as scratch
// TEST is AND without writeback: the result stays in s3, s1/s2 are preserved.
void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5) {
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SET_DF(s3, d_tst8);
    } else {
        SET_DFNONE();
    }
    AND(s3, s1, s2);    // res = s1 & s2
    IFX_PENDOR0 {
        SD(s3, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        SRLI(s4, s3, 7);    // sign bit of the 8-bit result
        BEQZ(s4, 8);        // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s3, 8);        // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s3, s4, s5);
    }
}
// emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch
// TEST is AND without writeback: the result stays in s3, s1/s2 are preserved.
void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SET_DF(s3, rex.w?d_tst64:d_tst32);
    } else {
        SET_DFNONE();
    }
    AND(s3, s1, s2);    // res = s1 & s2
    IFX_PENDOR0 {
        SDxw(s3, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        if (!rex.w) ZEROUP(s3);     // clear upper bits before isolating the 32-bit sign bit
        SRLI(s4, s3, rex.w?63:31);  // s4 = sign bit of the result
        BEQZ(s4, 8);                // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s3, 8);                // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s3, s4, s5);
    }
}
// emit TEST32 instruction, from test s1, constant c, using s3 and s4 as scratch
// TEST is AND without writeback: the result stays in s3, s1 is preserved.
void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5)
{
    CLEAR_FLAGS();
    IFX_PENDOR0 {
        SET_DF(s3, rex.w?d_tst64:d_tst32);
    } else {
        SET_DFNONE();
    }
    if(c>=-2048 && c<=2047)
        // constant fits ANDI's 12-bit signed immediate
        ANDI(s3, s1, c);
    else {
        MOV64x(s3, c);
        AND(s3, s1, s3);    // res = s1 & s2
    }
    IFX_PENDOR0 {
        SDxw(s3, xEmu, offsetof(x64emu_t, res));
    }
    IFX(X_SF) {
        if (!rex.w) ZEROUP(s3);     // clear upper bits before isolating the 32-bit sign bit
        SRLI(s4, s3, rex.w?63:31);  // s4 = sign bit of the result
        BEQZ(s4, 8);                // branch over the next instruction when clear
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX(X_ZF) {
        BNEZ(s3, 8);                // branch over the next instruction when result is non-zero
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX(X_PF) {
        emit_pf(dyn, ninst, s3, s4, s5);
    }
}

View File

@ -0,0 +1,102 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_helper.h"
#include "dynarec_rv64_functions.h"
// Decode and emit code for x86-64 instructions carrying a LOCK (0xF0) prefix.
// addr points just past the prefix; returns the address after the decoded instruction.
// Only LOCK XADD Gd,Ed (0F C1) is handled here; everything else falls through to DEFAULT.
uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
{
    (void)ip; (void)rep; (void)need_epilog;
    uint8_t opcode = F8;
    uint8_t nextop;
    uint8_t gd, ed, u8;
    uint8_t wback, wb1, wb2, eb1, eb2, gb1, gb2;
    int32_t i32;
    int64_t i64, j64;
    int64_t fixedaddress;
    int unscaled;
    MAYUSE(eb1);
    MAYUSE(eb2);
    MAYUSE(gb1);
    MAYUSE(gb2);
    MAYUSE(wb1);
    MAYUSE(wb2);
    MAYUSE(j64);
    // consume any REP/REPNE prefixes that follow the LOCK prefix
    while((opcode==0xF2) || (opcode==0xF3)) {
        rep = opcode-0xF1;
        opcode = F8;
    }
    // REX prefix before the F0 are ignored
    rex.rex = 0;
    while(opcode>=0x40 && opcode<=0x4f) {
        rex.rex = opcode;
        opcode = F8;
    }
    switch(opcode) {
        case 0x0F:
            nextop = F8;
            switch(nextop) {
                case 0xC1:
                    switch(rep) {
                        case 0:
                            INST_NAME("LOCK XADD Gd, Ed");
                            SETFLAGS(X_ALL, SF_SET_PENDING);
                            nextop = F8;
                            GETGD;
                            SMDMB();
                            if(MODREG) {
                                // register destination: no atomicity needed,
                                // swap gd/ed then add (XADD semantics)
                                ed = xRAX+(nextop&7)+(rex.b<<3);
                                MOV64xw(x1, ed);
                                MOV64xw(ed, gd);
                                MOV64xw(gd, x1);
                                emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5);
                            } else {
                                // memory destination: LR/SC retry loop for atomicity
                                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
                                MARKLOCK;
                                LRxw(x1, wback, 1, 1);
                                ADDxw(x4, x1, gd);
                                SCxw(x3, x4, wback, 1, 1);
                                BNEZ_MARKLOCK(x3);      // store-conditional failed: retry
                                IFX(X_ALL|X_PEND) {
                                    // flags requested: redo the add on a copy to set them
                                    MV(x2, x1);
                                    emit_add32(dyn, ninst, rex, x2, gd, x3, x4, x5);
                                }
                                MV(gd, x1);             // gd receives the old memory value
                            }
                            SMDMB();
                            break;
                        default:
                            DEFAULT;
                    }
                    break;
                default:
                    DEFAULT;
            }
            break;
        default:
            DEFAULT;
    }
    return addr;
}

View File

@ -0,0 +1,59 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"
// Decode and emit code for x86-64 instructions with an F3 0F prefix pair.
// Only F3 0F 1E (NOP / ENDBR32 / ENDBR64) is handled; everything else goes to DEFAULT.
uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
{
    (void)ip; (void)need_epilog;
    uint8_t opcode = F8;
    uint8_t nextop, u8;
    uint8_t gd, ed;
    uint8_t wback;
    uint64_t u64;
    int v0, v1;
    int q0, q1;
    int d0, d1;
    int64_t fixedaddress;
    int unscaled;
    int64_t j64;
    MAYUSE(d0);
    MAYUSE(d1);
    MAYUSE(q0);
    MAYUSE(q1);
    MAYUSE(v0);
    MAYUSE(v1);
    MAYUSE(j64);
    switch(opcode) {
        case 0x1E:
            // CET end-branch markers are no-ops when CET is not emulated
            INST_NAME("NOP / ENDBR32 / ENDBR64");
            nextop = F8;
            FAKEED;     // consume the modrm byte without generating a load
            break;
        default:
            DEFAULT;
    }
    return addr;
}

View File

@ -0,0 +1,493 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>
#include <string.h>
#include <math.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "tools/bridge_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "emu/x87emu_private.h"
#include "x64trace.h"
#include "signals.h"
#include "dynarec_rv64.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "custommem.h"
#include "bridge.h"
#include "rv64_lock.h"
#define XMM0 0
#define X870 XMM0+16
#define EMM0 XMM0+16
#define SCRATCH0 0
// Get a FPU scratch reg: hand out the next free scratch slot and return its index (an Sx).
int fpu_get_scratch(dynarec_rv64_t* dyn)
{
    int reg = SCRATCH0 + dyn->e.fpu_scratch;
    ++dyn->e.fpu_scratch;
    return reg;
}
// Reset scratch regs counter: the next fpu_get_scratch() starts from SCRATCH0 again.
void fpu_reset_scratch(dynarec_rv64_t* dyn)
{
    dyn->e.fpu_scratch = 0;
}
// Get a x87 double reg: claim the first free slot of the x87 bank, record its
// type t and x87 index n in the ext cache, and return the native register.
int fpu_get_reg_x87(dynarec_rv64_t* dyn, int t, int n)
{
    int slot = X870;
    // scan for a free slot (NOTE(review): no upper bound — assumes one is always free)
    while (dyn->e.fpuused[slot])
        ++slot;
    dyn->e.fpuused[slot] = 1;
    dyn->e.extcache[slot].n = n;
    dyn->e.extcache[slot].t = t;
    dyn->e.news |= (1<<slot);
    return EXTREG(slot); // return a Dx
}
// Free a FPU double reg: mark the slot unused; keep the cache entry alive
// for ST registers (their type tracking outlives the allocation).
void fpu_free_reg(dynarec_rv64_t* dyn, int reg)
{
    int slot = EXTIDX(reg);
    // TODO: check upper limit?
    dyn->e.fpuused[slot] = 0;
    int t = dyn->e.extcache[slot].t;
    if (t != EXT_CACHE_ST_F && t != EXT_CACHE_ST_D)
        dyn->e.extcache[slot].v = 0;
}
// Get an MMX double reg: MMX registers map 1:1 onto the EMM bank.
int fpu_get_reg_emm(dynarec_rv64_t* dyn, int emm)
{
    int slot = EMM0 + emm;
    dyn->e.fpuused[slot] = 1;
    dyn->e.extcache[slot].t = EXT_CACHE_MM;
    dyn->e.extcache[slot].n = emm;
    dyn->e.news |= (1<<slot);
    return EXTREG(slot);
}
// Get an XMM quad reg: XMM registers map 1:1 onto the XMM bank.
int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm)
{
    int slot = XMM0 + xmm;
    dyn->e.fpuused[slot] = 1;
    dyn->e.extcache[slot].t = t;
    dyn->e.extcache[slot].n = xmm;
    dyn->e.news |= (1<<slot);
    return EXTREG(slot);
}
// Reset fpu regs counter: mark every slot free and wipe the ext cache entries.
void fpu_reset_reg(dynarec_rv64_t* dyn)
{
    dyn->e.fpu_reg = 0;
    for (int slot = 0; slot < 24; ++slot) {
        dyn->e.fpuused[slot] = 0;
        dyn->e.extcache[slot].v = 0;
    }
}
// Return the cached type (EXT_CACHE_ST_F or EXT_CACHE_ST_D) of x87 stack slot 'a'
// as recorded at instruction ninst; defaults to double when not cached yet.
int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a)
{
    if (dyn->insts[ninst].e.swapped) {
        // the instruction swapped two ST entries: look the slot up under its pre-swap name
        if (dyn->insts[ninst].e.combined1 == a)
            a = dyn->insts[ninst].e.combined2;
        else if (dyn->insts[ninst].e.combined2 == a)
            a = dyn->insts[ninst].e.combined1;
    }
    for (int slot = 0; slot < 24; ++slot) {
        int t = dyn->insts[ninst].e.extcache[slot].t;
        if ((t == EXT_CACHE_ST_F || t == EXT_CACHE_ST_D)
            && dyn->insts[ninst].e.extcache[slot].n == a)
            return t;
    }
    // not in the cache yet, so will be fetched as a double
    return EXT_CACHE_ST_D;
}
// Return the current cached type of x87 stack slot 'a' (working cache, not per-inst);
// defaults to double during pass 0 (dyn->insts==NULL) or when not cached yet.
int extcache_get_current_st(dynarec_rv64_t* dyn, int ninst, int a)
{
    (void)ninst;
    if (!dyn->insts)
        return EXT_CACHE_ST_D;
    for (int slot = 0; slot < 24; ++slot) {
        int t = dyn->e.extcache[slot].t;
        if ((t == EXT_CACHE_ST_F || t == EXT_CACHE_ST_D)
            && dyn->e.extcache[slot].n == a)
            return t;
    }
    // not in the cache yet, so will be fetched as a double
    return EXT_CACHE_ST_D;
}
// Find the cache slot holding x87 stack entry 'a' as a float at instruction ninst;
// returns the slot index, or -1 when it is not cached as a float.
int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a)
{
    int found = -1;
    for (int slot = 0; slot < 24 && found < 0; ++slot)
        if (dyn->insts[ninst].e.extcache[slot].t == EXT_CACHE_ST_F
            && dyn->insts[ninst].e.extcache[slot].n == a)
            found = slot;
    return found;
}
// Same lookup as extcache_get_st_f: slot index of x87 entry 'a' cached as a float
// at instruction ninst, or -1. Kept as a separate entry point for the forward pass.
int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a)
{
    int idx = 0;
    while (idx < 24) {
        if (dyn->insts[ninst].e.extcache[idx].t == EXT_CACHE_ST_F
            && dyn->insts[ninst].e.extcache[idx].n == a)
            return idx;
        ++idx;
    }
    return -1;
}
// Find the slot of x87 entry 'a' cached as a float in the CURRENT working cache;
// returns the slot index, or -1 when absent.
int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a)
{
    int slot = 0;
    while (slot < 24) {
        if (dyn->e.extcache[slot].t == EXT_CACHE_ST_F
            && dyn->e.extcache[slot].n == a)
            return slot;
        ++slot;
    }
    return -1;
}
static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int maxinst, int a);
static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int maxinst, int a);
// Promote the "partner" of a combined two-operand x87 instruction to double:
// when 'a' is one of the instruction's combined operands, switch to the other one,
// mark it double at ninst, and propagate backward (and forward if it survives the pop).
static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int maxinst, int a)
{
    if(a == dyn->insts[ninst].e.combined1 || a == dyn->insts[ninst].e.combined2) {
        if(a == dyn->insts[ninst].e.combined1) {
            a = dyn->insts[ninst].e.combined2;
        } else
            a = dyn->insts[ninst].e.combined1;
        int i = extcache_get_st_f_noback(dyn, ninst, a);
        //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_combined, ninst=%d combined%c %d i=%d (stack:%d/%d)\n", ninst, (a == dyn->insts[ninst].e.combined2)?'2':'1', a ,i, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop);
        if(i>=0) {
            dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D;
            // propagate backward unless a barrier stops the cache at this instruction
            if(!dyn->insts[ninst].e.barrier)
                extcache_promote_double_internal(dyn, ninst-1, maxinst, a-dyn->insts[ninst].e.stack_push);
            // go forward is combined is not pop'd
            if(a-dyn->insts[ninst].e.stack_pop>=0)
                if(!dyn->insts[ninst+1].e.barrier)
                    extcache_promote_double_forward(dyn, ninst+1, maxinst, a-dyn->insts[ninst].e.stack_pop);
        }
    }
}
// Walk BACKWARD from ninst promoting x87 stack entry 'a' from float to double,
// adjusting 'a' for each instruction's stack pushes/pops, until the entry is no
// longer cached as a float, the start of the block, or a cache barrier is reached.
static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int maxinst, int a)
{
    if(dyn->insts[ninst+1].e.barrier)
        return;
    while(ninst>=0) {
        a+=dyn->insts[ninst].e.stack_pop;    // adjust Stack depth: add pop'd ST (going backward)
        int i = extcache_get_st_f(dyn, ninst, a);
        //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_internal, ninst=%d, a=%d st=%d:%d, i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, i);
        if(i<0) return;     // not cached as float here: promotion chain ends
        dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D;
        // check combined propagation too
        if(dyn->insts[ninst].e.combined1 || dyn->insts[ninst].e.combined2) {
            if(dyn->insts[ninst].e.swapped) {
                //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_internal, ninst=%d swapped %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].e.combined1 ,dyn->insts[ninst].e.combined2, a, dyn->insts[ninst].e.stack);
                // follow the swap: keep tracking the same physical entry
                if (a==dyn->insts[ninst].e.combined1)
                    a = dyn->insts[ninst].e.combined2;
                else if (a==dyn->insts[ninst].e.combined2)
                    a = dyn->insts[ninst].e.combined1;
            } else {
                //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_internal, ninst=%d combined %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].e.combined1 ,dyn->insts[ninst].e.combined2, a, dyn->insts[ninst].e.stack);
                extcache_promote_double_combined(dyn, ninst, maxinst, a);
            }
        }
        a-=dyn->insts[ninst].e.stack_push;  // // adjust Stack depth: remove push'd ST (going backward)
        --ninst;
        if(ninst<0 || a<0 || dyn->insts[ninst].e.barrier)
            return;
    }
}
// Walk FORWARD from ninst promoting x87 stack entry 'a' from float to double,
// adjusting 'a' for pushes/pops and following swaps, until the entry is popped,
// a barrier is hit, or maxinst is reached (then the current cache is promoted too).
static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int maxinst, int a)
{
    while((ninst!=-1) && (ninst<maxinst) && (a>=0)) {
        a+=dyn->insts[ninst].e.stack_push;  // // adjust Stack depth: add push'd ST (going forward)
        if((dyn->insts[ninst].e.combined1 || dyn->insts[ninst].e.combined2) && dyn->insts[ninst].e.swapped) {
            //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_forward, ninst=%d swapped %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].e.combined1 ,dyn->insts[ninst].e.combined2, a, dyn->insts[ninst].e.stack);
            // follow the swap: keep tracking the same physical entry
            if (a==dyn->insts[ninst].e.combined1)
                a = dyn->insts[ninst].e.combined2;
            else if (a==dyn->insts[ninst].e.combined2)
                a = dyn->insts[ninst].e.combined1;
        }
        int i = extcache_get_st_f_noback(dyn, ninst, a);
        //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_forward, ninst=%d, a=%d st=%d:%d(%d/%d), i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop, i);
        if(i<0) return;     // not cached as float here: promotion chain ends
        dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D;
        // check combined propagation too
        if((dyn->insts[ninst].e.combined1 || dyn->insts[ninst].e.combined2) && !dyn->insts[ninst].e.swapped) {
            //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_forward, ninst=%d combined %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].e.combined1 ,dyn->insts[ninst].e.combined2, a, dyn->insts[ninst].e.stack);
            extcache_promote_double_combined(dyn, ninst, maxinst, a);
        }
        a-=dyn->insts[ninst].e.stack_pop;   // adjust Stack depth: remove pop'd ST (going forward)
        if(dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].e.barrier)
            ++ninst;
        else
            ninst=-1;   // end of the linear block: stop walking
    }
    if(ninst==maxinst)
        // reached the instruction currently being built: promote the live cache too
        extcache_promote_double(dyn, ninst, a);
}
// Promote x87 stack entry 'a' from float to double in the CURRENT cache (and the
// per-inst record at ninst), then propagate the promotion to combined operands
// and backward through earlier instructions.
void extcache_promote_double(dynarec_rv64_t* dyn, int ninst, int a)
{
    int i = extcache_get_current_st_f(dyn, a);
    //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double, ninst=%d a=%d st=%d i=%d\n", ninst, a, dyn->e.stack, i);
    if(i<0) return;     // not cached as float: nothing to promote
    dyn->e.extcache[i].t = EXT_CACHE_ST_D;
    dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D;
    // check combined propagation too
    if(dyn->e.combined1 || dyn->e.combined2) {
        if(dyn->e.swapped) {
            //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double, ninst=%d swapped! %d/%d vs %d\n", ninst, dyn->e.combined1 ,dyn->e.combined2, a);
            // follow the swap: keep tracking the same physical entry
            if(dyn->e.combined1 == a)
                a = dyn->e.combined2;
            else if(dyn->e.combined2 == a)
                a = dyn->e.combined1;
        } else {
            //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double, ninst=%d combined! %d/%d vs %d\n", ninst, dyn->e.combined1 ,dyn->e.combined2, a);
            // a combined two-operand op: the partner must be promoted as well
            if(dyn->e.combined1 == a)
                extcache_promote_double(dyn, ninst, dyn->e.combined2);
            else if(dyn->e.combined2 == a)
                extcache_promote_double(dyn, ninst, dyn->e.combined1);
        }
    }
    a-=dyn->insts[ninst].e.stack_push;  // // adjust Stack depth: remove push'd ST (going backward)
    if(!ninst || a<0) return;
    extcache_promote_double_internal(dyn, ninst-1, ninst, a);
}
// Record that x87 entries a and b are combined by the current instruction, and
// return the precision the combined op can use: float only when BOTH are cached
// as floats, double otherwise.
int extcache_combine_st(dynarec_rv64_t* dyn, int ninst, int a, int b)
{
    dyn->e.combined1=a;
    dyn->e.combined2=b;
    int type_a = extcache_get_current_st(dyn, ninst, a);
    int type_b = extcache_get_current_st(dyn, ninst, b);
    if (type_a == EXT_CACHE_ST_F && type_b == EXT_CACHE_ST_F)
        return EXT_CACHE_ST_F;
    return EXT_CACHE_ST_D;
}
// Return 1 when the ext cache recorded at instruction 'ninst' holds nothing
// worth keeping: no pending stack adjustment, and every occupied slot is an
// x87 ST entry that is about to be popped anyway.
static int isCacheEmpty(dynarec_native_t* dyn, int ninst) {
    if(dyn->insts[ninst].e.stack_next)
        return 0;
    for(int idx=0; idx<24; ++idx) {
        if(!dyn->insts[ninst].e.extcache[idx].v)
            continue;   // slot unused
        int is_st = (dyn->insts[ninst].e.extcache[idx].t==EXT_CACHE_ST_F)
                 || (dyn->insts[ninst].e.extcache[idx].t==EXT_CACHE_ST_D);
        // anything that is not a soon-to-be-popped ST entry keeps the cache alive
        if(!is_st || dyn->insts[ninst].e.extcache[idx].n>=dyn->insts[ninst].e.stack_pop)
            return 0;
    }
    return 1;
}
// Decide whether the FPU/ext cache must be transformed before following the
// jump at instruction 'ninst'. Returns non-zero when the cache state at the
// jump site is not directly compatible with the state expected at the
// in-block jump target (index i2).
int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) {
    int i2 = dyn->insts[ninst].x64.jmp_insts;
    if(i2<0)
        return 1;   // target is outside this block: always transform
    if((dyn->insts[i2].x64.barrier&BARRIER_FLOAT))
        // if the barrier has already been applied, no transform needed
        return ((dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))?0:(isCacheEmpty(dyn, ninst)?0:1);
    int ret = 0;
    if(!i2) { // just purge
        // jumping back to block entry: the cache must be effectively empty
        // (same criteria as isCacheEmpty, but accumulating into ret)
        if(dyn->insts[ninst].e.stack_next) {
            return 1;
        }
        for(int i=0; i<24 && !ret; ++i)
            if(dyn->insts[ninst].e.extcache[i].v) {       // there is something at ninst for i
                if(!(
                (dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_F || dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_D)
                && dyn->insts[ninst].e.extcache[i].n<dyn->insts[ninst].e.stack_pop))
                    ret = 1;
            }
        return ret;
    }
    // Check if ninst can be compatible to i2
    if(dyn->insts[ninst].e.stack_next != dyn->insts[i2].e.stack-dyn->insts[i2].e.stack_push) {
        return 1;   // x87 stack depths would not line up
    }
    // compare against the target's cache state as it was BEFORE instruction
    // i2 executed (unwind its own effects first)
    extcache_t cache_i2 = dyn->insts[i2].e;
    extcacheUnwind(&cache_i2);
    for(int i=0; i<24; ++i) {
        if(dyn->insts[ninst].e.extcache[i].v) {       // there is something at ninst for i
            if(!cache_i2.extcache[i].v) {    // but there is nothing at i2 for i
                ret = 1;
            } else if(dyn->insts[ninst].e.extcache[i].v!=cache_i2.extcache[i].v) {  // there is something different
                if(dyn->insts[ninst].e.extcache[i].n!=cache_i2.extcache[i].n) {     // not the same x64 reg
                    ret = 1;
                }
                else if(dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_SS && cache_i2.extcache[i].t == EXT_CACHE_SD)
                {/* nothing */ }    // single where double is expected: widening is free, no transform
                else
                    ret = 1;
            }
        } else if(cache_i2.extcache[i].v)
            ret = 1;    // target expects a cached value this site does not have
    }
    return ret;
}
// Roll an extcache_t snapshot back to the state it had BEFORE the
// instruction it was captured after: undo the swap, drop newly-created
// entries, undo stack pushes, then rebuild the x87/MMX/SSE bookkeeping
// views from the extcache array.
void extcacheUnwind(extcache_t* cache)
{
    if(cache->swapped) {
        // unswap: find the two slots holding combined1/combined2 and swap
        // their stack numbers back
        int a = -1;
        int b = -1;
        for(int j=0; j<24 && ((a==-1) || (b==-1)); ++j)
            if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) {
                if(cache->extcache[j].n == cache->combined1)
                    a = j;
                else if(cache->extcache[j].n == cache->combined2)
                    b = j;
            }
        if(a!=-1 && b!=-1) {
            int tmp = cache->extcache[a].n;
            cache->extcache[a].n = cache->extcache[b].n;
            cache->extcache[b].n = tmp;
        }
        cache->swapped = 0;
        cache->combined1 = cache->combined2 = 0;
    }
    if(cache->news) {
        // remove the newly created extcache entries (bitmask of fresh slots)
        for(int i=0; i<24; ++i)
            if(cache->news&(1<<i))
                cache->extcache[i].v = 0;
        cache->news = 0;
    }
    if(cache->stack_push) {
        // unpush: entries below the push count disappear, the rest shift down
        for(int j=0; j<24; ++j) {
            if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) {
                if(cache->extcache[j].n<cache->stack_push)
                    cache->extcache[j].v = 0;
                else
                    cache->extcache[j].n-=cache->stack_push;
            }
        }
        cache->x87stack-=cache->stack_push;
        cache->stack-=cache->stack_push;
        cache->stack_push = 0;
    }
    cache->x87stack+=cache->stack_pop;  // a pending pop had not happened yet: restore it
    cache->stack_next = cache->stack;
    cache->stack_pop = 0;
    cache->barrier = 0;
    // And now, rebuild the x87cache info with extcache
    cache->mmxcount = 0;
    cache->fpu_scratch = 0;
    cache->fpu_extra_qscratch = 0;
    cache->fpu_reg = 0;
    for(int i=0; i<8; ++i) {
        cache->x87cache[i] = -1;
        cache->mmxcache[i] = -1;
        cache->x87reg[i] = 0;
        cache->ssecache[i*2].v = -1;
        cache->ssecache[i*2+1].v = -1;
    }
    int x87reg = 0;
    for(int i=0; i<24; ++i) {
        if(cache->extcache[i].v) {
            cache->fpuused[i] = 1;
            switch (cache->extcache[i].t) {
                case EXT_CACHE_MM:
                    cache->mmxcache[cache->extcache[i].n] = i;
                    ++cache->mmxcount;
                    ++cache->fpu_reg;
                    break;
                case EXT_CACHE_SS:
                    cache->ssecache[cache->extcache[i].n].reg = i;
                    cache->ssecache[cache->extcache[i].n].single = 1;
                    ++cache->fpu_reg;
                    break;
                case EXT_CACHE_SD:
                    cache->ssecache[cache->extcache[i].n].reg = i;
                    cache->ssecache[cache->extcache[i].n].single = 0;
                    ++cache->fpu_reg;
                    break;
                case EXT_CACHE_ST_F:
                case EXT_CACHE_ST_D:
                    cache->x87cache[x87reg] = cache->extcache[i].n;
                    cache->x87reg[x87reg] = i;
                    ++x87reg;
                    ++cache->fpu_reg;
                    break;
                case EXT_CACHE_SCR:
                    // scratch regs do not survive the instruction: drop them
                    cache->fpuused[i] = 0;
                    cache->extcache[i].v = 0;
                    break;
            }
        } else {
            cache->fpuused[i] = 0;
        }
    }
}
// Return the byte of 'val' that corresponds to 'address' within its
// naturally-aligned 32-bit word (little-endian byte order).
uint8_t extract_byte(uint32_t val, void* address){
    int shift = 8 * (int)(((uintptr_t)address) & 3);
    return (uint8_t)((val >> shift) & 0xff);
}
// Replace, inside the 32-bit word 'val', the byte selected by 'address'
// (its position within the aligned word, little-endian) with 'b'.
// Fix: the mask must be built from an unsigned constant. With the plain int
// literal 0xff, idx==24 made `0xff<<24` overflow a signed int, which is
// undefined behavior (C11 6.5.7p4).
uint32_t insert_byte(uint32_t val, uint8_t b, void* address){
    int idx = (((uintptr_t)address)&3)*8;
    val &= ~(UINT32_C(0xff)<<idx);
    val |= (((uint32_t)b)<<idx);
    return val;
}
// Return the 16-bit half of 'val' that corresponds to 'address' within its
// aligned 32-bit word (little-endian).
// will go badly if address is unaligned
uint16_t extract_half(uint32_t val, void* address){
    int shift = 8 * (int)(((uintptr_t)address) & 3);
    return (uint16_t)(val >> shift);
}
// Replace, inside the 32-bit word 'val', the 16-bit half selected by
// 'address' (position within the aligned word, little-endian) with 'h'.
// Fix: the mask must be built from an unsigned constant. With the plain int
// literal 0xffff, idx==16 made `0xffff<<16` overflow a signed int, which is
// undefined behavior (C11 6.5.7p4). Like extract_half, this assumes the
// half itself is aligned (address&3 is 0 or 2).
uint32_t insert_half(uint32_t val, uint16_t h, void* address){
    int idx = (((uintptr_t)address)&3)*8;
    val &= ~(UINT32_C(0xffff)<<idx);
    val |= (((uint32_t)h)<<idx);
    return val;
}
// Atomically exchange the byte at 'addr' with 'val' and return the previous
// byte. Emulated with a CAS retry loop on the enclosing aligned 32-bit word.
// NOTE(review): this retry loop implies rv64_lock_cas_d returns non-zero on
// failure / 0 on success — confirm against its definition.
uint8_t rv64_lock_xchg_b(void* addr, uint8_t val)
{
    uint32_t ret;
    // round addr down to its containing 4-byte word
    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
    do {
        // snapshot the word, then try to commit the word with the target
        // byte replaced; retry if another writer changed the word meanwhile
        ret = *aligned;
    } while(rv64_lock_cas_d(aligned, ret, insert_byte(ret, val, addr)));
    return extract_byte(ret, addr);
}
// Byte compare-and-swap, emulated with a word-wide CAS on the enclosing
// aligned 32-bit word. Returns 0 on success, non-zero on failure (the same
// convention rv64_lock_cas_d's callers use in their retry loops).
// Fix: the original ignored 'ref' entirely, so the "compare"-and-swap wrote
// 'val' even when the current byte did not match the expected value.
int rv64_lock_cas_b(void* addr, uint8_t ref, uint8_t val)
{
    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
    uint32_t tmp = *aligned;
    int idx = (((uintptr_t)addr)&3)*8;
    if(((tmp>>idx)&0xff) != (uint32_t)ref)
        return 1;   // current byte differs from expected: fail without writing
    return rv64_lock_cas_d(aligned, tmp, insert_byte(tmp, val, addr));
}
// 16-bit compare-and-swap, emulated with a word-wide CAS on the enclosing
// aligned 32-bit word. Returns 0 on success, non-zero on failure.
// Fix: the original ignored 'ref' entirely, so the swap was performed even
// when the current half-word did not match the expected value.
int rv64_lock_cas_h(void* addr, uint16_t ref, uint16_t val)
{
    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
    uint32_t tmp = *aligned;
    int idx = (((uintptr_t)addr)&3)*8;
    if(((tmp>>idx)&0xffff) != (uint32_t)ref)
        return 1;   // current half differs from expected: fail without writing
    return rv64_lock_cas_d(aligned, tmp, insert_half(tmp, val, addr));
}
// Return a human-readable name ("ST0", "st0", "MM3", "SS1", "SD2",
// "Scratch", "") for an ext-cache entry of type 't' holding register
// number 'n'. Uses a static buffer: not reentrant, and the result is only
// valid until the next call.
const char* getCacheName(int t, int n)
{
    static char buff[20];
    switch(t) {
        case EXT_CACHE_ST_D: snprintf(buff, sizeof(buff), "ST%d", n); break;
        case EXT_CACHE_ST_F: snprintf(buff, sizeof(buff), "st%d", n); break;
        case EXT_CACHE_MM:   snprintf(buff, sizeof(buff), "MM%d", n); break;
        case EXT_CACHE_SS:   snprintf(buff, sizeof(buff), "SS%d", n); break;
        case EXT_CACHE_SD:   snprintf(buff, sizeof(buff), "SD%d", n); break;
        case EXT_CACHE_SCR:  snprintf(buff, sizeof(buff), "Scratch"); break;
        case EXT_CACHE_NONE: buff[0]='\0'; break;
        default: break; // unknown type: buffer keeps its previous content
    }
    return buff;
}

View File

@ -0,0 +1,46 @@
#ifndef __DYNAREC_RV64_FUNCTIONS_H__
#define __DYNAREC_RV64_FUNCTIONS_H__
#include "../dynarec_native_functions.h"
typedef struct x64emu_s x64emu_t;
typedef struct dynarec_rv64_s dynarec_rv64_t;
// Get an FPU scratch reg
int fpu_get_scratch(dynarec_rv64_t* dyn);
// Reset scratch regs counter
void fpu_reset_scratch(dynarec_rv64_t* dyn);
// Get an x87 double reg
int fpu_get_reg_x87(dynarec_rv64_t* dyn, int t, int n);
// Get an MMX double reg
int fpu_get_reg_emm(dynarec_rv64_t* dyn, int emm);
// Get an XMM quad reg
int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm);
// Free a FPU/MMX/XMM reg
void fpu_free_reg(dynarec_rv64_t* dyn, int reg);
// Reset fpu regs counter
void fpu_reset_reg(dynarec_rv64_t* dyn);
// ---- Ext cache functions (RV64 counterpart of the ARM "Neon cache")
// Get type for STx
int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a);
// Get if STx is FLOAT or DOUBLE
int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a);
// Get actual type for STx
int extcache_get_current_st(dynarec_rv64_t* dyn, int ninst, int a);
// Get actual STx is FLOAT or DOUBLE
int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a);
// Back-propagate a change float->double
void extcache_promote_double(dynarec_rv64_t* dyn, int ninst, int a);
// Combine and propagate if needed (pass 1 only)
int extcache_combine_st(dynarec_rv64_t* dyn, int ninst, int a, int b); // with stack current dyn->n_stack*
// FPU Cache transformation (for loops) // Specific, needs to be written per backend
int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst);
// Undo the changes of a extcache to get the status before the instruction
void extcacheUnwind(extcache_t* cache);
const char* getCacheName(int t, int n);
#endif //__DYNAREC_RV64_FUNCTIONS_H__

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,13 @@
#include <stdint.h>
#include "rv64_emitter.h"
#define EMIT(A) *block = (A); ++block
// Emit, at 'addr', a 3-instruction trampoline that loads the 8-byte jump
// target stored at 'next' and branches to it: AUIPC + LD + BR.
// Fix: LD sign-extends its 12-bit immediate, so the AUIPC upper part must be
// biased by +0x800 (the SPLIT20/SPLIT12 convention used elsewhere in this
// backend); without the bias, any 'diff' with bit 11 set loaded from an
// address 0x1000 bytes too low. 'diff' is also kept signed so backward
// trampolines shift arithmetically.
void CreateJmpNext(void* addr, void* next)
{
    uint32_t* block = (uint32_t*)addr;
    intptr_t diff = (intptr_t)next - (intptr_t)addr;
    AUIPC(x2, (diff+0x800)>>12);
    LD(x2, x2, diff&0xfff);
    BR(x2);
}

View File

@ -0,0 +1,50 @@
// Pass 0: probe pass. Decodes the x86 stream, records per-instruction
// metadata (addresses, flag usage, jump targets, barriers) and grows the
// insts array — nothing is emitted.
#define INIT uintptr_t sav_addr=addr
// close the block: total x86 size, and back-fill the last instruction's size
#define FINI \
    dyn->isize = addr-sav_addr; \
    dyn->insts[ninst].x64.addr = addr; \
    if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr
#define MESSAGE(A, ...)
#define MAYSETFLAGS() dyn->insts[ninst].x64.may_set = 1
// record which x86 flags this opcode reads
#define READFLAGS(A) \
    dyn->insts[ninst].x64.use_flags = A; dyn->f.dfnone = 1;\
    dyn->f.pending=SF_SET
// record which x86 flags this opcode writes, and how (pending/immediate)
#define SETFLAGS(A,B) \
    dyn->insts[ninst].x64.set_flags = A; \
    dyn->insts[ninst].x64.state_flags = B; \
    dyn->f.pending=(B)&SF_SET_PENDING; \
    dyn->f.dfnone=((B)&SF_SET)?1:0;
#define EMIT(A)
// register a (possibly conditional) jump target for cross-referencing
#define JUMP(A, C) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C
#define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1
#define BARRIER_NEXT(A) dyn->insts[ninst+1].x64.barrier = A
// start a new decoded instruction, growing the insts array when needed
#define NEW_INST \
    ++dyn->size; \
    if(dyn->size+3>=dyn->cap) { \
        dyn->insts = (instruction_native_t*)customRealloc(dyn->insts, sizeof(instruction_native_t)*dyn->cap*2);\
        memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_native_t)*dyn->cap); \
        dyn->cap *= 2; \
    } \
    dyn->insts[ninst].x64.addr = ip; \
    dyn->insts[ninst].f_entry = dyn->f; \
    if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;}
#define INST_EPILOG \
    dyn->insts[ninst].f_exit = dyn->f; \
    dyn->insts[ninst].x64.has_next = (ok>0)?1:0;
#define INST_NAME(name)
// unsupported opcode: roll back this instruction and stop the block
#define DEFAULT \
    --dyn->size; \
    *ok = -1; \
    if(box64_dynarec_log>=LOG_INFO) {\
        dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \
            (void*)ip, PKip(0), \
            PKip(1), PKip(2), PKip(3), \
            PKip(4), PKip(5), PKip(6), \
            PKip(7), PKip(8), PKip(9), \
            PKip(10),PKip(11),PKip(12), \
            PKip(13),PKip(14)); \
        printFunctionAddr(ip, " => "); \
        dynarec_log(LOG_NONE, "\n"); \
    }

View File

@ -0,0 +1,11 @@
// Pass 1: flag-analysis pass. Only tracks the flag-cache state across
// instructions; nothing is emitted and no sizes are computed.
#define INIT
#define FINI
#define MESSAGE(A, ...)
#define EMIT(A)
#define NEW_INST \
    dyn->insts[ninst].f_entry = dyn->f; \

#define INST_EPILOG \
    dyn->insts[ninst].f_exit = dyn->f
#define INST_NAME(name)

View File

@ -0,0 +1,18 @@
// Pass 2: sizing pass. Counts emitted instruction bytes (each EMIT is 4
// bytes) and accumulates per-instruction addresses so pass 3 can lay the
// code out; nothing is written to memory.
#define INIT dyn->native_size = 0
#define FINI \
    if(ninst) { \
        dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \
        dyn->insts_size += 1+((dyn->insts[ninst].x64.size>dyn->insts[ninst].size)?dyn->insts[ninst].x64.size:dyn->insts[ninst].size)/15; \
    }
#define MESSAGE(A, ...)
#define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0)
// derive this instruction's start address from the previous one's extent
#define NEW_INST \
    if(ninst) { \
        dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \
        dyn->insts_size += 1+((dyn->insts[ninst-1].x64.size>dyn->insts[ninst-1].size)?dyn->insts[ninst-1].x64.size:dyn->insts[ninst-1].size)/15; \
    }
#define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size;
#define INST_NAME(name)
// Table64 entries only reserve space here (2 placeholder words)
#define TABLE64(A, V) {Table64(dyn, (V)); EMIT(0); EMIT(0);}
#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q); EMIT(0); EMIT(0);}

View File

@ -0,0 +1,60 @@
// Pass 3: emission pass. Actually writes the RV64 opcodes into the code
// block, records x86-to-native size pairs via addInst, and (optionally)
// dumps a detailed trace of every emitted instruction.
#define INIT
#define FINI \
    if(ninst) \
        addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \
    addInst(dyn->instsize, &dyn->insts_size, 0, 0);
// write one 32-bit opcode into the block (and trace it when dumping)
#define EMIT(A) \
    do { \
        if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), rv64_print(A, (uintptr_t)dyn->block));} \
        *(uint32_t*)(dyn->block) = (uint32_t)(A); \
        dyn->block += 4; dyn->native_size += 4; \
        dyn->insts[ninst].size2 += 4; \
    }while(0)
#define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
#define NEW_INST \
    if(box64_dynarec_dump) {\
        dynarec_log(LOG_NONE, "%sNew instruction %d, native=%p (0x%x)%s\n", \
            (box64_dynarec_dump>1)?"\e[4;32m":"", \
            ninst, dyn->block, dyn->native_size, \
            (box64_dynarec_dump>1)?"\e[m":"" \
            );\
    }\
    if(ninst) \
        addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst-1].x64.size, dyn->insts[ninst-1].size/4);
#define INST_EPILOG
// dump-time summary of an instruction: flags state, predecessors, jumps
#define INST_NAME(name) \
    if(box64_dynarec_dump) {\
        printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \
        dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d", \
            (box64_dynarec_dump>1)?"\e[32m":"", \
            (void*)(dyn->native_start+dyn->insts[ninst].address), \
            dyn->insts[ninst].size/4, \
            ninst, \
            dyn->insts[ninst].x64.barrier, \
            dyn->insts[ninst].x64.state_flags, \
            dyn->f.pending, \
            dyn->f.dfnone, \
            dyn->insts[ninst].x64.may_set?"may":"set", \
            dyn->insts[ninst].x64.set_flags, \
            dyn->insts[ninst].x64.gen_flags, \
            dyn->insts[ninst].x64.use_flags, \
            dyn->insts[ninst].x64.need_before, \
            dyn->insts[ninst].x64.need_after, \
            dyn->smread, dyn->smwrite); \
        if(dyn->insts[ninst].pred_sz) { \
            dynarec_log(LOG_NONE, ", pred="); \
            for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii) \
                dynarec_log(LOG_NONE, "%s%d", ii?"/":"", dyn->insts[ninst].pred[ii]); \
        } \
        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts>=0) \
            dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts); \
        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1) \
            dynarec_log(LOG_NONE, ", jmp=out"); \
        if(dyn->last_ip) \
            dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip); \
        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); \
    }
// load a 64-bit constant from the block's Table64 via AUIPC+LD
#define TABLE64(A, V) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));}
#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q); MESSAGE(LOG_DUMP, "  FTable64: %g\n", v.d); AUIPC(x1, SPLIT20(val64offset)); FLD(A, x1, SPLIT12(val64offset));}

View File

@ -0,0 +1,138 @@
#ifndef __DYNAREC_RV64_PRIVATE_H_
#define __DYNAREC_RV64_PRIVATE_H_
#include <stdint.h>
#include "../dynarec_private.h"
typedef struct x64emu_s x64emu_t;
typedef struct dynablock_s dynablock_t;
typedef struct instsize_s instsize_t;
#define BARRIER_MAYBE 8
// types of values a FPU ext-cache slot can hold
#define EXT_CACHE_NONE 0
#define EXT_CACHE_ST_D 1
#define EXT_CACHE_ST_F 2
#define EXT_CACHE_MM 3
#define EXT_CACHE_SS 4
#define EXT_CACHE_SD 5
#define EXT_CACHE_SCR 6
// one ext-cache slot: v==0 means empty, otherwise type t holds reg n
typedef union ext_cache_s {
    int8_t v;
    struct {
        uint8_t t:4;    // reg type
        uint8_t n:4;    // reg number
    };
} ext_cache_t;
// SSE register tracking: which native reg holds an XMM and whether as single
typedef union sse_cache_s {
    int8_t v;
    struct {
        uint8_t reg:7;
        uint8_t single:1;
    };
} sse_cache_t;
typedef struct extcache_s {
    // ext cache
    ext_cache_t extcache[24];
    int8_t stack;
    int8_t stack_next;
    int8_t stack_pop;
    int8_t stack_push;
    uint8_t combined1;
    uint8_t combined2;
    uint8_t swapped;    // the combined reg were swapped
    uint8_t barrier;    // is there a barrier at instruction epilog?
    uint32_t news;      // bitmask, which ext cache entries are new for this opcode
    // fpu cache
    int8_t x87cache[8]; // cache status for the 8 x87 register behind the fpu stack
    int8_t x87reg[8];   // reg used for x87cache entry
    int8_t mmxcache[8]; // cache status for the 8 MMX registers
    sse_cache_t ssecache[16];   // cache status for the 16 SSE(2) registers
    int8_t fpuused[24]; // all 10..31 & 0..1 double reg from fpu, used by x87, sse and mmx
    int8_t x87stack;    // cache stack counter
    int8_t mmxcount;    // number of mmx register used (not both mmx and x87 at the same time)
    int8_t fpu_scratch; // scratch counter
    int8_t fpu_extra_qscratch;  // some opcode need an extra quad scratch register
    int8_t fpu_reg;     // x87/sse/mmx reg counter
} extcache_t;
typedef struct flagcache_s {
    int pending;    // is there a pending flags here, or to check?
    int dfnone;     // if deferred flags is already set to df_none
} flagcache_t;
// per-instruction record filled across the 4 dynarec passes
typedef struct instruction_rv64_s {
    instruction_x64_t x64;
    uintptr_t address;  // (start) address of the arm emitted instruction
    uintptr_t epilog;   // epilog of current instruction (can be start of next, or barrier stuff)
    int size;           // size of the arm emitted instruction
    int size2;          // size of the arm emitted instruction after pass2
    int pred_sz;        // size of predecessor list
    int *pred;          // predecessor array
    uintptr_t mark, mark2, mark3;
    uintptr_t markf;
    uintptr_t markseg;
    uintptr_t marklock;
    int pass2choice;    // value for choices that are fixed on pass2 for pass3
    uintptr_t natcall;
    int retn;
    int barrier_maybe;
    flagcache_t f_exit; // flags status at end of instruction
    extcache_t e;       // extcache at end of instruction (but before poping)
    flagcache_t f_entry;    // flags status before the instruction begin
} instruction_rv64_t;
// state of one dynarec block being built
typedef struct dynarec_rv64_s {
    instruction_rv64_t* insts;
    int32_t size;
    int32_t cap;
    uintptr_t start;        // start of the block
    uint32_t isize;         // size in byte of x64 instructions included
    void* block;            // memory pointer where next instruction is emitted
    uintptr_t native_start; // start of the arm code
    size_t native_size;     // size of emitted arm code
    uintptr_t last_ip;      // last set IP in RIP (or NULL if unclean state) TODO: move to a cache something
    uint64_t* table64;      // table of 64bits value
    int table64size;        // size of table (will be appended at end of executable code)
    int table64cap;
    uintptr_t tablestart;
    flagcache_t f;
    extcache_t e;           // cache for the 10..31 0..1 double reg from fpu, plus x87 stack delta
    uintptr_t* next;        // variable array of "next" jump address
    int next_sz;
    int next_cap;
    int* predecessor;       // single array of all predecessor
    dynablock_t* dynablock;
    instsize_t* instsize;
    size_t insts_size;      // size of the instruction size array (calculated)
    uint8_t smread;         // for strongmem model emulation
    uint8_t smwrite;        // for strongmem model emulation
    uintptr_t forward;      // address of the last end of code while testing forward
    uintptr_t forward_to;   // address of the next jump to (to check if everything is ok)
    int32_t forward_size;   // size at the forward point
    int forward_ninst;      // ninst at the forward point
} dynarec_rv64_t;
// convert idx (0..24) to reg index (10..31 0..1)
#define EXTREG(A) (((A)+10)&31)
// convert reg index (10..31 0..1) or idx (0..24)
#define EXTIDX(A) (((A)-10)&31)
void add_next(dynarec_rv64_t *dyn, uintptr_t addr);
uintptr_t get_closest_next(dynarec_rv64_t *dyn, uintptr_t addr);
int is_nops(dynarec_rv64_t *dyn, uintptr_t addr, int n);
int is_instructions(dynarec_rv64_t *dyn, uintptr_t addr, int n);
int Table64(dynarec_rv64_t *dyn, uint64_t val);  // add a value to etable64 (if needed) and gives back the imm19 to use in LDR_literal
void CreateJmpNext(void* addr, void* next);
// call PrintTrace with the current emu state (debug helper)
#define GO_TRACE() \
    GETIP_(ip); \
    MV(A1, xRIP); \
    STORE_XEMU_CALL(); \
    MOV64x(A2, 1); \
    CALL(PrintTrace, -1); \
    LOAD_XEMU_CALL()
#endif //__DYNAREC_RV64_PRIVATE_H_

View File

@ -0,0 +1,414 @@
#ifndef __RV64_EMITTER_H__
#define __RV64_EMITTER_H__
/*
RV64 Emitter
*/
// RV64 ABI
/*
reg name description saver
------------------------------------------------------
x0 zero Hard-wired zero
x1 ra Return address Caller
x2 sp Stack pointer Callee
x3 gp Global pointer
x4 tp Thread pointer
x57 t02 Temporaries Caller
x8 s0/fp Saved register/frame pointer Callee
x9 s1 Saved register Callee
x1011 a01 Function arguments/return val. Caller
x1217 a27 Function arguments Caller
x1827 s211 Saved registers Callee
x2831 t36 Temporaries Caller
-------------------------------------------------------
f07 ft07 FP temporaries Caller
f89 fs01 FP saved registers Callee
f1011 fa01 FP arguments/return values Caller
f1217 fa27 FP arguments Caller
f1827 fs211 FP saved registers Callee
f2831 ft811 FP temporaries Caller
*/
// x86 Register mapping
#define xRAX 16
#define xRCX 17
#define xRDX 18
#define xRBX 19
#define xRSP 20
#define xRBP 21
#define xRSI 22
#define xRDI 23
#define xR8 24
#define xR9 25
#define xR10 26
#define xR11 27
#define xR12 28
#define xR13 29
#define xR14 30
#define xR15 31
#define xFlags 8
#define xRIP 7
// 32bits version
#define wEAX xRAX
#define wECX xRCX
#define wEDX xRDX
#define wEBX xRBX
#define wESP xRSP
#define wEBP xRBP
#define wESI xRSI
#define wEDI xRDI
#define wR8 xR8
#define wR9 xR9
#define wR10 xR10
#define wR11 xR11
#define wR12 xR12
#define wR13 xR13
#define wR14 xR14
#define wR15 xR15
#define wFlags xFlags
// scratch registers
#define x1 11
#define x2 12
#define x3 13
#define x4 14
#define x5 15
#define x6 6
// used to clear the upper 32bits
#define xMASK 5
// 32bits version of scratch
#define w1 x1
#define w2 x2
#define w3 x3
#define w4 x4
#define w5 x5
#define w6 x6
// emu is r10
#define xEmu 10
// RV64 RA
#define xRA 1
#define xSP 2
// RV64 args
#define A0 10
#define A1 11
#define A2 12
#define A3 13
#define A4 14
#define A5 15
#define A6 16
#define A7 17
// xZR reg is 0
#define xZR 0
#define wZR xZR
// replacement for F_OF internaly, using a reserved bit. Need to use F_OF2 internaly, never F_OF directly!
#define F_OF2 F_res3
// split a 32bits value in 20bits + 12bits, adjust the upper part is 12bits is negative
#define SPLIT20(A) (((A)+0x800)>>12)
#define SPLIT12(A) ((A)&0xfff)
// MOV64x/MOV32w is quite complex, so use a function for this
#define MOV64x(A, B) rv64_move64(dyn, ninst, A, B)
#define MOV32w(A, B) rv64_move32(dyn, ninst, A, B, 1)
#define MOV64xw(A, B) if(rex.w) {MOV64x(A, B);} else {MOV32w(A, B);}
// ZERO the upper part
#define ZEROUP(r) AND(r, r, xMASK)
#define R_type(funct7, rs2, rs1, funct3, rd, opcode) ((funct7)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
#define I_type(imm12, rs1, funct3, rd, opcode) ((imm12)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
#define S_type(imm12, rs2, rs1, funct3, opcode) (((imm12)>>5)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | ((imm12)&31)<<7 | (opcode))
#define B_type(imm13, rs2, rs1, funct3, opcode) ((((imm13)>>12)&1)<<31 | (((imm13)>>5)&63)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode))
#define U_type(imm32, rd, opcode) (((imm32)>>12)<<12 | (rd)<<7 | (opcode))
#define J_type(imm21, rd, opcode) ((((imm21)>>20)&1)<<31 | (((imm21)>>1)&0b1111111111)<<21 | (((imm21)>>11)&1)<<20 | (((imm21)>>12)&0b11111111)<<12 | (rd)<<7 | (opcode))
// RV32I
// put imm20 in the [31:12] bits of rd, zero [11:0] and sign extend bits31
#define LUI(rd, imm20) EMIT(U_type((imm20)<<12, rd, 0b0110111))
// put PC+imm20 in rd
#define AUIPC(rd, imm20) EMIT(U_type((imm20)<<12, rd, 0b0010111))
#define JAL_gen(rd, imm21) J_type(imm21, rd, 0b1101111)
// Unconditional branch, no return address set
#define B(imm21) EMIT(JAL_gen(xZR, imm21))
// Unconditional branch, return set to xRA
#define JAL(imm21) EMIT(JAL_gen(xRA, imm21))
#define JALR_gen(rd, rs1, imm12) I_type(imm12, rs1, 0b000, rd, 0b1100111)
// Unconditionnal branch to r, no return address set
#define BR(r) EMIT(JALR_gen(xZR, r, 0))
// Unconditionnal branch to r+i12, no return address set
#define BR_I12(r, imm12) EMIT(JALR_gen(xZR, r, (imm12)&0b111111111111))
// Unconditionnal branch to r, return address set to xRA
#define JALR(r) EMIT(JALR_gen(xRA, r, 0))
// Unconditionnal branch to r+i12, return address set to xRA
#define JALR_I12(r, imm12) EMIT(JALR_gen(xRA, r, (imm12)&0b111111111111))
// rd = rs1 + imm12
#define ADDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0010011))
// rd = rs1 - imm12 (pseudo instruction)
#define SUBI(rd, rs1, imm12) EMIT(I_type((-(imm12))&0b111111111111, rs1, 0b000, rd, 0b0010011))
// rd = (rs1<imm12)?1:0
#define SLTI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b010, rd, 0b0010011))
// rd = (rs1<imm12)?1:0 unsigned
#define SLTIU(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b011, rd, 0b0010011))
// rd = rs1 ^ imm12
#define XORI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b100, rd, 0b0010011))
// rd = rs1 | imm12
#define ORI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b110, rd, 0b0010011))
// rd = rs1 & imm12
#define ANDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b111, rd, 0b0010011))
// rd = imm12
#define MOV_U12(rd, imm12) ADDI(rd, xZR, imm12)
// nop
#define NOP() ADDI(xZR, xZR, 0)
// rd = rs1 + rs2
#define ADD(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0110011))
// rd = rs1 + rs2
#define ADDW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0111011))
// rd = rs1 + rs2
#define ADDxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011))
// rd = rs1 - rs2
#define SUB(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011))
// rd = rs1 - rs2
#define SUBW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0111011))
// rd = rs1 - rs2
#define SUBxw(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011))
// rd = rs1<<rs2
#define SLL(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0110011))
// rd = (rs1<rs2)?1:0
#define SLT(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b010, rd, 0b0110011))
// rd = (rs1<rs2)?1:0 Unsigned
#define SLTU(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b011, rd, 0b0110011))
// rd = rs1 ^ rs2
#define XOR(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b100, rd, 0b0110011))
// rd = rs1 ^ rs2
#define XORxw(rd, rs1, rs2) do{ XOR(rd, rs1, rs2); if (!rex.w) ZEROUP(rd); }while(0)
// rd = rs1>>rs2 logical
#define SRL(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0110011))
// rd = rs1>>rs2 arithmetic
#define SRA(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0110011))
// rd = rs1 | rs2
#define OR(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b110, rd, 0b0110011))
// rd = rs1 & rs2
#define AND(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b111, rd, 0b0110011))
// rd = rs1 (pseudo instruction)
#define MV(rd, rs1) ADDI(rd, rs1, 0)
// rd = rs1 (pseudo instruction)
#define MVxw(rd, rs1) if(rex.w) {MV(rd, rs1); } else {AND(rd, rs1, xMASK);}
// rd = !rs1
#define NOT(rd, rs1) XORI(rd, rs1, -1)
// rd = -rs1
#define NEG(rd, rs1) SUB(rd, xZR, rs1)
// rd = rs1 == 0
#define SEQZ(rd, rs1) SLTIU(rd, rs1, 1)
// rd = rs1 != 0
#define SNEZ(rd, rs1) SLTU(rd, xZR, rs1)
#define BEQ(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b000, 0b1100011))
#define BNE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b001, 0b1100011))
#define BLT(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b100, 0b1100011))
#define BGE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b101, 0b1100011))
#define BLTU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b110, 0b1100011))
#define BGEU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b111, 0b1100011))
// TODO: Find a better way to have conditionnal jumps? Imm is a relative jump address, so the the 2nd jump needs to be addapted
#define BEQ_safe(rs1, rs2, imm) if(imm>=-0x1000 && imm<=0x1000) {BEQ(rs1, rs2, imm); NOP();} else {BNE(rs1, rs2, 8); B(imm-4);}
#define BNE_safe(rs1, rs2, imm) if(imm>=-0x1000 && imm<=0x1000) {BNE(rs1, rs2, imm); NOP();} else {BEQ(rs1, rs2, 8); B(imm-4);}
#define BLT_safe(rs1, rs2, imm) if(imm>=-0x1000 && imm<=0x1000) {BLT(rs1, rs2, imm); NOP();} else {BGE(rs2, rs1, 8); B(imm-4);}
#define BGE_safe(rs1, rs2, imm) if(imm>=-0x1000 && imm<=0x1000) {BGE(rs1, rs2, imm); NOP();} else {BLT(rs2, rs1, 8); B(imm-4);}
#define BLTU_safe(rs1, rs2, imm) if(imm>=-0x1000 && imm<=0x1000) {BLTU(rs1, rs2, imm); NOP();} else {BGEU(rs2, rs1, 8); B(imm-4);}
#define BGEU_safe(rs1, rs2, imm) if(imm>=-0x1000 && imm<=0x1000) {BGEU(rs1, rs2, imm); NOP();} else {BLTU(rs2, rs1, 8); B(imm-4);}
#define BEQZ(rs1, imm13) BEQ(rs1, 0, imm13)
#define BNEZ(rs1, imm13) BNE(rs1, 0, imm13)
#define BEQZ_safe(rs1, imm) if(imm>=-0x1000 && imm<=0x1000) {BEQZ(rs1, imm); NOP();} else {BNEZ(rs1, 8); B(imm-4);}
#define BNEZ_safe(rs1, imm) if(imm>=-0x1000 && imm<=0x1000) {BNEZ(rs1, imm); NOP();} else {BEQZ(rs1, 8); B(imm-4);}
// rd = 4-bytes[rs1+imm12] signed extended
#define LW(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, rd, 0b0000011))
// rd = 2-bytes[rs1+imm12] signed extended
#define LH(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b001, rd, 0b0000011))
// rd = byte[rs1+imm12] signed extended
#define LB(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b000, rd, 0b0000011))
// rd = 2-bytes[rs1+imm12] zero extended
#define LHU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b101, rd, 0b0000011))
// rd = byte[rs1+imm12] zero extended
#define LBU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b100, rd, 0b0000011))
// byte[rs1+imm12] = rs2
#define SB(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b000, 0b0100011))
// 2-bytes[rs1+imm12] = rs2
#define SH(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b001, 0b0100011))
// 4-bytes[rs1+imm12] = rs2
#define SW(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011))
#define PUSH1(reg) do {SD(reg, xRSP, -8); SUBI(xRSP, xRSP, 8);} while(0)
#define POP1(reg) do {LD(reg, xRSP, 0); ADDI(xRSP, xRSP, 8);}while(0)
#define FENCE_gen(pred, succ) (((pred)<<24) | ((succ)<<20) | 0b0001111)
#define FENCE() EMIT(FENCE_gen(3, 3))
#define FENCE_I_gen() ((0b001<<12) | 0b0001111)
#define FENCE_I() EMIT(FENCE_I_gen())
// RV64I
#define LWU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b110, rd, 0b0000011))
// rd = [rs1 + imm12]
#define LD(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011, rd, 0b0000011))
// rd = [rs1 + imm12]
#define LDxw(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011<<(1-rex.w), rd, 0b0000011))
// [rs1 + imm12] = rs2
#define SD(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b011, 0b0100011))
// [rs1 + imm12] = rs2
#define SDxw(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010+rex.w, 0b0100011))
// Shift Left Immediate
#define SLLI(rd, rs1, imm6) EMIT(I_type(imm6, rs1, 0b001, rd, 0b0010011))
// Shift Right Logical Immediate
#define SRLI(rd, rs1, imm6) EMIT(I_type(imm6, rs1, 0b101, rd, 0b0010011))
// Shift Right Arithmetic Immediate
#define SRAI(rd, rs1, imm6) EMIT(I_type((imm6)|(0b010000<<6), rs1, 0b101, rd, 0b0010011))
// rd = rs1 + imm12
#define ADDIW(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0011011))
// rd = rs1 + imm12
#define ADDIxw(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, rex.w?0b0010011:0b0011011))
#define SEXT_W(rd, rs1) ADDIW(rd, rs1, 0)
// rd = rs1<<rs2
#define SLLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0111011))
// rd = rs1>>rs2 logical
#define SRLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0111011))
// rd = rs1>>rs2 arithmetic
// Shift Right Arithmetic, 32-bit, result sign-extended (RV64I SRAW)
#define SRAW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0111011))
// Shift Right Arithmetic, width chosen by rex.w: 64-bit SRA, or 32-bit SRAW with upper half zeroed
#define SRAxw(rd, rs1, rs2) if(rex.w) {SRA(rd, rs1, rs2);} else {SRAW(rd, rs1, rs2); ZEROUP(rd);}
// Shift Left Immediate, 32-bit, sign-extended
#define SLLIW(rd, rs1, imm5) EMIT(I_type(imm5, rs1, 0b001, rd, 0b0011011))
// Shift Left Immediate, width chosen by rex.w
#define SLLIxw(rd, rs1, imm) if (rex.w) { SLLI(rd, rs1, imm); } else { SLLIW(rd, rs1, imm); }
// Shift Right Logical Immediate, 32-bit, sign-extended
#define SRLIW(rd, rs1, imm5) EMIT(I_type(imm5, rs1, 0b101, rd, 0b0011011))
// Shift Right Logical Immediate, width chosen by rex.w
#define SRLIxw(rd, rs1, imm) if (rex.w) { SRLI(rd, rs1, imm); } else { SRLIW(rd, rs1, imm); }
// Shift Right Arithmetic Immediate, 32-bit, sign-extended (funct7=0100000 folded into the I-type immediate)
#define SRAIW(rd, rs1, imm5) EMIT(I_type((imm5)|(0b0100000<<5), rs1, 0b101, rd, 0b0011011))
// Shift Right Arithmetic Immediate, width chosen by rex.w
#define SRAIxw(rd, rs1, imm) if (rex.w) { SRAI(rd, rs1, imm); } else { SRAIW(rd, rs1, imm); }
// Zicsr: atomic read/write CSR: old CSR -> rd, rs1 -> CSR
#define CSRRW(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b001, rd, 0b1110011))
// Zicsr: atomic read and set bits: CSR -> rd, CSR |= rs1
#define CSRRS(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b010, rd, 0b1110011))
// Zicsr: atomic read and clear bits: CSR -> rd, CSR &= ~rs1
#define CSRRC(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b011, rd, 0b1110011))
// Zicsr immediate forms: the 5-bit zero-extended imm takes the rs1 slot
#define CSRRWI(rd, imm, csr) EMIT(I_type(csr, imm, 0b101, rd, 0b1110011))
#define CSRRSI(rd, imm, csr) EMIT(I_type(csr, imm, 0b110, rd, 0b1110011))
#define CSRRCI(rd, imm, csr) EMIT(I_type(csr, imm, 0b111, rd, 0b1110011))
// RV32M
// rd =(lower) rs1 * rs2 (both signed)
#define MUL(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, 0b0110011))
// rd =(upper) rs1 * rs2 (both signed)
#define MULH(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b001, rd, 0b0110011))
// rd =(upper) (signed)rs1 * (unsigned)rs2
#define MULHSU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b010, rd, 0b0110011))
// rd =(upper) rs1 * rs2 (both unsigned)
#define MULHU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b011, rd, 0b0110011))
// rd = rs1 / rs2 (both signed; full quotient, not upper half)
#define DIV(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0110011))
// rd = rs1 / rs2 (both unsigned)
#define DIVU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0110011))
// rd = rs1 mod rs2 (signed)
#define REM(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0110011))
// rd = rs1 mod rs2 (unsigned)
#define REMU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0110011))
// RV64M
// rd = rs1 * rs2 (32-bit, result sign-extended)
#define MULW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, 0b0111011))
// rd = rs1 * rs2, width chosen by rex.w
#define MULxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011))
// rd = rs1 / rs2 (32-bit signed, result sign-extended)
#define DIVW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0111011))
// rd = rs1 / rs2 (signed), width chosen by rex.w
#define DIVxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, rex.w?0b0110011:0b0111011))
// rd = rs1 / rs2 (32-bit unsigned, result sign-extended)
#define DIVUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0111011))
// rd = rs1 / rs2 (unsigned), width chosen by rex.w
#define DIVUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, rex.w?0b0110011:0b0111011))
// rd = rs1 mod rs2 (32-bit signed, result sign-extended)
#define REMW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0111011))
// rd = rs1 mod rs2 (signed), width chosen by rex.w
#define REMxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, rex.w?0b0110011:0b0111011))
// rd = rs1 mod rs2 (32-bit unsigned, result sign-extended)
#define REMUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0111011))
// rd = rs1 mod rs2 (unsigned), width chosen by rex.w
#define REMUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, rex.w?0b0110011:0b0111011))
// Pack the 5-bit AMO funct with the acquire/release ordering bits into the funct7 slot
#define AQ_RL(f5, aq, rl) ((f5 << 2) | ((aq&1) << 1) | (rl&1))
// RV32A
// Load-Reserved word; rs2 field is hardwired to 0
#define LR_W(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010, rd, 0b0101111))
// Store-Conditional word; rd receives 0 on success, non-zero on failure
#define SC_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
// RV64A
#define LR_D(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111))
#define SC_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
// LR/SC with width chosen by rex.w (funct3 0b010 word / 0b011 doubleword)
#define LRxw(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010|rex.w, rd, 0b0101111))
#define SCxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010|rex.w, rd, 0b0101111))
// RV32F
// Read round mode (frm CSR, 0x002) into rd
#define FRRM(rd) CSRRS(rd, xZR, 0x002)
// Write rounding mode 0b111 into frm, previous frm -> rd
// NOTE(review): 0b111 is the DYN encoding, reserved as a *stored* frm value per the
// spec — presumably intentional here as a sentinel; confirm against the dynarec usage.
#define FSRM(rd) CSRRWI(rd, 0b111, 0x002)
// load single precision from rs1+imm12 to frd
#define FLW(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, frd, 0b0000111))
// store single precision frs2 to rs1+imm12
#define FSW(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b010, 0b0100111))
// store rs1 with rs2 sign bit to rd (FSGNJ.S)
#define FSGNJS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b000, rd, 0b1010011))
// move rs1 to rd
#define FMVS(rd, rs1) FSGNJS(rd, rs1, rs1)
// store rs1 with opposite rs2 sign bit to rd (FSGNJN.S)
#define FSGNJNS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b001, rd, 0b1010011))
// -rs1 => rd
#define FNEGS(rd, rs1) FSGNJNS(rd, rs1, rs1)
// store rs1 with rs1^rs2 sign bit to rd (FSGNJX.S)
#define FSGNJXS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b1010011))
// |rs1| => rd
#define FABSS(rd, rs1) FSGNJXS(rd, rs1, rs1)
// Move from Single: raw 32-bit pattern, FPR -> GPR (FMV.X.W)
#define FMVXW(rd, frs1) EMIT(R_type(0b1110000, 0b00000, frs1, 0b000, rd, 0b1010011))
// Move to Single: raw 32-bit pattern, GPR -> FPR (FMV.W.X)
#define FMVWX(frd, rs1) EMIT(R_type(0b1111000, 0b00000, rs1, 0b000, frd, 0b1010011))
// RV32D
// load double precision from rs1+imm12 to frd
#define FLD(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011, frd, 0b0000111))
// store double precision frs2 to rs1+imm12
#define FSD(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b011, 0b0100111))
// Convert Double frs1 to Single frd
#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b000, frd, 0b1010011))
// Convert Single frs1 to Double frd
#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b000, frd, 0b1010011))
// store rs1 with rs2 sign bit to rd (FSGNJ.D)
#define FSGNJD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b000, rd, 0b1010011))
// move rs1 to rd
#define FMVD(rd, rs1) FSGNJD(rd, rs1, rs1)
// store rs1 with opposite rs2 sign bit to rd (FSGNJN.D)
#define FSGNJND(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b001, rd, 0b1010011))
// -rs1 => rd
#define FNEGD(rd, rs1) FSGNJND(rd, rs1, rs1)
// store rs1 with rs1^rs2 sign bit to rd (FSGNJX.D)
#define FSGNJXD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b010, rd, 0b1010011))
// |rs1| => rd
#define FABSD(rd, rs1) FSGNJXD(rd, rs1, rs1)
// RV64D
// Move from Double: raw 64-bit pattern, FPR -> GPR (FMV.X.D)
#define FMVXD(rd, frs1) EMIT(R_type(0b1110001, 0b00000, frs1, 0b000, rd, 0b1010011))
// Move to Double: raw 64-bit pattern, GPR -> FPR (FMV.D.X)
#define FMVDX(frd, rs1) EMIT(R_type(0b1111001, 0b00000, rs1, 0b000, frd, 0b1010011))
#endif //__RV64_EMITTER_H__

View File

@ -0,0 +1,64 @@
//riscv epilogue for dynarec
//Flushes the emulated register state held in native registers back into
//the emu structure, restores the native callee-saved registers, and returns.
//Called with pointer to emu as 1st parameter (a0).
//NOTE(review): the original header mentioned a 2nd "address to jump to"
//parameter (copy-pasted from the prologue); this code never reads a1.
.text
.align 4
.global rv64_epilog
.global rv64_epilog_fast
rv64_epilog:
//update register -> emu (x16..x31 hold the 16 emulated 64-bit registers, offsets 0..120)
sd x16, (a0)
sd x17, 8(a0)
sd x18, 16(a0)
sd x19, 24(a0)
sd x20, 32(a0)
sd x21, 40(a0)
sd x22, 48(a0)
sd x23, 56(a0)
sd x24, 64(a0)
sd x25, 72(a0)
sd x26, 80(a0)
sd x27, 88(a0)
sd x28, 96(a0)
sd x29, 104(a0)
sd x30, 112(a0)
sd x31, 120(a0)
// adjust flags bit 5 -> bit 11 (copy the flag kept at native bit 5 back to its x86 position)
li x5, ~(1<<11)
and x8, x8, x5 // clear stale bit 11
andi x5, x8, 1<<5 // isolate bit 5
slli x5, x5, 11-5 // move it up to bit 11
or x8, x8, x5
sd x8, 128(a0) //xFlags
sd x7, 136(a0) // put back reg value in emu, including EIP (so x7 must be EIP now)
// fallback to epilog_fast now, just restoring saved regs
rv64_epilog_fast:
// restore the 22-slot callee-saved frame laid down by rv64_prolog
ld ra, (sp) // restore ra
ld x8, 8(sp) // restore fp
ld x18, 16(sp)
ld x19, 24(sp)
ld x20, 32(sp)
ld x21, 40(sp)
ld x22, 48(sp)
ld x23, 56(sp)
ld x24, 64(sp)
ld x25, 72(sp)
ld x26, 80(sp)
ld x27, 88(sp)
fld f18, (12*8)(sp)
fld f19, (13*8)(sp)
fld f20, (14*8)(sp)
fld f21, (15*8)(sp)
fld f22, (16*8)(sp)
fld f23, (17*8)(sp)
fld f24, (18*8)(sp)
fld f25, (19*8)(sp)
fld f26, (20*8)(sp)
fld f27, (21*8)(sp)
addi sp, sp, (8 * 22) // drop the frame created by rv64_prolog
//end, return...
ret

View File

@ -0,0 +1,173 @@
// RV64 lock helpers (LR/SC and AMO based), called from C — see rv64_lock.h.
// There are 2 kinds: read helpers return the observed value; the cas_* write
// helpers return 0 on success, 1 on fail (value has been changed).
.text
.align 4
.global rv64_lock_xchg_dd
.global rv64_lock_xchg_d
.global rv64_lock_storeifnull
.global rv64_lock_storeifnull_d
.global rv64_lock_storeifref
.global rv64_lock_storeifref_d
.global rv64_lock_storeifref2_d
.global rv64_lock_decifnot0b
.global rv64_lock_storeb
.global rv64_lock_incif0
.global rv64_lock_decifnot0
.global rv64_lock_store
.global rv64_lock_cas_d
.global rv64_lock_cas_dd
.global rv64_lock_cas_dq
rv64_lock_xchg_dd:
// address is a0, value is a1, return old value in a0 (64-bit, aq+rl ordering)
amoswap.d.aqrl a0, a1, (a0)
ret
rv64_lock_xchg_d:
// address is a0, value is a1, return old value in a0 (32-bit, aq+rl ordering)
amoswap.w.aqrl a0, a1, (a0)
ret
rv64_lock_storeifnull:
// address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
fence rw, rw
1:
lr.d a2, (a0)
bnez a2, 2f // already non-null: give up, return what we saw
sc.d a3, a1, (a0)
bnez a3, 1b // reservation lost: retry
2:
mv a0, a2
ret
rv64_lock_storeifnull_d:
// 32-bit variant: a1 store to a0 only if [a0] is 0. return old [a0] value
fence rw, rw
1:
lr.w a2, (a0)
bnez a2, 2f
sc.w a3, a1, (a0)
bnez a3, 1b
2:
mv a0, a2
ret
rv64_lock_storeifref:
// address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
fence rw, rw
1:
lr.d a3, (a0)
bne a2, a3, 2f // mismatch: return the value we saw
sc.d a4, a1, (a0)
bnez a4, 1b
fence rw, rw
mv a0, a1 // stored: new value is a1
ret
2:
fence rw, rw
mv a0, a3
ret
rv64_lock_storeifref_d:
// 32-bit variant: a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
fence rw, rw
1:
lr.w a3, (a0)
bne a2, a3, 2f
sc.w a4, a1, (a0)
bnez a4, 1b
mv a0, a1
ret
2:
mv a0, a3
ret
rv64_lock_storeifref2_d:
// 32-bit: a1 store to a0 only if [a0] is a2. return old [a0] value either way
fence rw, rw
1:
lr.w a3, (a0)
bne a2, a3, 2f
sc.w a4, a1, (a0)
bnez a4, 1b
2:
mv a0, a3
ret
rv64_lock_decifnot0b:
// decrement the byte at [a0] unless it is already 0
// NOTE(review): the sc.w writes back only the masked low byte, zeroing the
// upper 24 bits of the containing word — only safe if those bytes are
// don't-care at every call site; confirm callers.
fence rw, rw
1:
lr.w a1, (a0)
andi a1, a1, 0xff // keep low byte only
beqz a1, 2f
addi a1, a1, -1
sc.w a2, a1, (a0)
bnez a2, 1b
2:
ret
rv64_lock_storeb:
// plain byte store followed by a full fence
sb a1, 0(a0)
fence rw, rw
ret
rv64_lock_decifnot0:
// decrement the int at [a0] unless 0; a0 returns the value left in a1
// (0 if it was already 0, otherwise the decremented value)
fence rw, rw
1:
lr.w a1, (a0)
beqz a1, 2f
addi a1, a1, -1
sc.w a2, a1, (a0)
bnez a2, 1b
2:
mv a0, a1
ret
rv64_lock_incif0:
// increment the int at [a0] only if it is 0
// NOTE(review): returns the post-increment value (1) on success, or the
// non-zero value observed on failure — not the "old value" as the header
// comment claims; confirm what callers test for.
fence rw, rw
1:
lr.w a1, (a0)
bnez a1, 2f
addi a1, a1, 1
sc.w a2, a1, (a0)
bnez a2, 1b
2:
mv a0, a1
ret
rv64_lock_store:
// plain 32-bit store followed by a full fence
sw a1, 0(a0)
fence rw, rw
ret
rv64_lock_cas_d:
// 32-bit CAS: if [a0]==a1 store a2. Return 0 on success, non-zero on fail
// (a spurious sc failure also reports fail — no retry loop, by design of the API)
lr.w t0, (a0)
bne t0, a1, 1f
sc.w a0, a2, (a0) // a0 <- 0 on success, non-zero on failure
ret
1:
li a0, 1
ret
rv64_lock_cas_dd:
// 64-bit CAS: if [a0]==a1 store a2. Return 0 on success, non-zero on fail
lr.d t0, (a0)
bne t0, a1, 1f
sc.d a0, a2, (a0)
ret
1:
li a0, 1
ret
rv64_lock_cas_dq:
// "(mostly) atomic" 16-byte CAS: if the first qword [a0]==a1, store a2 at
// [a0] and a3 at [a0+8]. Return 0 on success, non-zero on fail. Only the
// first qword is compared/stored atomically.
mv a4, a0 // keep base pointer; sc.d clobbers a0 with its status
lr.d t0, (a0)
bne t0, a1, 1f
sc.d a0, a2, (a0)
sd a3, 8(a4) // BUGFIX: second qword goes at offset 8 — "sd a3,(a4)" overwrote the qword just stored by sc.d
ret
1:
li a0, 1
ret

View File

@ -0,0 +1,60 @@
#ifndef __RV64_LOCK__H__
#define __RV64_LOCK__H__
#include <stdint.h>
// Atomic helpers implemented in assembly (rv64_lock.S) using LR/SC and AMO.
// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
extern int rv64_lock_cas_d(void* p, int32_t ref, int32_t val);
// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
extern int rv64_lock_cas_dd(void* p, int64_t ref, int64_t val);
// Atomically exchange value at [p] with val, return old [p] (64-bit)
extern uintptr_t rv64_lock_xchg_dd(void* p, uintptr_t val);
// Atomically exchange value at [p] with val, return old [p] (32-bit)
extern uint32_t rv64_lock_xchg_d(void* p, uint32_t val);
// Atomically store value to [p] only if [p] is 0. Return old [p] value (32-bit)
extern uint32_t rv64_lock_storeifnull_d(void*p, uint32_t val);
// Atomically store value to [p] only if [p] is NULL. Return old [p] value
extern void* rv64_lock_storeifnull(void*p, void* val);
// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
extern void* rv64_lock_storeifref(void*p, void* val, void* ref);
// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old) (32-bit)
extern uint32_t rv64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
// Atomically store value to [p] only if [p] is ref. Return old [p] value (32-bit)
extern uint32_t rv64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
// Decrement atomically the byte at [p] (but only if not 0)
// NOTE(review): the asm writes the whole 32-bit word back with the upper 24
// bits zeroed — confirm those bytes are don't-care at every call site.
extern void rv64_lock_decifnot0b(void*p);
// atomic byte store (with trailing memory barrier)
extern void rv64_lock_storeb(void*p, uint8_t b);
// Increment atomically the int at [p] only if it was 0.
// NOTE(review): the asm returns the post-increment value (1) on success, not
// the old 0 — "return the old value" is inaccurate; confirm caller expectations.
extern int rv64_lock_incif0(void*p);
// Decrement atomically the int at [p] (but only if not 0); returns the value left in [p]
extern int rv64_lock_decifnot0(void*p);
// atomic 32-bit store (with trailing memory barrier)
extern void rv64_lock_store(void*p, uint32_t v);
// (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
extern int rv64_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
// Not defined in assembler but in dynarec_rv64_functions
uint8_t extract_byte(uint32_t val, void* address);
uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
uint16_t extract_half(uint32_t val, void* address);
uint32_t insert_half(uint32_t val, uint16_t h, void* address);
uint8_t rv64_lock_xchg_b(void* addr, uint8_t v);
extern int rv64_lock_cas_b(void* p, uint8_t ref, uint8_t val);
extern int rv64_lock_cas_h(void* p, uint16_t ref, uint16_t val);
#endif //__RV64_LOCK__H__

View File

@ -0,0 +1,51 @@
//riscv update linker table for dynarec
//Trampoline entered (via jal/jalr, so "from" is in ra) when a dynarec block
//jumps to a not-yet-linked target: saves the caller-saved registers it uses,
//calls LinkNext to resolve the target, then jumps to the returned address.
//called with pointer to emu as 1st parameter (a0)
//and target IP address as 2nd parameter (a1)
//NOTE(review): "ip is at r12" below looked like a stale ARM-era note; on this
//port the saved RIP slot is x7 (24(sp)), which a3 points at for LinkNext.
.text
.align 4
.extern LinkNext
.global rv64_next
.8byte 0 // NULL pointer before rv64_next, for getDB
rv64_next:
// emu is a0
// IP address is a1
addi sp, sp, -(8 * 10) // 10-slot scratch frame for the caller-saved regs we use
sd a0, (sp)
sd a1, 8(sp)
sd x5, 16(sp)
sd x7, 24(sp) // saved RIP slot — a3 below points here
sd x16, 32(sp)
sd x17, 40(sp)
sd x28, 48(sp)
sd x29, 56(sp)
sd x30, 64(sp)
sd x31, 72(sp)
mv a2, ra // "from" is in ra, so put in a2
addi a3, sp, 24 // a3 is address to change rip
// call the function, PC-relative through the auipc/jalr pair anchored at 1:
1:
auipc a4, %pcrel_hi(LinkNext)
jalr a4, %pcrel_lo(1b) // NOTE(review): shorthand for jalr ra, %pcrel_lo(1b)(a4) — confirm assembler parses rs1=a4 here
// preserve return value (native address to jump to)
mv a3, a0
// pop regs
ld a0, (sp)
ld a1, 8(sp)
ld x5, 16(sp)
ld x7, 24(sp)
ld x16, 32(sp)
ld x17, 40(sp)
ld x28, 48(sp)
ld x29, 56(sp)
ld x30, 64(sp)
ld x31, 72(sp)
addi sp, sp, (8 * 10)
// return offset is jump address
jr a3

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
#ifndef _RV64_PRINTER_H_
#define _RV64_PRINTER_H_
#include <stdint.h>
#include <stdbool.h>

// Disassemble the 32-bit RV64 instruction word `data` located at address
// `addr` (used for PC-relative operands) into a human-readable string.
// NOTE(review): ownership/lifetime of the returned buffer is not visible
// here — presumably a static buffer in the implementation; confirm before
// holding the pointer across calls.
const char* rv64_print(uint32_t data, uint64_t addr);
#endif //_RV64_PRINTER_H_

View File

@ -0,0 +1,65 @@
//riscv prologue for dynarec (header previously said "arm" — stale copy)
//Save stuff, prepare stack and register
//called with pointer to emu as 1st parameter
//and address to jump to as 2nd parameter
.text
.align 4
.extern rv64_next
.global rv64_prolog
rv64_prolog:
//save the callee-saved registers used by dynarec blocks: ra, fp, x18-x27 and f18-f27 (22 slots)
addi sp, sp, -(8 * 22)
sd ra, (sp) // save ra
sd x8, 8(sp) // save fp
sd x18, 16(sp)
sd x19, 24(sp)
sd x20, 32(sp)
sd x21, 40(sp)
sd x22, 48(sp)
sd x23, 56(sp)
sd x24, 64(sp)
sd x25, 72(sp)
sd x26, 80(sp)
sd x27, 88(sp)
fsd f18, (12*8)(sp)
fsd f19, (13*8)(sp)
fsd f20, (14*8)(sp)
fsd f21, (15*8)(sp)
fsd f22, (16*8)(sp)
fsd f23, (17*8)(sp)
fsd f24, (18*8)(sp)
fsd f25, (19*8)(sp)
fsd f26, (20*8)(sp)
fsd f27, (21*8)(sp)
//setup emu -> register (x16..x31 receive the 16 emulated 64-bit registers, offsets 0..120)
ld x16, (a0)
ld x17, 8(a0)
ld x18, 16(a0)
ld x19, 24(a0)
ld x20, 32(a0)
ld x21, 40(a0)
ld x22, 48(a0)
ld x23, 56(a0)
ld x24, 64(a0)
ld x25, 72(a0)
ld x26, 80(a0)
ld x27, 88(a0)
ld x28, 96(a0)
ld x29, 104(a0)
ld x30, 112(a0)
ld x31, 120(a0)
ld x8, 128(a0) //xFlags
ld x7, 136(a0) // xRIP
// adjust flags bit 11 -> bit 5 (mirror the x86 flag into its native working position)
andi x8, x8, ~(1<<5) // clear stale bit 5 — probably not useful?
srli x5, x8, 11-5 // bring bit 11 down to position 5
andi x5, x5, 1<<5 // keep only that bit
or x8, x8, x5
// setup xMASK (x5 = 0x00000000FFFFFFFF)
xori x5, x0, -1
srli x5, x5, 32
// jump to block (ra stays live so the block can return/chain, e.g. via rv64_next)
jalr a1

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __ELFDWARF_PRIVATE_H_
#define __ELFDWARF_PRIVATE_H_

View File

@ -1,11 +1,9 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <elf.h>
#include "rvtransversion.h"
#include "box64version.h"
#include "elfloader.h"
#include "debug.h"
#include "elfload_dump.h"
@ -249,7 +247,7 @@ const char* IdxSymName(elfheader_t *h, int sym)
void DumpMainHeader(Elf64_Ehdr *header, elfheader_t *h)
{
if(rvtrans_dump) {
if(box64_dump) {
printf_dump(LOG_NEVER, "ELF Dump main header\n");
printf_dump(LOG_NEVER, " Entry point = %p\n", (void*)header->e_entry);
printf_dump(LOG_NEVER, " Program Header table offset = %p\n", (void*)header->e_phoff);
@ -275,7 +273,7 @@ void DumpMainHeader(Elf64_Ehdr *header, elfheader_t *h)
void DumpSymTab(elfheader_t *h)
{
if(rvtrans_dump && h->SymTab) {
if(box64_dump && h->SymTab) {
const char* name = ElfName(h);
printf_dump(LOG_NEVER, "ELF Dump SymTab(%zu)=\n", h->numSymTab);
for (size_t i=0; i<h->numSymTab; ++i)
@ -288,7 +286,7 @@ void DumpSymTab(elfheader_t *h)
void DumpDynamicSections(elfheader_t *h)
{
if(rvtrans_dump && h->Dynamic) {
if(box64_dump && h->Dynamic) {
printf_dump(LOG_NEVER, "ELF Dump Dynamic(%zu)=\n", h->numDynamic);
for (size_t i=0; i<h->numDynamic; ++i)
printf_dump(LOG_NEVER, " Dynamic %04zu : %s\n", i, DumpDynamic(h->Dynamic+i));
@ -298,7 +296,7 @@ void DumpDynamicSections(elfheader_t *h)
void DumpDynSym(elfheader_t *h)
{
if(rvtrans_dump && h->DynSym) {
if(box64_dump && h->DynSym) {
const char* name = ElfName(h);
printf_dump(LOG_NEVER, "ELF Dump DynSym(%zu)=\n", h->numDynSym);
for (size_t i=0; i<h->numDynSym; ++i) {
@ -311,7 +309,7 @@ void DumpDynSym(elfheader_t *h)
void DumpDynamicNeeded(elfheader_t *h)
{
if(rvtrans_dump && h->DynStrTab) {
if(box64_dump && h->DynStrTab) {
printf_dump(LOG_NEVER, "ELF Dump DT_NEEDED=====\n");
for (size_t i=0; i<h->numDynamic; ++i)
if(h->Dynamic[i].d_tag==DT_NEEDED) {
@ -323,7 +321,7 @@ void DumpDynamicNeeded(elfheader_t *h)
void DumpDynamicRPath(elfheader_t *h)
{
if(rvtrans_dump && h->DynStrTab) {
if(box64_dump && h->DynStrTab) {
printf_dump(LOG_NEVER, "ELF Dump DT_RPATH/DT_RUNPATH=====\n");
for (size_t i=0; i<h->numDynamic; ++i) {
if(h->Dynamic[i].d_tag==DT_RPATH) {
@ -339,7 +337,7 @@ void DumpDynamicRPath(elfheader_t *h)
void DumpRelTable(elfheader_t *h, int cnt, Elf64_Rel *rel, const char* name)
{
if(rvtrans_dump) {
if(box64_dump) {
const char* elfname = ElfName(h);
printf_dump(LOG_NEVER, "ELF Dump %s Table(%d) @%p\n", name, cnt, rel);
for (int i = 0; i<cnt; ++i)
@ -352,7 +350,7 @@ void DumpRelTable(elfheader_t *h, int cnt, Elf64_Rel *rel, const char* name)
void DumpRelATable(elfheader_t *h, int cnt, Elf64_Rela *rela, const char* name)
{
if(rvtrans_dump && h->rela) {
if(box64_dump && h->rela) {
const char* elfname = ElfName(h);
printf_dump(LOG_NEVER, "ELF Dump %s Table(%d) @%p\n", name, cnt, rela);
for (int i = 0; i<cnt; ++i)

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
@ -13,7 +10,12 @@
#include <link.h>
#include <unistd.h>
#include <errno.h>
#include "rvtransversion.h"
#ifndef _DLFCN_H
#include <dlfcn.h>
#endif
#include "custommem.h"
#include "box64version.h"
#include "elfloader.h"
#include "debug.h"
#include "elfload_dump.h"
@ -22,16 +24,18 @@
#include "x64run.h"
#include "bridge.h"
#include "wrapper.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "library.h"
#include "x64emu.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "callback.h"
#include "rvtransstack.h"
#include "custommem.h"
#include "box64stack.h"
#include "wine_tools.h"
#include "dictionnary.h"
#include "symbols.h"
#ifdef DYNAREC
#include "dynablock.h"
#endif
#include "../emu/x64emu_private.h"
#include "x64tls.h"
@ -40,7 +44,7 @@ void* my__IO_2_1_stdin_ = NULL;
void* my__IO_2_1_stdout_ = NULL;
// return the index of header (-1 if it doesn't exist)
int getElfIndex(rvtranscontext_t* ctx, elfheader_t* head) {
int getElfIndex(box64context_t* ctx, elfheader_t* head) {
for (int i=0; i<ctx->elfsize; ++i)
if(ctx->elfs[i]==head)
return i;
@ -53,10 +57,16 @@ elfheader_t* LoadAndCheckElfHeader(FILE* f, const char* name, int exec)
if(!h)
return NULL;
if ((h->path = realpath(name, NULL)) == NULL) {
if ((h->path = box_realpath(name, NULL)) == NULL) {
h->path = (char*)box_malloc(1);
h->path[0] = '\0';
}
h->mapsymbols = NewMapSymbols();
h->weaksymbols = NewMapSymbols();
h->localsymbols = NewMapSymbols();
h->refcnt = 1;
return h;
}
@ -65,6 +75,12 @@ void FreeElfHeader(elfheader_t** head)
if(!head || !*head)
return;
elfheader_t *h = *head;
#ifdef DYNAREC
/*if(h->text) {
dynarec_log(LOG_INFO, "Free Dynarec block for %s\n", h->path);
cleanDBFromAddressRange(my_context, h->text, h->textsz, 1);
}*/ // will be free at the end, no need to free it now
#endif
box_free(h->name);
box_free(h->path);
box_free(h->PHEntries);
@ -76,6 +92,10 @@ void FreeElfHeader(elfheader_t** head)
box_free(h->SymTab);
box_free(h->DynSym);
FreeMapSymbols(&h->mapsymbols);
FreeMapSymbols(&h->weaksymbols);
FreeMapSymbols(&h->localsymbols);
FreeElfMemory(h);
box_free(h);
@ -137,7 +157,7 @@ int CalcLoadAddr(elfheader_t* head)
const char* ElfName(elfheader_t* head)
{
if(!head)
return "(noelf)";
return "box64";
return head->name;
}
const char* ElfPath(elfheader_t* head)
@ -146,15 +166,15 @@ const char* ElfPath(elfheader_t* head)
return NULL;
return head->path;
}
int AllocElfMemory(rvtranscontext_t* context, elfheader_t* head, int mainbin)
int AllocElfMemory(box64context_t* context, elfheader_t* head, int mainbin)
{
uintptr_t offs = 0;
if(!head->vaddr && rvtrans_load_addr) {
offs = rvtrans_load_addr;
rvtrans_load_addr += head->memsz;
rvtrans_load_addr = (rvtrans_load_addr+0x10ffffffLL)&~0xffffffLL;
if(!head->vaddr && box64_load_addr) {
offs = box64_load_addr;
box64_load_addr += head->memsz;
box64_load_addr = (box64_load_addr+0x10ffffffLL)&~0xffffffLL;
}
int log_level = rvtrans_load_addr?LOG_INFO:LOG_DEBUG;
int log_level = box64_load_addr?LOG_INFO:LOG_DEBUG;
if(!offs)
offs = head->vaddr;
if(head->vaddr) {
@ -235,7 +255,7 @@ int AllocElfMemory(rvtranscontext_t* context, elfheader_t* head, int mainbin)
}
} else {
// vaddr is 0, load everything has a One block
if(!offs && rvtrans_wine)
if(!offs && box64_wine)
offs = (uintptr_t)find47bitBlock(head->memsz); // limit to 47bits...
printf_log(log_level, "Allocating 0x%zx memory @%p for Elf \"%s\"\n", head->memsz, (void*)offs, head->name);
void* p = mmap((void*)offs, head->memsz
@ -281,7 +301,7 @@ void FreeElfMemory(elfheader_t* head)
}
}
int LoadElfMemory(FILE* f, rvtranscontext_t* context, elfheader_t* head)
int LoadElfMemory(FILE* f, box64context_t* context, elfheader_t* head)
{
for (size_t i=0; i<head->numPHEntries; ++i) {
if(head->PHEntries[i].p_type == PT_LOAD) {
@ -289,7 +309,7 @@ int LoadElfMemory(FILE* f, rvtranscontext_t* context, elfheader_t* head)
char* dest = (char*)e->p_paddr + head->delta;
printf_log(LOG_DEBUG, "MMap block #%zu @%p offset=%p (0x%zx/0x%zx)\n", i, dest, (void*)e->p_offset, e->p_filesz, e->p_memsz);
void* p = (void*)-1;
if(e->p_memsz==e->p_filesz && !(e->p_align&0xfff)) {
if(e->p_memsz==e->p_filesz && !(e->p_align&(box64_pagesize-1))) {
printf_log(LOG_DEBUG, "MMap block #%zu @%p offset=%p (0x%zx/0x%zx, flags:0x%x)\n", i, dest, (void*)e->p_offset, e->p_filesz, e->p_memsz, e->p_flags);
p = mmap(dest, e->p_filesz, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fileno(f), e->p_offset);
}
@ -303,6 +323,13 @@ int LoadElfMemory(FILE* f, rvtranscontext_t* context, elfheader_t* head)
}
}
}
#ifdef DYNAREC
if(box64_dynarec && (e->p_flags & PF_X)) {
dynarec_log(LOG_DEBUG, "Add ELF eXecutable Memory %p:%p\n", dest, (void*)e->p_memsz);
addDBFromAddressRange((uintptr_t)dest, e->p_memsz);
}
#endif
// zero'd difference between filesz and memsz
/*if(e->p_filesz != e->p_memsz)
memset(dest+e->p_filesz, 0, e->p_memsz - e->p_filesz);*/ //block is already 0'd at creation
@ -326,7 +353,7 @@ int LoadElfMemory(FILE* f, rvtranscontext_t* context, elfheader_t* head)
return 0;
}
int ReloadElfMemory(FILE* f, rvtranscontext_t* context, elfheader_t* head)
int ReloadElfMemory(FILE* f, box64context_t* context, elfheader_t* head)
{
(void)context;
@ -337,7 +364,10 @@ int ReloadElfMemory(FILE* f, rvtranscontext_t* context, elfheader_t* head)
printf_log(LOG_DEBUG, "Re-loading block #%zu @%p (0x%zx/0x%zx)\n", i, dest, e->p_filesz, e->p_memsz);
int ret = fseeko64(f, e->p_offset, SEEK_SET);
if(ret==-1) {printf_log(LOG_NONE, "Fail to (re)seek PT_LOAD part #%zu (offset=%ld, errno=%d/%s)\n", i, e->p_offset, errno, strerror(errno)); return 1;}
uint32_t page_offset = (uintptr_t)dest & (rvtrans_pagesize - 1);
#ifdef DYNAREC
cleanDBFromAddressRange((uintptr_t)dest, e->p_memsz, 0);
#endif
uint32_t page_offset = (uintptr_t)dest & (box64_pagesize - 1);
mprotect(dest - page_offset, e->p_memsz + page_offset, PROT_READ | PROT_WRITE | PROT_EXEC);
setProtection((uintptr_t)dest - page_offset, e->p_memsz + page_offset, PROT_READ | PROT_WRITE | PROT_EXEC);
if(e->p_filesz) {
@ -356,7 +386,7 @@ int ReloadElfMemory(FILE* f, rvtranscontext_t* context, elfheader_t* head)
return 0;
}
int FindR64COPYRel(elfheader_t* h, const char* name, uintptr_t *offs, uint64_t** p, int version, const char* vername)
int FindR64COPYRel(elfheader_t* h, const char* name, uintptr_t *offs, uint64_t** p, size_t size, int version, const char* vername)
{
if(!h)
return 0;
@ -368,7 +398,7 @@ int FindR64COPYRel(elfheader_t* h, const char* name, uintptr_t *offs, uint64_t**
int t = ELF64_R_TYPE(rela[i].r_info);
Elf64_Sym *sym = &h->DynSym[ELF64_R_SYM(rela[i].r_info)];
const char* symname = SymName(h, sym);
if(t==R_X86_64_COPY && symname && !strcmp(symname, name)) {
if(t==R_X86_64_COPY && symname && !strcmp(symname, name) && sym->st_size==size) {
int version2 = h->VerSym?((Elf64_Half*)((uintptr_t)h->VerSym+h->delta))[ELF64_R_SYM(rela[i].r_info)]:-1;
if(version2!=-1) version2 &= 0x7fff;
if(version && !version2) version2=-1; // match a versionned symbol against a global "local" symbol
@ -395,13 +425,22 @@ int RelocateElfREL(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t*
uint64_t *p = (uint64_t*)(rel[i].r_offset + head->delta);
uintptr_t offs = 0;
uintptr_t end = 0;
size_t size = sym->st_size;
//uintptr_t tmp = 0;
int version = head->VerSym?((Elf64_Half*)((uintptr_t)head->VerSym+head->delta))[ELF64_R_SYM(rel[i].r_info)]:-1;
if(version!=-1) version &=0x7fff;
const char* vername = GetSymbolVersion(head, version);
const char* defver = GetDefaultVersion((bind==STB_WEAK)?my_context->weakdefver:my_context->globaldefver, symname);
if(bind==STB_LOCAL) {
offs = sym->st_value + head->delta;
end = offs + sym->st_size;
if(!symname || !symname[0]) {
offs = sym->st_value + head->delta;
end = offs + sym->st_size;
} else {
if(!offs && !end && local_maplib)
GetLocalSymbolStartEnd(local_maplib, symname, &offs, &end, head, version, vername);
if(!offs && !end)
GetLocalSymbolStartEnd(maplib, symname, &offs, &end, head, version, vername);
}
} else {
// this is probably very very wrong. A proprer way to get reloc need to be writen, but this hack seems ok for now
// at least it work for half-life, unreal, ut99, zsnes, Undertale, ColinMcRae Remake, FTL, ShovelKnight...
@ -435,14 +474,14 @@ int RelocateElfREL(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t*
*p += offs;
break;
case R_X86_64_GLOB_DAT:
if(head!=my_context->elfs[0] && !IsGlobalNoWeakSymbolInNative(maplib, symname, version, vername) && FindR64COPYRel(my_context->elfs[0], symname, &globoffs, &globp, version, vername)) {
if(head!=my_context->elfs[0] && !IsGlobalNoWeakSymbolInNative(maplib, symname, version, vername) && FindR64COPYRel(my_context->elfs[0], symname, &globoffs, &globp, size, version, vername)) {
// set global offs / size for the symbol
offs = sym->st_value;
end = offs + sym->st_size;
if(sym->st_size && offs) {
printf_dump(LOG_NEVER, "Apply %s R_X86_64_GLOB_DAT with R_X86_64_COPY @%p/%p (%p/%p -> %p/%p) size=%ld on sym=%s \n", (bind==STB_LOCAL)?"Local":"Global", p, globp, (void*)(p?(*p):0), (void*)(globp?(*globp):0), (void*)(offs + head->delta), (void*)globoffs, sym->st_size, symname);
memmove((void*)globoffs, (void*)offs, sym->st_size); // preapply to copy part from lib to main elf
AddWeakSymbol(GetGlobalData(maplib), symname, offs + head->delta, sym->st_size, version, vername);
AddUniqueSymbol(GetGlobalData(maplib), symname, offs + head->delta, sym->st_size, version, vername);
} else {
printf_dump(LOG_NEVER, "Apply %s R_X86_64_GLOB_DAT with R_X86_64_COPY @%p/%p (%p/%p -> %p/%p) null sized on sym=%s \n", (bind==STB_LOCAL)?"Local":"Global", p, globp, (void*)(p?(*p):0), (void*)(globp?(*globp):0), (void*)offs, (void*)globoffs, symname);
}
@ -454,7 +493,7 @@ int RelocateElfREL(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t*
end = globend;
}
if (!offs) {
if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start"))
if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start") && strcmp(symname, "collector_func_load"))
printf_log(LOG_NONE, "%s: Global Symbol %s (ver=%d/%s) not found, cannot apply R_X86_64_GLOB_DAT @%p (%p) in %s\n", (bind==STB_WEAK)?"Warning":"Error", symname, version, vername?vername:"(none)", p, *(void**)p, head->name);
} else {
printf_dump(LOG_NEVER, "Apply %s R_X86_64_GLOB_DAT @%p (%p -> %p) on sym=%s (ver=%d/%s)\n", (bind==STB_LOCAL)?"Local":"Global", p, (void*)(p?(*p):0), (void*)offs, symname, version, vername?vername:"(none)");
@ -467,11 +506,11 @@ int RelocateElfREL(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t*
uintptr_t old_offs = offs;
uintptr_t old_end = end;
offs = 0;
GetSymbolStartEnd(GetGlobalData(maplib), symname, &offs, &end, version, vername, 1); // try globaldata symbols first
GetSizedSymbolStartEnd(GetGlobalData(maplib), symname, &offs, &end, size, version, vername, 1, defver); // try globaldata symbols first
if(offs==0) {
GetNoSelfSymbolStartEnd(maplib, symname, &offs, &end, head, version, vername); // get original copy if any
GetNoSelfSymbolStartEnd(maplib, symname, &offs, &end, head, size, version, vername); // get original copy if any
if(!offs && local_maplib)
GetNoSelfSymbolStartEnd(local_maplib, symname, &offs, &end, head, version, vername);
GetNoSelfSymbolStartEnd(local_maplib, symname, &offs, &end, head, size, version, vername);
}
if(!offs) {
offs = old_offs;
@ -479,7 +518,7 @@ int RelocateElfREL(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t*
}
printf_dump(LOG_NEVER, "Apply %s R_X86_64_COPY @%p with sym=%s, @%p size=%ld (", (bind==STB_LOCAL)?"Local":"Global", p, symname, (void*)offs, sym->st_size);
memmove(p, (void*)offs, sym->st_size);
if(rvtrans_dump) {
if(box64_dump) {
uint64_t *k = (uint64_t*)p;
for (unsigned j=0; j<((sym->st_size>128u)?128u:sym->st_size); j+=8, ++k)
printf_dump(LOG_NEVER, "%s0x%016lX", j?" ":"", *k);
@ -503,6 +542,36 @@ int RelocateElfREL(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t*
*p += offs;
}
break;
#if 0
case R_X86_64_JUMP_SLOT:
// apply immediatly for gobject closure marshal or for LOCAL binding. Also, apply immediatly if it doesn't jump in the got
tmp = (uintptr_t)(*p);
if (bind==STB_LOCAL
|| ((symname && strstr(symname, "g_cclosure_marshal_")==symname))
|| !tmp
|| !((tmp>=head->plt && tmp<head->plt_end) || (tmp>=head->gotplt && tmp<head->gotplt_end))
) {
if (!offs) {
if(bind==STB_WEAK) {
printf_log(LOG_INFO, "Warning: Weak Symbol %s not found, cannot apply R_X86_64_JUMP_SLOT @%p (%p)\n", symname, p, *(void**)p);
} else {
printf_log(LOG_NONE, "Error: Symbol %s not found, cannot apply R_X86_64_JUMP_SLOT @%p (%p) in %s\n", symname, p, *(void**)p, head->name);
}
// return -1;
} else {
if(p) {
printf_dump(LOG_NEVER, "Apply %s R_X86_64_JUMP_SLOT @%p with sym=%s (%p -> %p)\n", (bind==STB_LOCAL)?"Local":"Global", p, symname, *(void**)p, (void*)offs);
*p = offs;
} else {
printf_log(LOG_NONE, "Warning, Symbol %s found, but Jump Slot Offset is NULL \n", symname);
}
}
} else {
printf_dump(LOG_NEVER, "Preparing (if needed) %s R_X86_64_JUMP_SLOT @%p (0x%lx->0x%0lx) with sym=%s to be apply later\n", (bind==STB_LOCAL)?"Local":"Global", p, *p, *p+head->delta, symname);
*p += head->delta;
}
break;
#endif
default:
printf_log(LOG_INFO, "Warning, don't know how to handle rel #%d %s (%p)\n", i, DumpRelType(ELF64_R_TYPE(rel[i].r_info)), p);
}
@ -536,13 +605,22 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
uint64_t *p = (uint64_t*)(rela[i].r_offset + head->delta);
uintptr_t offs = 0;
uintptr_t end = 0;
size_t size = sym->st_size;
elfheader_t* h_tls = NULL;//head;
int version = head->VerSym?((Elf64_Half*)((uintptr_t)head->VerSym+head->delta))[ELF64_R_SYM(rela[i].r_info)]:-1;
if(version!=-1) version &=0x7fff;
const char* vername = GetSymbolVersion(head, version);
const char* defver = GetDefaultVersion((bind==STB_WEAK)?my_context->weakdefver:my_context->globaldefver, symname);
if(bind==STB_LOCAL) {
offs = sym->st_value + head->delta;
end = offs + sym->st_size;
if(!symname || !symname[0]) {
offs = sym->st_value + head->delta;
end = offs + sym->st_size;
} else {
if(!offs && !end && local_maplib)
GetLocalSymbolStartEnd(local_maplib, symname, &offs, &end, head, version, vername);
if(!offs && !end)
GetLocalSymbolStartEnd(maplib, symname, &offs, &end, head, version, vername);
}
} else {
// this is probably very very wrong. A proprer way to get reloc need to be writen, but this hack seems ok for now
// at least it work for half-life, unreal, ut99, zsnes, Undertale, ColinMcRae Remake, FTL, ShovelKnight...
@ -585,11 +663,11 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
globoffs = offs;
globend = end;
offs = end = 0;
GetSymbolStartEnd(GetGlobalData(maplib), symname, &offs, &end, version, vername, 1); // try globaldata symbols first
GetSizedSymbolStartEnd(GetGlobalData(maplib), symname, &offs, &end, size, version, vername, 1, defver); // try globaldata symbols first
if(!offs && local_maplib)
GetNoSelfSymbolStartEnd(local_maplib, symname, &offs, &end, head, version, vername);
GetNoSelfSymbolStartEnd(local_maplib, symname, &offs, &end, head, size, version, vername);
if(!offs)
GetNoSelfSymbolStartEnd(maplib, symname, &offs, &end, head, version, vername);
GetNoSelfSymbolStartEnd(maplib, symname, &offs, &end, head, size, version, vername);
if(!offs) {offs = globoffs; end = globend;}
if(offs) {
// add r_addend to p?
@ -601,7 +679,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
}
break;
case R_X86_64_GLOB_DAT:
if(head!=my_context->elfs[0] && !IsGlobalNoWeakSymbolInNative(maplib, symname, version, vername) && FindR64COPYRel(my_context->elfs[0], symname, &globoffs, &globp, version, vername)) {
if(head!=my_context->elfs[0] && !IsGlobalNoWeakSymbolInNative(maplib, symname, version, vername) && FindR64COPYRel(my_context->elfs[0], symname, &globoffs, &globp, size, version, vername)) {
// set global offs / size for the symbol
offs = sym->st_value + head->delta;
end = offs + sym->st_size;
@ -610,7 +688,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
(bind==STB_LOCAL)?"Local":"Global", p, globp, (void*)(p?(*p):0),
(void*)(globp?(*globp):0), (void*)offs, (void*)globoffs, sym->st_size, symname, version, vername?vername:"(none)");
//memmove((void*)globoffs, (void*)offs, sym->st_size); // preapply to copy part from lib to main elf
AddWeakSymbol(GetGlobalData(maplib), symname, offs, sym->st_size, version, vername);
AddUniqueSymbol(GetGlobalData(maplib), symname, offs, sym->st_size, version, vername);
} else {
printf_dump(LOG_NEVER, "Apply %s R_X86_64_GLOB_DAT with R_X86_64_COPY @%p/%p (%p/%p -> %p/%p) null sized on sym=%s (ver=%d/%s)\n",
(bind==STB_LOCAL)?"Local":"Global", p, globp, (void*)(p?(*p):0),
@ -624,7 +702,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
end = globend;
}
if (!offs) {
if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start"))
if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start") && strcmp(symname, "collector_func_load"))
printf_log((bind==STB_WEAK)?LOG_INFO:LOG_NONE, "%s: Global Symbol %s not found, cannot apply R_X86_64_GLOB_DAT @%p (%p) in %s\n", (bind==STB_WEAK)?"Warning":"Error", symname, p, *(void**)p, head->name);
} else {
printf_dump(LOG_NEVER, "Apply %s R_X86_64_GLOB_DAT @%p (%p -> %p) on sym=%s (ver=%d/%s)\n", (bind==STB_LOCAL)?"Local":"Global", p, (void*)(p?(*p):0), (void*)offs, symname, version, vername?vername:"(none)");
@ -734,7 +812,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
}
// return -1;
} else {
if(!symname || symname[0]=='\0' || bind==STB_LOCAL)
if(!symname || symname[0]=='\0')
offs = sym->st_value;
if(p) {
int64_t tlsoffset = offs; // it's not an offset in elf memory
@ -763,8 +841,13 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t
}
return bindnow?ret_ok:0;
}
void checkHookedSymbols(lib_t *maplib, elfheader_t* h); // in mallochook.c
int RelocateElf(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t* head)
{
if((head->flags&DF_BIND_NOW) && !bindnow) {
bindnow = 1;
printf_log(LOG_DEBUG, "Forcing %s to Bind Now\n", head->name);
}
if(head->rel) {
int cnt = head->relsz / head->relent;
DumpRelTable(head, cnt, (Elf64_Rel *)(head->rel + head->delta), "Rel");
@ -779,13 +862,17 @@ int RelocateElf(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t* he
if(RelocateElfRELA(maplib, local_maplib, bindnow, head, cnt, (Elf64_Rela *)(head->rela + head->delta), NULL))
return -1;
}
checkHookedSymbols(maplib, head);
return 0;
}
int RelocateElfPlt(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t* head)
{
int need_resolver = 0;
if((head->flags&DF_BIND_NOW) && !bindnow) {
bindnow = 1;
printf_log(LOG_DEBUG, "Forcing %s to Bind Now\n", head->name);
}
if(head->pltrel) {
int cnt = head->pltsz / head->pltent;
if(head->pltrel==DT_REL) {
@ -868,7 +955,7 @@ uintptr_t GetEntryPoint(lib_t* maplib, elfheader_t* h)
(void)maplib;
uintptr_t ep = h->entrypoint + h->delta;
printf_log(LOG_DEBUG, "Entry Point is %p\n", (void*)ep);
if(rvtrans_dump) {
if(box64_dump) {
printf_dump(LOG_NEVER, "(short) Dump of Entry point\n");
int sz = 64;
uintptr_t lastbyte = GetLastByte(h);
@ -886,23 +973,7 @@ uintptr_t GetLastByte(elfheader_t* h)
void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* weaksymbols, kh_mapsymbols_t* localsymbols, elfheader_t* h)
{
if(rvtrans_dump && h->DynSym) DumpDynSym(h);
int libcef = (strstr(h->name, "libcef.so"))?1:0;
//libcef.so is linked with tcmalloc statically, but this cannot be easily supported in rvtrans, so hacking some "unlink" here
const char* avoid_libcef[] = {"malloc", "realloc", "free", "calloc", "cfree",
"__libc_malloc", "__libc_calloc", "__libc_free", "__libc_memallign", "__libc_pvalloc",
"__libc_realloc", "__libc_valloc", "__posix_memalign",
"valloc", "pvalloc", "posix_memalign", "malloc_stats", "malloc_usable_size",
/*"mallopt",*/ "localtime_r",
//c++ symbol from libstdc++ too
//"_ZnwmRKSt9nothrow_t", "_ZdaPv", // operator new(unsigned long, std::nothrow_t const&), operator delete[](void*)
//"_Znwm", "_ZdlPv", "_Znam", // operator new(unsigned long), operator delete(void*), operator new[](unsigned long)
//"_ZnwmSt11align_val_t", "_ZnwmSt11align_val_tRKSt9nothrow_t", // operator new(unsigned long, std::align_val_t)
//"_ZnamSt11align_val_t", "_ZnamSt11align_val_tRKSt9nothrow_t", // operator new[](unsigned long, std::align_val_t)
//"_ZdlPvRKSt9nothrow_t", "_ZdaPvSt11align_val_tRKSt9nothrow_t", // more delete operators
//"_ZdlPvmSt11align_val_t", "_ZdaPvRKSt9nothrow_t",
//"_ZdaPvSt11align_val_t", "_ZdlPvSt11align_val_t",
};
if(box64_dump && h->DynSym) DumpDynSym(h);
printf_dump(LOG_NEVER, "Will look for Symbol to add in SymTable(%zu)\n", h->numSymTab);
for (size_t i=0; i<h->numSymTab; ++i) {
const char * symname = h->StrTab+h->SymTab[i].st_name;
@ -911,7 +982,7 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
int vis = h->SymTab[i].st_other&0x3;
size_t sz = h->SymTab[i].st_size;
if((type==STT_OBJECT || type==STT_FUNC || type==STT_COMMON || type==STT_TLS || type==STT_NOTYPE)
&& (vis==STV_DEFAULT || vis==STV_PROTECTED) && (h->SymTab[i].st_shndx!=0)) {
&& (vis==STV_DEFAULT || vis==STV_PROTECTED || (vis==STV_HIDDEN && bind==STB_LOCAL)) && (h->SymTab[i].st_shndx!=0)) {
if(sz && strstr(symname, "@@")) {
char symnameversionned[strlen(symname)+1];
strcpy(symnameversionned, symname);
@ -921,6 +992,7 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
p+=2;
symname = AddDictionnary(my_context->versym, symnameversionned);
const char* vername = AddDictionnary(my_context->versym, p);
AddDefaultVersion((bind==STB_WEAK)?my_context->weakdefver:my_context->globaldefver, symname, vername);
if((bind==STB_GNU_UNIQUE /*|| (bind==STB_GLOBAL && type==STT_FUNC)*/) && FindGlobalSymbol(maplib, symname, 2, p))
continue;
uintptr_t offs = (type==STT_TLS)?h->SymTab[i].st_value:(h->SymTab[i].st_value + h->delta);
@ -935,13 +1007,6 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
}
} else {
int to_add = 1;
if(libcef) {
if(strstr(symname, "_Zn")==symname || strstr(symname, "_Zd")==symname)
to_add = 0;
for(int j=0; j<sizeof(avoid_libcef)/sizeof(avoid_libcef[0]) && to_add; ++j)
if(!strcmp(symname, avoid_libcef[j]))
to_add = 0;
}
if(!to_add || (bind==STB_GNU_UNIQUE && FindGlobalSymbol(maplib, symname, -1, NULL)))
continue;
uintptr_t offs = (type==STT_TLS)?h->SymTab[i].st_value:(h->SymTab[i].st_value + h->delta);
@ -965,20 +1030,18 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
int type = ELF64_ST_TYPE(h->DynSym[i].st_info);
int vis = h->DynSym[i].st_other&0x3;
if((type==STT_OBJECT || type==STT_FUNC || type==STT_COMMON || type==STT_TLS || type==STT_NOTYPE)
&& (vis==STV_DEFAULT || vis==STV_PROTECTED) && (h->DynSym[i].st_shndx!=0 && h->DynSym[i].st_shndx<=65521)) {
&& (vis==STV_DEFAULT || vis==STV_PROTECTED || (vis==STV_HIDDEN && bind==STB_LOCAL)) && (h->DynSym[i].st_shndx!=0 && h->DynSym[i].st_shndx<=65521)) {
uintptr_t offs = (type==STT_TLS)?h->DynSym[i].st_value:(h->DynSym[i].st_value + h->delta);
size_t sz = h->DynSym[i].st_size;
int version = h->VerSym?((Elf64_Half*)((uintptr_t)h->VerSym+h->delta))[i]:-1;
int add_default = (version!=-1 && (version&0x7fff)>1 && !(version&0x8000) && !GetDefaultVersion((bind==STB_WEAK)?my_context->weakdefver:my_context->globaldefver, symname))?1:0;
if(version!=-1) version &= 0x7fff;
const char* vername = GetSymbolVersion(h, version);
int to_add = 1;
if(libcef) {
if(strstr(symname, "_Zn")==symname || strstr(symname, "_Zd")==symname)
to_add = 0;
for(int j=0; j<sizeof(avoid_libcef)/sizeof(avoid_libcef[0]) && to_add; ++j)
if(!strcmp(symname, avoid_libcef[j]))
to_add = 0;
if(add_default) {
AddDefaultVersion((bind==STB_WEAK)?my_context->weakdefver:my_context->globaldefver, symname, vername);
printf_dump(LOG_NEVER, "Adding Default Version \"%s\" for Symbol\"%s\"\n", vername, symname);
}
int to_add = 1;
if(!to_add || (bind==STB_GNU_UNIQUE && FindGlobalSymbol(maplib, symname, version, vername)))
continue;
printf_dump(LOG_NEVER, "Adding Versionned Symbol(bind=%s) \"%s\" (ver=%d/%s) with offset=%p sz=%zu\n", (bind==STB_LOCAL)?"LOCAL":((bind==STB_WEAK)?"WEAK":"GLOBAL"), symname, version, vername?vername:"(none)", (void*)offs, sz);
@ -988,11 +1051,10 @@ void AddSymbols(lib_t *maplib, kh_mapsymbols_t* mapsymbols, kh_mapsymbols_t* wea
if(bind==STB_WEAK) {
AddSymbol(weaksymbols, symname, offs, sz, version, vername);
} else {
AddWeakSymbol(mapsymbols, symname, offs, sz, version?version:1, vername);
AddSymbol(mapsymbols, symname, offs, sz, version?version:1, vername);
}
}
}
}
/*
@ -1009,8 +1071,10 @@ $PLATFORM Expands to the processor type of the current machine (see the
uname(1) man page description of the -i option). For more details of this token
expansion, see System Specific Shared Objects
*/
int LoadNeededLibs(elfheader_t* h, lib_t *maplib, needed_libs_t* neededlibs, library_t *deplib, int local, int bindnow, rvtranscontext_t *rvtrans, x64emu_t* emu)
int LoadNeededLibs(elfheader_t* h, lib_t *maplib, int local, int bindnow, box64context_t *box64, x64emu_t* emu)
{
if(h->needed) // already done
return 0;
DumpDynamicRPath(h);
// update RPATH first
for (size_t i=0; i<h->numDynamic; ++i)
@ -1060,31 +1124,30 @@ int LoadNeededLibs(elfheader_t* h, lib_t *maplib, needed_libs_t* neededlibs, lib
box_free(platform);
}
if(strchr(rpath, '$')) {
printf_log(LOG_INFO, "RVTRANS: Warning, RPATH with $ variable not supported yet (%s)\n", rpath);
printf_log(LOG_INFO, "BOX64: Warning, RPATH with $ variable not supported yet (%s)\n", rpath);
} else {
printf_log(LOG_DEBUG, "Prepending path \"%s\" to RVTRANS_LD_LIBRARY_PATH\n", rpath);
PrependList(&rvtrans->rvtrans_ld_lib, rpath, 1);
printf_log(LOG_DEBUG, "Prepending path \"%s\" to BOX64_LD_LIBRARY_PATH\n", rpath);
PrependList(&box64->box64_ld_lib, rpath, 1);
}
if(rpath!=rpathref)
box_free(rpath);
}
if(!h->neededlibs && neededlibs)
h->neededlibs = neededlibs;
DumpDynamicNeeded(h);
int cnt = 0;
for (int i=0; i<h->numDynamic; ++i)
if(h->Dynamic[i].d_tag==DT_NEEDED)
++cnt;
const char* nlibs[cnt];
h->needed = new_neededlib(cnt);
if(h == my_context->elfs[0])
my_context->neededlibs = h->needed;
int j=0;
for (int i=0; i<h->numDynamic; ++i)
if(h->Dynamic[i].d_tag==DT_NEEDED)
nlibs[j++] = h->DynStrTab+h->delta+h->Dynamic[i].d_un.d_val;
h->needed->names[j++] = h->DynStrTab+h->delta+h->Dynamic[i].d_un.d_val;
// TODO: Add LD_LIBRARY_PATH and RPATH handling
if(AddNeededLib(maplib, neededlibs, deplib, local, bindnow, nlibs, cnt, rvtrans, emu)) {
if(AddNeededLib(maplib, local, bindnow, h->needed, box64, emu)) {
printf_log(LOG_INFO, "Error loading one of needed lib\n");
if(!allow_missing_libs)
return 1; //error...
@ -1124,6 +1187,40 @@ void RefreshElfTLS(elfheader_t* h)
}
}
}
// Flag an elf as already initialized, so its Init routines won't be run again.
// NULL-safe: does nothing when no elf is given.
void MarkElfInitDone(elfheader_t* h)
{
    if(!h)
        return;
    h->init_done = 1;
}
// Run the INIT entry and INIT_ARRAY of an elf — recursing into its needed
// libs first — when lazy PLT resolution lands in a not-yet-initialized elf.
// Uses RunSafeFunction (not RunFunctionWithEmu) because this can be triggered
// from inside emulation. No-op if the elf is NULL or already initialized.
void RunElfInitPltResolver(elfheader_t* h, x64emu_t *emu)
{
    if(!h || h->init_done)
        return;
    uintptr_t p = h->initentry + h->delta;
    h->init_done = 1;   // set before recursing to break dependency cycles
    if(h->needed) {     // guard: the needed-libs list may not have been built
        for(int i=0; i<h->needed->size; ++i) {
            library_t *lib = h->needed->libs[i];
            elfheader_t *lib_elf = GetElf(lib);
            if(lib_elf)
                RunElfInitPltResolver(lib_elf, emu);
        }
    }
    printf_dump(LOG_DEBUG, "Calling Init for %s @%p\n", ElfName(h), (void*)p);
    if(h->initentry)
        RunSafeFunction(my_context, p, 3, my_context->argc, my_context->argv, my_context->envv);
    printf_dump(LOG_DEBUG, "Done Init for %s\n", ElfName(h));
    // and check init array now
    Elf64_Addr *addr = (Elf64_Addr*)(h->initarray + h->delta);
    for (size_t i=0; i<h->initarray_sz; ++i) {
        if(addr[i]) {
            printf_dump(LOG_DEBUG, "Calling Init[%zu] for %s @%p\n", i, ElfName(h), (void*)addr[i]);
            RunSafeFunction(my_context, (uintptr_t)addr[i], 3, my_context->argc, my_context->argv, my_context->envv);
        }
    }
    h->fini_done = 0;   // can be fini'd now (in case it was re-inited)
    printf_dump(LOG_DEBUG, "All Init Done for %s\n", ElfName(h));
    return;
}
void RunElfInit(elfheader_t* h, x64emu_t *emu)
{
@ -1132,7 +1229,7 @@ void RunElfInit(elfheader_t* h, x64emu_t *emu)
// reset Segs Cache
memset(emu->segs_serial, 0, sizeof(emu->segs_serial));
uintptr_t p = h->initentry + h->delta;
rvtranscontext_t* context = GetEmuContext(emu);
box64context_t* context = GetEmuContext(emu);
// Refresh no-file part of TLS in case default value changed
RefreshElfTLS(h);
// check if in deferedInit
@ -1144,29 +1241,35 @@ void RunElfInit(elfheader_t* h, x64emu_t *emu)
context->deferedInitList[context->deferedInitSz++] = h;
return;
}
printf_log(LOG_DEBUG, "Calling Init for %s @%p\n", ElfName(h), (void*)p);
h->init_done = 1;
for(int i=0; i<h->needed->size; ++i) {
library_t *lib = h->needed->libs[i];
elfheader_t *lib_elf = GetElf(lib);
if(lib_elf)
RunElfInit(lib_elf, emu);
}
printf_dump(LOG_DEBUG, "Calling Init for %s @%p\n", ElfName(h), (void*)p);
if(h->initentry)
RunFunctionWithEmu(emu, 0, p, 3, context->argc, context->argv, context->envv);
printf_log(LOG_DEBUG, "Done Init for %s\n", ElfName(h));
printf_dump(LOG_DEBUG, "Done Init for %s\n", ElfName(h));
// and check init array now
Elf64_Addr *addr = (Elf64_Addr*)(h->initarray + h->delta);
for (size_t i=0; i<h->initarray_sz; ++i) {
if(addr[i]) {
printf_log(LOG_DEBUG, "Calling Init[%zu] for %s @%p\n", i, ElfName(h), (void*)addr[i]);
printf_dump(LOG_DEBUG, "Calling Init[%zu] for %s @%p\n", i, ElfName(h), (void*)addr[i]);
RunFunctionWithEmu(emu, 0, (uintptr_t)addr[i], 3, context->argc, context->argv, context->envv);
}
}
h->init_done = 1;
h->fini_done = 0; // can be fini'd now (in case it was re-inited)
printf_log(LOG_DEBUG, "All Init Done for %s\n", ElfName(h));
printf_dump(LOG_DEBUG, "All Init Done for %s\n", ElfName(h));
return;
}
EXPORTDYN
void RunDeferedElfInit(x64emu_t *emu)
{
rvtranscontext_t* context = GetEmuContext(emu);
box64context_t* context = GetEmuContext(emu);
if(!context->deferedInit)
return;
context->deferedInit = 0;
@ -1189,16 +1292,18 @@ void RunElfFini(elfheader_t* h, x64emu_t *emu)
// first check fini array
Elf64_Addr *addr = (Elf64_Addr*)(h->finiarray + h->delta);
for (int i=h->finiarray_sz-1; i>=0; --i) {
printf_log(LOG_DEBUG, "Calling Fini[%d] for %s @%p\n", i, ElfName(h), (void*)addr[i]);
printf_dump(LOG_DEBUG, "Calling Fini[%d] for %s @%p\n", i, ElfName(h), (void*)addr[i]);
RunFunctionWithEmu(emu, 0, (uintptr_t)addr[i], 0);
}
// then the "old-style" fini
if(h->finientry) {
uintptr_t p = h->finientry + h->delta;
printf_log(LOG_DEBUG, "Calling Fini for %s @%p\n", ElfName(h), (void*)p);
printf_dump(LOG_DEBUG, "Calling Fini for %s @%p\n", ElfName(h), (void*)p);
RunFunctionWithEmu(emu, 0, p, 0);
}
h->init_done = 0; // can be re-inited again...
for(int i=0; i<h->needed->size; ++i)
FiniLibrary(h->needed->libs[i], emu);
return;
}
@ -1239,7 +1344,7 @@ int IsAddressInElfSpace(const elfheader_t* h, uintptr_t addr)
}
return 0;
}
elfheader_t* FindElfAddress(rvtranscontext_t *context, uintptr_t addr)
elfheader_t* FindElfAddress(box64context_t *context, uintptr_t addr)
{
for (int i=0; i<context->elfsize; ++i)
if(IsAddressInElfSpace(context->elfs[i], addr))
@ -1256,6 +1361,22 @@ const char* FindNearestSymbolName(elfheader_t* h, void* p, uintptr_t* start, uin
const char* ret = NULL;
uintptr_t s = 0;
uint64_t size = 0;
#ifdef HAVE_TRACE
if(!h) {
if(getProtection((uintptr_t)p)&(PROT_READ)) {
if(*(uint8_t*)(p)==0xCC && *(uint8_t*)(p+1)=='S' && *(uint8_t*)(p+2)=='C') {
ret = getBridgeName(*(void**)(p+3+8));
if(ret) {
if(start)
*start = (uintptr_t)p;
if(sz)
*sz = 32;
}
}
}
return ret;
}
#endif
if(!h || h->fini_done)
return ret;
@ -1331,7 +1452,7 @@ int SameVersionnedSymbol(const char* name1, int ver1, const char* vername1, cons
return 0;
}
void* GetDTatOffset(rvtranscontext_t* context, unsigned long int index, unsigned long int offset)
void* GetDTatOffset(box64context_t* context, unsigned long int index, unsigned long int offset)
{
return (void*)((char*)GetTLSPointer(context, context->elfs[index])+offset);
}
@ -1346,16 +1467,11 @@ uint32_t GetTLSSize(elfheader_t* h)
return h->tlssize;
}
void* GetTLSPointer(rvtranscontext_t* context, elfheader_t* h)
void* GetTLSPointer(box64context_t* context, elfheader_t* h)
{
if(!h->tlssize)
return NULL;
tlsdatasize_t* ptr;
if ((ptr = (tlsdatasize_t*)pthread_getspecific(context->tlskey)) == NULL) {
ptr = (tlsdatasize_t*)fillTLSData(context);
}
if(ptr->tlssize != context->tlssize)
ptr = (tlsdatasize_t*)resizeTLSData(context, ptr);
tlsdatasize_t* ptr = getTLSData(context);
return ptr->data+h->tlsbase;
}
@ -1366,6 +1482,30 @@ void* GetDynamicSection(elfheader_t* h)
return h->Dynamic;
}
#ifdef DYNAREC
// Called when no dynablock exists in the standard translated "space" for
// addr: either grant one (forced mode) or explain why none can exist.
dynablock_t* GetDynablocksFromAddress(box64context_t *context, uintptr_t addr)
{
    (void)context;
    if(box64_dynarec_forced) {
        // forced dynarec: register a 1-byte range and hand back its block
        addDBFromAddressRange(addr, 1);
        return getDB(addr);
    }
    // if the address resolves inside a native elf, no emulated code lives there
    Dl_info info;
    if(dladdr((void*)addr, &info)) {
        dynarec_log(LOG_INFO, "Address %p is in a native Elf memory space (function \"%s\" in %s)\n", (void*)addr, info.dli_sname, info.dli_fname);
        return NULL;
    }
    dynarec_log(LOG_INFO, "Address %p not found in Elf memory and is not a native call wrapper\n", (void*)addr);
    return NULL;
}
#endif
typedef struct my_dl_phdr_info_s {
void* dlpi_addr;
const char* dlpi_name;
@ -1388,14 +1528,14 @@ GO(4)
// dl_iterate_phdr ...
#define GO(A) \
static uintptr_t my_dl_iterate_phdr_fct_##A = 0; \
static int my_dl_iterate_phdr_##A(struct dl_phdr_info* a, size_t b, void* c)\
{ \
if(!a->dlpi_name) \
return 0; \
if(!a->dlpi_name[0]) /*don't send informations about rvtrans itself*/ \
return 0; \
return RunFunction(my_context, my_dl_iterate_phdr_fct_##A, 3, a, b, c); \
static uintptr_t my_dl_iterate_phdr_fct_##A = 0; \
static int my_dl_iterate_phdr_##A(struct dl_phdr_info* a, size_t b, void* c) \
{ \
if(!a->dlpi_name) \
return 0; \
if(!a->dlpi_name[0]) /*don't send informations about box64 itself*/ \
return 0; \
return (int)RunFunction(my_context, my_dl_iterate_phdr_fct_##A, 3, a, b, c); \
}
SUPER()
#undef GO
@ -1416,7 +1556,7 @@ static void* find_dl_iterate_phdr_Fct(void* fct)
EXPORT int my_dl_iterate_phdr(x64emu_t *emu, void* F, void *data) {
printf_log(LOG_DEBUG, "Call to partially implemented dl_iterate_phdr(%p, %p)\n", F, data);
rvtranscontext_t *context = GetEmuContext(emu);
box64context_t *context = GetEmuContext(emu);
const char* empty = "";
int ret = 0;
for (int idx=0; idx<context->elfsize; ++idx) {
@ -1444,73 +1584,78 @@ void ResetSpecialCaseMainElf(elfheader_t* h)
if(strcmp(symname, "_IO_2_1_stderr_")==0 && ((void*)sym->st_value+h->delta)) {
memcpy((void*)sym->st_value+h->delta, stderr, sym->st_size);
my__IO_2_1_stderr_ = (void*)sym->st_value+h->delta;
printf_log(LOG_DEBUG, "RVTRANS: Set @_IO_2_1_stderr_ to %p\n", my__IO_2_1_stderr_);
printf_log(LOG_DEBUG, "BOX64: Set @_IO_2_1_stderr_ to %p\n", my__IO_2_1_stderr_);
} else
if(strcmp(symname, "_IO_2_1_stdin_")==0 && ((void*)sym->st_value+h->delta)) {
memcpy((void*)sym->st_value+h->delta, stdin, sym->st_size);
my__IO_2_1_stdin_ = (void*)sym->st_value+h->delta;
printf_log(LOG_DEBUG, "RVTRANS: Set @_IO_2_1_stdin_ to %p\n", my__IO_2_1_stdin_);
printf_log(LOG_DEBUG, "BOX64: Set @_IO_2_1_stdin_ to %p\n", my__IO_2_1_stdin_);
} else
if(strcmp(symname, "_IO_2_1_stdout_")==0 && ((void*)sym->st_value+h->delta)) {
memcpy((void*)sym->st_value+h->delta, stdout, sym->st_size);
my__IO_2_1_stdout_ = (void*)sym->st_value+h->delta;
printf_log(LOG_DEBUG, "RVTRANS: Set @_IO_2_1_stdout_ to %p\n", my__IO_2_1_stdout_);
printf_log(LOG_DEBUG, "BOX64: Set @_IO_2_1_stdout_ to %p\n", my__IO_2_1_stdout_);
} else
if(strcmp(symname, "_IO_stderr_")==0 && ((void*)sym->st_value+h->delta)) {
memcpy((void*)sym->st_value+h->delta, stderr, sym->st_size);
my__IO_2_1_stderr_ = (void*)sym->st_value+h->delta;
printf_log(LOG_DEBUG, "RVTRANS: Set @_IO_stderr_ to %p\n", my__IO_2_1_stderr_);
printf_log(LOG_DEBUG, "BOX64: Set @_IO_stderr_ to %p\n", my__IO_2_1_stderr_);
} else
if(strcmp(symname, "_IO_stdin_")==0 && ((void*)sym->st_value+h->delta)) {
memcpy((void*)sym->st_value+h->delta, stdin, sym->st_size);
my__IO_2_1_stdin_ = (void*)sym->st_value+h->delta;
printf_log(LOG_DEBUG, "RVTRANS: Set @_IO_stdin_ to %p\n", my__IO_2_1_stdin_);
printf_log(LOG_DEBUG, "BOX64: Set @_IO_stdin_ to %p\n", my__IO_2_1_stdin_);
} else
if(strcmp(symname, "_IO_stdout_")==0 && ((void*)sym->st_value+h->delta)) {
memcpy((void*)sym->st_value+h->delta, stdout, sym->st_size);
my__IO_2_1_stdout_ = (void*)sym->st_value+h->delta;
printf_log(LOG_DEBUG, "RVTRANS: Set @_IO_stdout_ to %p\n", my__IO_2_1_stdout_);
printf_log(LOG_DEBUG, "BOX64: Set @_IO_stdout_ to %p\n", my__IO_2_1_stdout_);
}
}
}
}
void CreateMemorymapFile(rvtranscontext_t* context, int fd)
void CreateMemorymapFile(box64context_t* context, int fd)
{
// this will transform the current memory map
// by annotating anonymous entries that belong to the emulated elf
// also anonymizing the current stack
// and setting the emulated stack as the current one
char* line = NULL;
size_t len = 0;
char buff[1024];
struct stat st;
int dummy;
FILE* f = fopen("/proc/self/maps", "r");
if(!f)
return;
while(getline(&line, &len, f)>0) {
// line is like
// aaaadd750000-aaaadd759000 r-xp 00000000 103:02 13386730 /usr/bin/cat
uintptr_t start, end;
if (sscanf(line, "%zx-%zx", &start, &end)==2) {
elfheader_t* h = FindElfAddress(my_context, start);
int l = strlen(line);
if(h && l<73) {
sprintf(buff, "%s%*s\n", line, 74-l, h->name);
dummy = write(fd, buff, strlen(buff));
} else if(start==(uintptr_t)my_context->stack) {
sprintf(buff, "%s%*s\n", line, 74-l, "[stack]");
dummy = write(fd, buff, strlen(buff));
} else if (strstr(line, "[stack]")) {
char* p = strstr(line, "[stack]")-1;
while (*p==' ' || *p=='\t') --p;
p[1]='\0';
strcat(line, "\n");
dummy = write(fd, line, strlen(line));
} else {
dummy = write(fd, line, strlen(line));
}
}
}
fclose(f);
(void)dummy;
elfheader_t *h = context->elfs[0];
if (stat(h->path, &st)) {
printf_log(LOG_INFO, "Failed to stat file %s (creating memory maps \"file\")!", h->path);
// Some constants, to have "valid" values
st.st_dev = makedev(0x03, 0x00);
st.st_ino = 0;
}
// TODO: create heap entry?
for (size_t i=0; i<h->numPHEntries; ++i) {
if (h->PHEntries[i].p_memsz == 0) continue;
sprintf(buff, "%016lx-%016lx %c%c%c%c %016lx %02x:%02x %ld %s\n", (uintptr_t)h->PHEntries[i].p_vaddr + h->delta,
(uintptr_t)h->PHEntries[i].p_vaddr + h->PHEntries[i].p_memsz + h->delta,
(h->PHEntries[i].p_type & (PF_R|PF_X) ? 'r':'-'), (h->PHEntries[i].p_type & PF_W ? 'w':'-'),
(h->PHEntries[i].p_type & PF_X ? 'x':'-'), 'p', // p for private or s for shared
(uintptr_t)h->PHEntries[i].p_offset,
major(st.st_dev), minor(st.st_dev), st.st_ino, h->path);
dummy = write(fd, buff, strlen(buff));
}
// create stack entry
sprintf(buff, "%16lx-%16lx %c%c%c%c %16lx %02x:%02x %ld %s\n",
(uintptr_t)context->stack, (uintptr_t)context->stack+context->stacksz,
'r','w','-','p', 0L, 0, 0, 0L, "[stack]");
dummy = write(fd, buff, strlen(buff));
}
void ElfAttachLib(elfheader_t* head, library_t* lib)
@ -1520,6 +1665,25 @@ void ElfAttachLib(elfheader_t* head, library_t* lib)
head->lib = lib;
}
// Accessor: the elf's global symbols map (NULL-safe).
kh_mapsymbols_t* GetMapSymbols(elfheader_t* h)
{
    return h ? h->mapsymbols : NULL;
}
// Accessor: the elf's weak symbols map (NULL-safe).
kh_mapsymbols_t* GetWeakSymbols(elfheader_t* h)
{
    return h ? h->weaksymbols : NULL;
}
// Accessor: the elf's local symbols map (NULL-safe).
kh_mapsymbols_t* GetLocalSymbols(elfheader_t* h)
{
    return h ? h->localsymbols : NULL;
}
typedef struct search_symbol_s{
const char* name;
void* addr;
@ -1585,7 +1749,7 @@ void* GetNativeSymbolUnversionned(void* lib, const char* name)
if(lib)
s.lib = lib;
else
s.lib = my_context->rvtranslib;
s.lib = my_context->box64lib;
printf_log(LOG_INFO, "Look for %s in loaded elfs\n", name);
dl_iterate_phdr(dl_iterate_phdr_findsymbol, &s);
return s.addr;
@ -1625,8 +1789,14 @@ EXPORT void PltResolver(x64emu_t* emu)
emu->quit = 1;
return;
} else {
elfheader_t* sym_elf = FindElfAddress(my_context, offs);
if(sym_elf && sym_elf!=my_context->elfs[0] && !sym_elf->init_done) {
printf_dump(LOG_DEBUG, "symbol %s from %s but elf not initialized yet, run Init now (from %s)\n", symname, ElfName(sym_elf), ElfName(h));
RunElfInitPltResolver(sym_elf, emu);
}
if(p) {
printf_dump(LOG_DEBUG, " Apply %s R_X86_64_JUMP_SLOT %p with sym=%s(ver %d: %s%s%s) (%p -> %p / %s)\n", (bind==STB_LOCAL)?"Local":"Global", p, symname, version, symname, vername?"@":"", vername?vername:"",*(void**)p, (void*)offs, ElfName(FindElfAddress(my_context, offs)));
printf_dump(LOG_DEBUG, " Apply %s R_X86_64_JUMP_SLOT %p with sym=%s(ver %d: %s%s%s) (%p -> %p / %s)\n", (bind==STB_LOCAL)?"Local":"Global", p, symname, version, symname, vername?"@":"", vername?vername:"",*(void**)p, (void*)offs, ElfName(sym_elf));
*p = offs;
} else {
printf_log(LOG_NONE, "PltResolver: Warning, Symbol %s(ver %d: %s%s%s) found, but Jump Slot Offset is NULL \n", symname, version, symname, vername?"@":"", vername?vername:"");

View File

@ -1,11 +1,9 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __ELFLOADER_PRIVATE_H_
#define __ELFLOADER_PRIVATE_H_
typedef struct library_s library_t;
typedef struct needed_libs_s needed_libs_t;
typedef struct kh_mapsymbols_s kh_mapsymbols_t;
#include <elf.h>
#include "elfloader.h"
@ -36,6 +34,7 @@ struct elfheader_s {
Elf64_Verdef* VerDef;
int szVerDef;
int e_type;
uint32_t flags;
intptr_t delta; // should be 0
@ -85,15 +84,20 @@ struct elfheader_s {
int init_done;
int fini_done;
int refcnt; // ref count for the elf
char* memory; // char* and not void* to allow math on memory pointer
char* memory; // char* and not void* to allow math on memory pointer
void** multiblock;
uintptr_t* multiblock_offs;
uint64_t* multiblock_size;
int multiblock_n;
library_t *lib;
needed_libs_t *neededlibs;
library_t *lib; // attached lib (exept on main elf)
needed_libs_t* needed;
kh_mapsymbols_t *mapsymbols;
kh_mapsymbols_t *weaksymbols;
kh_mapsymbols_t *localsymbols;
};
#define R_X86_64_NONE 0 /* No reloc */

View File

@ -1,12 +1,10 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <elf.h>
#include "rvtransversion.h"
#include "box64version.h"
#include "elfloader.h"
#include "debug.h"
#include "elfload_dump.h"
@ -79,7 +77,7 @@ elfheader_t* ParseElfHeader(FILE* f, const char* name, int exec)
return NULL;
}
if(header.e_ident[EI_CLASS]==ELFCLASS32) {
printf_log(LOG_INFO, "This is a 32bits ELF! rvtrans can only run 64bits ELF (%s)!\n", name);
printf_log(LOG_INFO, "This is a 32bits ELF! box64 can only run 64bits ELF (%s)!\n", name);
} else {
printf_log(LOG_INFO, "Not a 64bits ELF (%d)\n", header.e_ident[EI_CLASS]);
}
@ -184,14 +182,14 @@ elfheader_t* ParseElfHeader(FILE* f, const char* name, int exec)
FreeElfHeader(&h);
return NULL;
}
if(rvtrans_dump) DumpMainHeader(&header, h);
if(box64_dump) DumpMainHeader(&header, h);
LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".strtab", "SymTab Strings", SHT_STRTAB, (void**)&h->StrTab, NULL);
LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".symtab", "SymTab", SHT_SYMTAB, (void**)&h->SymTab, &h->numSymTab);
if(rvtrans_dump && h->SymTab) DumpSymTab(h);
if(box64_dump && h->SymTab) DumpSymTab(h);
LoadNamedSection(f, h->SHEntries, h->numSHEntries, h->SHStrTab, ".dynamic", "Dynamic", SHT_DYNAMIC, (void**)&h->Dynamic, &h->numDynamic);
if(rvtrans_dump && h->Dynamic) DumpDynamicSections(h);
if(box64_dump && h->Dynamic) DumpDynamicSections(h);
// grab DT_REL & DT_RELA stuffs
// also grab the DT_STRTAB string table
{
@ -280,6 +278,10 @@ elfheader_t* ParseElfHeader(FILE* f, const char* name, int exec)
h->VerDef = (Elf64_Verdef*)ptr;
printf_dump(LOG_DEBUG, "The DT_VERDEF is at address %p\n", h->VerDef);
break;
case DT_FLAGS:
h->flags = val;
printf_dump(LOG_DEBUG, "The DT_FLAGS is 0x%x\n", h->flags);
break;
}
}
if(h->rel) {
@ -409,3 +411,31 @@ const char* GetParentSymbolVersion(elfheader_t* h, int index)
}
return NULL;
}
// Search both the version tables of an elf for a version name and return its
// version indice: the "needed" table (Verneed, versions required from other
// libs) first, then the "defined" table (Verdef, versions this elf provides).
// Returns 0 when vername is NULL or not found in either table.
int GetVersionIndice(elfheader_t* h, const char* vername)
{
    if(!vername)
        return 0;
    if(h->VerNeed) {
        // one Verneed per dependency file, each followed by vn_cnt Vernaux
        // entries holding the actual version-name string offsets
        Elf64_Verneed *ver = (Elf64_Verneed*)((uintptr_t)h->VerNeed + h->delta);
        while(ver) {
            Elf64_Vernaux *aux = (Elf64_Vernaux*)((uintptr_t)ver + ver->vn_aux);
            for(int j=0; j<ver->vn_cnt; ++j) {
                if(!strcmp(h->DynStr+aux->vna_name, vername))
                    return aux->vna_other;
                // vna_next is a byte offset from the current aux entry
                aux = (Elf64_Vernaux*)((uintptr_t)aux + aux->vna_next);
            }
            // vn_next is a byte offset from the current entry; 0 ends the chain
            ver = ver->vn_next?((Elf64_Verneed*)((uintptr_t)ver + ver->vn_next)):NULL;
        }
    }
    if(h->VerDef) {
        Elf64_Verdef *def = (Elf64_Verdef*)((uintptr_t)h->VerDef + h->delta);
        while(def) {
            // only the first Verdaux (the version name itself) is compared here
            Elf64_Verdaux *aux = (Elf64_Verdaux*)((uintptr_t)def + def->vd_aux);
            if(!strcmp(h->DynStr+aux->vda_name, vername))
                return def->vd_ndx;
            // vd_next is a byte offset from the current entry; 0 ends the chain
            def = def->vd_next?((Elf64_Verdef*)((uintptr_t)def + def->vd_next)):NULL;
        }
    }
    return 0;
}

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
// Instruction-stream fetch macros: read a value at 'addr' and advance it.
#define F8 *(uint8_t*)(addr++)              // fetch unsigned byte
#define F8S *(int8_t*)(addr++)              // fetch signed byte
#define F16 *(uint16_t*)(addr+=2, addr-2)   // fetch unsigned 16bit word
@ -11,8 +8,15 @@
#define F64 *(uint64_t*)(addr+=8, addr-8)   // fetch unsigned 64bit value
#define F64S *(int64_t*)(addr+=8, addr-8)   // fetch signed 64bit value
#define PK(a) *(uint8_t*)(addr+a)           // peek byte at addr+a, no advance
#ifdef DYNAREC
// DYNAREC builds: STEP/STEP2 leave the interpreter (return 0) when 'step' is
// set; STEP2 commits RIP first; STEP3 bumps the step counter through *step.
#define STEP if(step) return 0;
#define STEP2 if(step) {R_RIP = addr; return 0;}
#define STEP3 if(*step) (*step)++;
#else
// non-DYNAREC builds: stepping hooks compile to nothing
#define STEP
#define STEP2
#define STEP3
#endif
#define GETED(D) oped=GetEd(emu, &addr, rex, nextop, D)
#define GETED32(D) oped=GetEd32O(emu, &addr, rex, nextop, D, 0)
@ -46,7 +50,7 @@
#define MODREG ((nextop&0xC0)==0xC0)
#define GOCOND(BASE, PREFIX, COND, NOTCOND) \
#define GOCOND(BASE, PREFIX, COND, NOTCOND, POST)\
case BASE+0x0: \
PREFIX \
if(ACCESS_FLAG(F_OF)) { \
@ -54,6 +58,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x1: \
PREFIX \
@ -62,6 +67,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x2: \
PREFIX \
@ -70,6 +76,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x3: \
PREFIX \
@ -78,6 +85,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x4: \
PREFIX \
@ -86,6 +94,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x5: \
PREFIX \
@ -94,6 +103,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x6: \
PREFIX \
@ -102,6 +112,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x7: \
PREFIX \
@ -110,6 +121,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x8: \
PREFIX \
@ -118,6 +130,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0x9: \
PREFIX \
@ -126,6 +139,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0xA: \
PREFIX \
@ -134,6 +148,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0xB: \
PREFIX \
@ -142,6 +157,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0xC: \
PREFIX \
@ -150,6 +166,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0xD: \
PREFIX \
@ -158,6 +175,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0xE: \
PREFIX \
@ -166,6 +184,7 @@
} else { \
NOTCOND \
} \
POST \
break; \
case BASE+0xF: \
PREFIX \
@ -174,4 +193,5 @@
} else { \
NOTCOND \
} \
POST \
break;

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@ -9,16 +6,33 @@
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/mman.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64emu_private.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "x64run.h"
#include "x64run_private.h"
#include "callback.h"
#include "bridge.h"
#ifdef HAVE_TRACE
#include "x64trace.h"
#endif
#ifdef DYNAREC
#include "custommem.h"
#endif
// for the applyFlushTo0
#ifdef __x86_64__
#include <immintrin.h>
#elif defined(__aarch64__)
#else
#warning Architecture cannot follow SSE Flush to 0 flag
#endif
// from src/wrapped/wrappedlibc.c
int my_munmap(x64emu_t* emu, void* addr, unsigned long length);
typedef struct cleanup_s {
void* f;
@ -54,7 +68,7 @@ void* GetExit()
return (void*)AddCheckBridge(my_context->system, NULL, NULL, 0, "ExitEmulation");
}
static void internalX64Setup(x64emu_t* emu, rvtranscontext_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
static void internalX64Setup(x64emu_t* emu, box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
{
emu->context = context;
// setup cpu helpers
@ -77,11 +91,11 @@ static void internalX64Setup(x64emu_t* emu, rvtranscontext_t *context, uintptr_t
emu->segs[_GS] = default_gs;
// setup fpu regs
reset_fpu(emu);
emu->mxcsr = 0x1f80;
emu->mxcsr.x32 = 0x1f80;
}
EXPORTDYN
x64emu_t *NewX64Emu(rvtranscontext_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
x64emu_t *NewX64Emu(box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
{
printf_log(LOG_DEBUG, "Allocate a new X86_64 Emu, with EIP=%p and Stack=%p/0x%X\n", (void*)start, (void*)stack, stacksize);
@ -92,7 +106,7 @@ x64emu_t *NewX64Emu(rvtranscontext_t *context, uintptr_t start, uintptr_t stack,
return emu;
}
x64emu_t *NewX64EmuFromStack(x64emu_t* emu, rvtranscontext_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
x64emu_t *NewX64EmuFromStack(x64emu_t* emu, box64context_t *context, uintptr_t start, uintptr_t stack, int stacksize, int ownstack)
{
printf_log(LOG_DEBUG, "New X86_64 Emu from stack, with EIP=%p and Stack=%p/0x%X\n", (void*)start, (void*)stack, stacksize);
@ -108,6 +122,23 @@ void SetupX64Emu(x64emu_t *emu)
(void)emu; // Not doing much here...
}
#ifdef HAVE_TRACE
void SetTraceEmu(uintptr_t start, uintptr_t end)
{
if(my_context->zydis) {
if (end == 0) {
printf_log(LOG_INFO, "Setting trace\n");
} else {
if(end!=1) { // 0-1 is basically no trace, so don't printf it...
printf_log(LOG_INFO, "Setting trace only between %p and %p\n", (void*)start, (void*)end);
}
}
}
trace_start = start;
trace_end = end;
}
#endif
void AddCleanup(x64emu_t *emu, void *p)
{
(void)emu;
@ -154,9 +185,9 @@ void CallAllCleanup(x64emu_t *emu)
printf_log(LOG_DEBUG, "Calling atexit registered functions\n");
for(int i=my_context->clean_sz-1; i>=0; --i) {
printf_log(LOG_DEBUG, "Call cleanup #%d\n", i);
--my_context->clean_sz;
RunFunctionWithEmu(emu, 0, (uintptr_t)(my_context->cleanups[i].f), my_context->cleanups[i].arg, my_context->cleanups[i].a );
}
my_context->clean_sz = 0;
box_free(my_context->cleanups);
my_context->cleanups = NULL;
}
@ -164,7 +195,7 @@ void CallAllCleanup(x64emu_t *emu)
static void internalFreeX64(x64emu_t* emu)
{
if(emu && emu->stack2free)
munmap(emu->stack2free, emu->size_stack);
my_munmap(NULL, emu->stack2free, emu->size_stack);
}
EXPORTDYN
@ -203,10 +234,9 @@ void CloneEmu(x64emu_t *newemu, const x64emu_t* emu)
memcpy(newemu->fpu_ll, emu->fpu_ll, sizeof(emu->fpu_ll));
memcpy(newemu->p_regs, emu->p_regs, sizeof(emu->p_regs));
newemu->cw = emu->cw;
memcpy(&newemu->sw, &emu->sw, sizeof(emu->sw));
newemu->sw = emu->sw;
newemu->top = emu->top;
newemu->fpu_stack = emu->fpu_stack;
memcpy(&newemu->round, &emu->round, sizeof(emu->round));
memcpy(newemu->xmm, emu->xmm, sizeof(emu->xmm));
newemu->mxcsr = emu->mxcsr;
newemu->quit = emu->quit;
@ -217,7 +247,7 @@ void CloneEmu(x64emu_t *newemu, const x64emu_t* emu)
newemu->regs[_SP].q[0] = emu->regs[_SP].q[0] + (intptr_t)(newst - oldst);
}
rvtranscontext_t* GetEmuContext(x64emu_t* emu)
box64context_t* GetEmuContext(x64emu_t* emu)
{
return emu->context;
}
@ -258,6 +288,10 @@ void SetEBP(x64emu_t *emu, uint32_t v)
{
R_EBP = v;
}
//void SetESP(x64emu_t *emu, uint32_t v)
//{
// R_ESP = v;
//}
void SetRAX(x64emu_t *emu, uint64_t v)
{
R_RAX = v;
@ -302,6 +336,16 @@ uint64_t GetRBP(x64emu_t *emu)
{
return R_RBP;
}
/*void SetFS(x64emu_t *emu, uint16_t v)
{
emu->segs[_FS] = v;
emu->segs_serial[_FS] = 0;
}
uint16_t GetFS(x64emu_t *emu)
{
return emu->segs[_FS];
}*/
void ResetFlags(x64emu_t *emu)
{
@ -315,6 +359,24 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip)
" R8", " R9", "R10", "R11", "R12", "R13", "R14", "R15"};
char tmp[160];
buff[0] = '\0';
#ifdef HAVE_TRACE
if(trace_emm) {
// do emm reg if needed
for(int i=0; i<8; ++i) {
sprintf(tmp, "mm%d:%016lx", i, emu->mmx[i].q);
strcat(buff, tmp);
if ((i&3)==3) strcat(buff, "\n"); else strcat(buff, " ");
}
}
if(trace_xmm) {
// do xmm reg if needed
for(int i=0; i<8; ++i) {
sprintf(tmp, "%d:%016lx%016lx", i, emu->xmm[i].q[1], emu->xmm[i].q[0]);
strcat(buff, tmp);
if ((i&3)==3) strcat(buff, "\n"); else strcat(buff, " ");
}
}
#endif
// start with FPU regs...
if(emu->fpu_stack) {
for (int i=0; i<emu->fpu_stack; i++) {
@ -328,7 +390,16 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip)
strcat(buff, "\n");
}
for (int i=_AX; i<=_R15; ++i) {
#ifdef HAVE_TRACE
if (trace_regsdiff && (emu->regs[i].q[0] != emu->oldregs[i].q[0])) {
sprintf(tmp, "\e[1;35m%s=%016lx\e[m ", regname[i], emu->regs[i].q[0]);
emu->oldregs[i].q[0] = emu->regs[i].q[0];
} else {
sprintf(tmp, "%s=%016lx ", regname[i], emu->regs[i].q[0]);
}
#else
sprintf(tmp, "%s=%016lx ", regname[i], emu->regs[i].q[0]);
#endif
strcat(buff, tmp);
if (i%5==4) {
@ -369,6 +440,9 @@ void StopEmu(x64emu_t* emu, const char* reason)
} */
}
printf_log(LOG_NONE, "Old IP: %tX\n", emu->old_ip);
#ifdef HAVE_TRACE
printf_log(LOG_NONE, "%s\n", DecodeX64Trace(my_context->dec, emu->old_ip));
#endif
}
void UnimpOpcode(x64emu_t* emu)
@ -441,3 +515,25 @@ void ResetSegmentsCache(x64emu_t *emu)
return;
memset(emu->segs_serial, 0, sizeof(emu->segs_serial));
}
void applyFlushTo0(x64emu_t* emu)
{
#ifdef __x86_64__
_mm_setcsr(_mm_getcsr() | (emu->mxcsr.x32&0x8040));
#elif defined(__aarch64__)
#ifdef __ANDROID__
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr":"=r"(fpcr));
#else
uint64_t fpcr = __builtin_aarch64_get_fpcr();
#endif
fpcr &= ~((1<<24) | (1<<1)); // clear bit FZ (24) and AH (1)
fpcr |= (emu->mxcsr.f.MXCSR_FZ)<<24; // set FZ as mxcsr FZ
fpcr |= ((emu->mxcsr.f.MXCSR_DAZ)^(emu->mxcsr.f.MXCSR_FZ))<<1; // set AH if DAZ different from FZ
#ifdef __ANDROID__
__asm__ __volatile__ ("msr fpcr, %0"::"r"(fpcr));
#else
__builtin_aarch64_set_fpcr(fpcr);
#endif
#endif
}

View File

@ -1,12 +1,9 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __X86EMU_PRIVATE_H_
#define __X86EMU_PRIVATE_H_
#include "regs.h"
typedef struct rvtranscontext_s rvtranscontext_t;
typedef struct box64context_s box64context_t;
typedef struct x64_ucontext_s x64_ucontext_t;
#define ERR_UNIMPL 1
@ -38,12 +35,11 @@ typedef struct x64emu_s {
// fpu / mmx
mmx87_regs_t x87[8];
mmx87_regs_t mmx[8];
uint16_t cw;
x87control_t cw;
x87flags_t sw;
uint32_t top; // top is part of sw, but it's faster to have it separatly
int fpu_stack;
uint32_t mxcsr;
fpu_round_t round;
mmxcontrol_t mxcsr;
fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst
fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence
fpu_p_reg_t p_regs[8];
@ -55,13 +51,20 @@ typedef struct x64emu_s {
multiuint_t op1;
multiuint_t op2;
multiuint_t res;
multiuint_t op1_sav; // for dec/inc defered flags, to be able to compute CF
multiuint_t res_sav;
defered_flags_t df_sav;
uint32_t *x64emu_parity_tab; // helper
#ifdef HAVE_TRACE
reg64_t oldregs[16];
uintptr_t prev2_ip;
#endif
// segments
uint32_t segs[6]; // only 32bits value?
uintptr_t segs_offs[6]; // computed offset associate with segment
uint32_t segs_serial[6]; // are seg offset clean (not 0) or does they need to be re-computed (0)? For GS, serial need to be the same as context->sel_serial
// parent context
rvtranscontext_t *context;
box64context_t *context;
// cpu helpers
reg64_t zero;
reg64_t *sbiidx[16];
@ -94,4 +97,6 @@ typedef struct x64emu_s {
//#define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0; emu->quit=1;}
#define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0;} // should rise a SIGFPE and not quit
void applyFlushTo0(x64emu_t* emu);
#endif //__X86EMU_PRIVATE_H_

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <dlfcn.h>
#include <stdint.h>
@ -14,9 +11,10 @@
#include <pthread.h>
#include <signal.h>
#include <poll.h>
#include <sys/wait.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -25,7 +23,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "wrapper.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "librarian.h"
#include <elf.h>
@ -57,8 +55,13 @@ x64emu_t* x64emu_fork(x64emu_t* emu, int forktype)
for (int i=0; i<my_context->atfork_sz; --i)
if(my_context->atforks[i].parent)
EmuCall(emu, my_context->atforks[i].parent);
if(forktype==3) {
// vfork, the parent wait the end or execve of the son
waitpid(v, NULL, WEXITED);
}
} else if(v==0) {
ResetSegmentsCache(emu);
// execute atforks child functions
for (int i=0; i<my_context->atfork_sz; --i)
if(my_context->atforks[i].child)
@ -77,7 +80,7 @@ static uint8_t Peek8(uintptr_t addr, uintptr_t offset)
{
return *(uint8_t*)(addr+offset);
}
extern int errno;
void x64Int3(x64emu_t* emu, uintptr_t* addr)
{
if(Peek8(*addr, 0)=='S' && Peek8(*addr, 1)=='C') // Signature for "Out of x86 door"
@ -93,17 +96,22 @@ void x64Int3(x64emu_t* emu, uintptr_t* addr)
wrapper_t w = (wrapper_t)a;
a = F64(addr);
R_RIP = *addr;
if(rvtrans_log>=LOG_DEBUG || cycle_log) {
pthread_mutex_lock(&emu->context->mutex_trace);
/* This party can be used to trace only 1 specific lib (but it is quite slow)
elfheader_t *h = FindElfAddress(my_context, *(uintptr_t*)(R_ESP));
int have_trace = 0;
if(h && strstr(ElfName(h), "libMiles")) have_trace = 1;*/
if(box64_log>=LOG_DEBUG || cycle_log) {
int tid = GetTID();
char t_buff[256] = "\0";
char buff2[64] = "\0";
char buff3[64] = "\0";
char* buff = cycle_log?my_context->log_call[my_context->current_line]:t_buff;
char* buffret = cycle_log?my_context->log_ret[my_context->current_line]:NULL;
int cycle_line = my_context->current_line;
if(cycle_log) {
my_context->current_line = (my_context->current_line+1)%cycle_log;
}
char* buff = cycle_log?my_context->log_call[cycle_line]:t_buff;
char* buffret = cycle_log?my_context->log_ret[cycle_line]:NULL;
if(buffret) buffret[0] = '\0';
if(cycle_log)
my_context->current_line = (my_context->current_line+1)&(CYCLE_LOG-1);
char *tmp;
int post = 0;
int perr = 0;
@ -115,167 +123,176 @@ void x64Int3(x64emu_t* emu, uintptr_t* addr)
snprintf(buff, 256, "%s", cycle_log?"PltResolver ":" ... ");
} else if (!strcmp(s, "__open") || !strcmp(s, "open") || !strcmp(s, "open ") || !strcmp(s, "open64")) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %d (,%d))", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (int)(R_ESI), (int)(R_EDX));
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", %d (,%d))", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (int)(R_ESI), (int)(R_EDX));
perr = 1;
} else if (!strcmp(s, "shm_open")) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %d, %d)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (int)(R_ESI), (int)(R_EDX));
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", %d, %d)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (int)(R_ESI), (int)(R_EDX));
perr = 1;
} else if (!strcmp(s, "fopen") || !strcmp(s, "fopen64")) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (char*)(R_RSI));
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (char*)(R_RSI));
perr = 2;
} else if (!strcmp(s, "__openat64") || !strcmp(s, "openat64") || !strcmp(s, "__openat64_2")) {
tmp = (char*)(R_RSI);
snprintf(buff, 255, "%04d|%p: Calling %s(%d, \"%s\", %d (,%d))", tid, *(void**)(R_RSP), s, (int)R_EDI, (tmp)?tmp:"(nil)", (int)(R_EDX), (int)(R_ECX));
snprintf(buff, 256, "%04d|%p: Calling %s(%d, \"%s\", %d (,%d))", tid, *(void**)(R_RSP), s, (int)R_EDI, (tmp)?tmp:"(nil)", (int)(R_EDX), (int)(R_ECX));
perr = 1;
} else if (strstr(s, "readlink")==s) {
} else if (!strcmp(s, "readlink")) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %p, %zd)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (void*)(R_RSI), (size_t)R_RDX);
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", %p, %zd)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (void*)(R_RSI), (size_t)R_RDX);
perr = 1;
} else if (strstr(s, "mkdir")==s) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %d)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (int)(R_ESI));
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", %d)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (int)(R_ESI));
perr = 1;
} else if (strstr(s, "opendir")==s) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
perr = 2;
} else if (!strcmp(s, "read")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_RDX);
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %p, %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_RDX);
perr = 1;
} else if (!strcmp(s, "write")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_RDX);
if(R_EDI==2 || R_EDI==3)
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %p\"%s\", %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, (char*)R_RSI, R_RDX);
else
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %p, %zu)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_RDX);
perr = 1;
} else if (strstr(s, "access")==s) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", 0x%x)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", R_ESI);
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", 0x%x)", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", R_ESI);
perr = 1;
} else if (strstr(s, "waitpid")==s) {
pu32 = (uint32_t*)R_RSI;
snprintf(buff, 255, "%04d|%p: Calling %s(%ld, %p, %d)", tid, *(void**)(R_RSP), s, R_RDI, pu32, R_EDX);
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %p, %d)", tid, *(void**)(R_RSP), s, R_EDI, pu32, R_EDX);
perr = 1;
post = 6;
} else if (!strcmp(s, "lseek64")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%d, %ld, %d)", tid, *(void**)(R_RSP), s, (int)R_EDI, (int64_t)R_RSI, (int)R_EDX);
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %ld, %d)", tid, *(void**)(R_RSP), s, (int)R_EDI, (int64_t)R_RSI, (int)R_EDX);
perr = 1;
} else if (!strcmp(s, "lseek")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%d, %ld, %d)", tid, *(void**)(R_RSP), s, (int)R_EDI, (int64_t)R_RSI, (int)R_EDX);
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %ld, %d)", tid, *(void**)(R_RSP), s, (int)R_EDI, (int64_t)R_RSI, (int)R_EDX);
perr = 1;
} else if (!strcmp(s, "recvmsg")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%d, %p, 0x%x)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_EDX);
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %p, 0x%x)", tid, *(void**)(R_RSP), s, R_EDI, (void*)R_RSI, R_EDX);
perr = 1;
} else if (strstr(s, "puts")==s) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
} else if (strstr(s, "strlen")==s) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
} else if (strstr(s, "strcmp")==s) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (char*)R_RSI);
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", \"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)", (char*)R_RSI);
} else if (strstr(s, "getenv")==s) {
tmp = (char*)(R_RDI);
post = 2;
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
} else if (strstr(s, "setenv")==s) {
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", \"%s\", %d)", tid, *(void**)(R_RSP), s, (char*)R_RDI, (char*)R_RSI, R_EDX);
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", \"%s\", %d)", tid, *(void**)(R_RSP), s, (char*)R_RDI, (char*)R_RSI, R_EDX);
} else if (!strcmp(s, "poll")) {
struct pollfd* pfd = (struct pollfd*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(%p[%d/%d/%d, ...], %d, %d)", tid, *(void**)(R_RSP), s, pfd, pfd->fd, pfd->events, pfd->revents, R_ESI, R_EDX);
snprintf(buff, 256, "%04d|%p: Calling %s(%p[%d/%d/%d, ...], %d, %d)", tid, *(void**)(R_RSP), s, pfd, pfd->fd, pfd->events, pfd->revents, R_ESI, R_EDX);
} else if (strstr(s, "__printf_chk")) {
tmp = (char*)(R_RSI);
snprintf(buff, 255, "%04d|%p: Calling %s(%d, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, R_EDI, (tmp)?tmp:"(nil)", (void*)(R_RDX));
snprintf(buff, 256, "%04d|%p: Calling %s(%d, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, R_EDI, (tmp)?tmp:"(nil)", (void*)(R_RDX));
} else if (strstr(s, "__snprintf_chk")) {
tmp = (char*)(R_R8);
pu64 = (uint64_t*)R_RDI;
post = 3;
snprintf(buff, 255, "%04d|%p: Calling %s(%p, %zu, %d, %zu, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, (void*)R_RDI, R_RSI, R_EDX, R_RCX, (tmp)?tmp:"(nil)", (void*)(R_R9));
snprintf(buff, 256, "%04d|%p: Calling %s(%p, %zu, %d, %zu, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, (void*)R_RDI, R_RSI, R_EDX, R_RCX, (tmp)?tmp:"(nil)", (void*)(R_R9));
} else if (!strcmp(s, "snprintf")) {
tmp = (char*)(R_RDX);
pu64 = (uint64_t*)R_RDI;
post = 3;
snprintf(buff, 255, "%04d|%p: Calling %s(%p, %zu, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, (void*)R_RDI, R_RSI, (tmp)?tmp:"(nil)", (void*)(R_RCX));
snprintf(buff, 256, "%04d|%p: Calling %s(%p, %zu, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, (void*)R_RDI, R_RSI, (tmp)?tmp:"(nil)", (void*)(R_RCX));
} else if (!strcmp(s, "getcwd")) {
post = 2;
snprintf(buff, 255, "%04d|%p: Calling %s(%p, %zu)", tid, *(void**)(R_RSP), s, (void*)R_RDI, R_RSI);
snprintf(buff, 256, "%04d|%p: Calling %s(%p, %zu)", tid, *(void**)(R_RSP), s, (void*)R_RDI, R_RSI);
} else if (!strcmp(s, "ftok")) {
tmp = (char*)(R_RDI);
perr = 1;
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\", %d)", tid, *(void**)(R_RSP), s, tmp?tmp:"nil", R_ESI);
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\", %d)", tid, *(void**)(R_RSP), s, tmp?tmp:"nil", R_ESI);
} else if (!strcmp(s, "glXGetProcAddress") || !strcmp(s, "SDL_GL_GetProcAddress") || !strcmp(s, "glXGetProcAddressARB")) {
tmp = (char*)(R_RDI);
snprintf(buff, 255, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
snprintf(buff, 256, "%04d|%p: Calling %s(\"%s\")", tid, *(void**)(R_RSP), s, (tmp)?tmp:"(nil)");
} else if (!strcmp(s, "glLabelObjectEXT")) {
tmp = (char*)(R_RCX);
snprintf(buff, 255, "%04d|%p: Calling %s(0x%x, %d, %d, \"%s\")", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_ECX, (tmp)?tmp:"(nil)");
snprintf(buff, 256, "%04d|%p: Calling %s(0x%x, %d, %d, \"%s\")", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_ECX, (tmp)?tmp:"(nil)");
} else if (!strcmp(s, "glGetStringi")) {
post = 2;
snprintf(buff, 255, "%04d|%p: Calling %s(0x%x, %d)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI);
snprintf(buff, 256, "%04d|%p: Calling %s(0x%x, %d)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI);
} else if (!strcmp(s, "_dl_tlsdesc_undefweak")) {
snprintf(buff, 255, "%04d|%p: Calling %s(RAX=%p)", tid, *(void**)(R_RSP), s, (void*)R_RAX);
snprintf(buff, 256, "%04d|%p: Calling %s(RAX=%p)", tid, *(void**)(R_RSP), s, (void*)R_RAX);
} else if (!strcmp(s, "glFramebufferTexture2D")) {
snprintf(buff, 255, "%04d|%p: Calling %s(0x%x, 0x%x, 0x%x, %u, %d)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d);
snprintf(buff, 256, "%04d|%p: Calling %s(0x%x, 0x%x, 0x%x, %u, %d)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d);
} else if (!strcmp(s, "glTexSubImage2D")) {
snprintf(buff, 255, "%04d|%p: Calling %s(0x%x, %d, %d, %d, %d, %d, 0x%x, 0x%x, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d, R_R9d, *(uint32_t*)(R_RSP+8), *(uint32_t*)(R_RSP+16), *(void**)(R_RSP+24));
snprintf(buff, 256, "%04d|%p: Calling %s(0x%x, %d, %d, %d, %d, %d, 0x%x, 0x%x, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d, R_R9d, *(uint32_t*)(R_RSP+8), *(uint32_t*)(R_RSP+16), *(void**)(R_RSP+24));
} else if (!strcmp(s, "glCompressedTexSubImage2D")) {
snprintf(buff, 255, "%04d|%p: Calling %s(0x%x, %d, %d, %d, %d, %d, 0x%x, %d, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d, R_R9d, *(uint32_t*)(R_RSP+8), *(uint32_t*)(R_RSP+16), *(void**)(R_RSP+24));
snprintf(buff, 256, "%04d|%p: Calling %s(0x%x, %d, %d, %d, %d, %d, 0x%x, %d, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d, R_R9d, *(uint32_t*)(R_RSP+8), *(uint32_t*)(R_RSP+16), *(void**)(R_RSP+24));
} else if (!strcmp(s, "glVertexAttribPointer")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%u, %d, 0x%x, %d, %d, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d, (void*)R_R9);
snprintf(buff, 256, "%04d|%p: Calling %s(%u, %d, 0x%x, %d, %d, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, R_ECX, R_R8d, (void*)R_R9);
} else if (!strcmp(s, "glDrawElements")) {
snprintf(buff, 255, "%04d|%p: Calling %s(0x%x, %d, 0x%x, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, (void*)R_RCX);
snprintf(buff, 256, "%04d|%p: Calling %s(0x%x, %d, 0x%x, %p)", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, R_EDX, (void*)R_RCX);
} else if (!strcmp(s, "glUniform4fv")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%d, %d, %p[%g/%g/%g/%g...])", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, (void*)R_RDX, ((float*)(R_RDX))[0], ((float*)(R_RDX))[1], ((float*)(R_RDX))[2], ((float*)(R_RDX))[3]);
snprintf(buff, 256, "%04d|%p: Calling %s(%d, %d, %p[%g/%g/%g/%g...])", tid, *(void**)(R_RSP), s, R_EDI, R_ESI, (void*)R_RDX, ((float*)(R_RDX))[0], ((float*)(R_RDX))[1], ((float*)(R_RDX))[2], ((float*)(R_RDX))[3]);
} else if (!strcmp(s, "ov_read")) {
snprintf(buff, 256, "%04d|%p: Calling %s(%p, %p, %d, %d, %d, %d, %p)", tid, *(void**)(R_RSP), s, (void*)R_RDI, (void*)R_RSI, R_EDX, R_ECX, R_R8d, R_R9d, *(void**)(R_RSP+8));
} else if (!strcmp(s, "mmap64") || !strcmp(s, "mmap")) {
snprintf(buff, 255, "%04d|%p: Calling %s(%p, %lu, 0x%x, 0x%x, %d, %ld)", tid, *(void**)(R_RSP), s,
snprintf(buff, 256, "%04d|%p: Calling %s(%p, %lu, 0x%x, 0x%x, %d, %ld)", tid, *(void**)(R_RSP), s,
(void*)R_RDI, R_RSI, (int)(R_RDX), (int)R_RCX, (int)R_R8, R_R9);
} else if (!strcmp(s, "sscanf")) {
tmp = (char*)(R_RSI);
snprintf(buff, 255, "%04d|%p: Calling %s(%p, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, (void*)R_RDI, (tmp)?tmp:"(nil)", (void*)(R_RDX));
snprintf(buff, 256, "%04d|%p: Calling %s(%p, \"%s\" (,%p))", tid, *(void**)(R_RSP), s, (void*)R_RDI, (tmp)?tmp:"(nil)", (void*)(R_RDX));
} else if (!strcmp(s, "XCreateWindow")) {
tmp = (char*)(R_RSI);
snprintf(buff, 255, "%04d|%p: Calling %s(%p, %p, %d, %d, %u, %u, %u, %d, %u, %p, 0x%lx, %p)", tid, *(void**)(R_RSP), s,
snprintf(buff, 256, "%04d|%p: Calling %s(%p, %p, %d, %d, %u, %u, %u, %d, %u, %p, 0x%lx, %p)", tid, *(void**)(R_RSP), s,
(void*)R_RDI, (void*)R_RSI, (int)R_EDX, (int)R_ECX, R_R8d, R_R9d,
(uint32_t)*(uint64_t*)(R_RSP+8), (int)*(uint64_t*)(R_RSP+16),
(uint32_t)*(uint64_t*)(R_RSP+24), (void*)*(uint64_t*)(R_RSP+32),
(unsigned long)*(uint64_t*)(R_RSP+40), (void*)*(uint64_t*)(R_RSP+48));
} else {
snprintf(buff, 255, "%04d|%p: Calling %s(0x%lX, 0x%lX, 0x%lX, ...)", tid, *(void**)(R_RSP), s, R_RDI, R_RSI, R_RDX);
snprintf(buff, 256, "%04d|%p: Calling %s(0x%lX, 0x%lX, 0x%lX, ...)", tid, *(void**)(R_RSP), s, R_RDI, R_RSI, R_RDX);
}
if(!cycle_log) {
mutex_lock(&emu->context->mutex_trace);
printf_log(LOG_NONE, "%s =>", buff);
mutex_unlock(&emu->context->mutex_trace);
}
if(!cycle_log) printf_log(LOG_NONE, "%s =>", buff);
pthread_mutex_unlock(&emu->context->mutex_trace);
w(emu, a); // some function never come back, so unlock the mutex first!
pthread_mutex_lock(&emu->context->mutex_trace);
if(post)
switch(post) { // Only ever 2 for now...
case 1: snprintf(buff2, 63, " [%llu sec %llu nsec]", pu64?pu64[0]:~0ull, pu64?pu64[1]:~0ull);
case 1: snprintf(buff2, 64, " [%llu sec %llu nsec]", pu64?pu64[0]:~0ull, pu64?pu64[1]:~0ull);
break;
case 2: snprintf(buff2, 63, "(%s)", R_RAX?((char*)R_RAX):"nil");
case 2: snprintf(buff2, 64, "(%s)", R_RAX?((char*)R_RAX):"nil");
break;
case 3: snprintf(buff2, 63, "(%s)", pu64?((char*)pu64):"nil");
case 3: snprintf(buff2, 64, "(%s)", pu64?((char*)pu64):"nil");
break;
case 4: snprintf(buff2, 63, " (%f)", ST0.d);
case 4: snprintf(buff2, 64, " (%f)", ST0.d);
break;
case 5: {
uint32_t* p = (uint32_t*)R_RAX; // uint64_t? (case never used)
if(p)
snprintf(buff2, 63, " size=%ux%u, pitch=%u, pixels=%p", p[2], p[3], p[4], p+5);
snprintf(buff2, 64, " size=%ux%u, pitch=%u, pixels=%p", p[2], p[3], p[4], p+5);
else
snprintf(buff2, 63, "NULL Surface");
snprintf(buff2, 64, "NULL Surface");
}
break;
case 6: if(pu32) snprintf(buff2, 63, " [0x%x] ", pu32[0]);
case 6: if(pu32) snprintf(buff2, 64, " [0x%x] ", pu32[0]);
break;
}
if(perr==1 && ((int)R_EAX)<0)
snprintf(buff3, 63, " (errno=%d:\"%s\")", errno, strerror(errno));
snprintf(buff3, 64, " (errno=%d:\"%s\")", errno, strerror(errno));
else if(perr==2 && R_EAX==0)
snprintf(buff3, 63, " (errno=%d:\"%s\")", errno, strerror(errno));
snprintf(buff3, 64, " (errno=%d:\"%s\")", errno, strerror(errno));
if(cycle_log)
snprintf(buffret, 127, "0x%lX%s%s", R_RAX, buff2, buff3);
else
snprintf(buffret, 128, "0x%lX%s%s", R_RAX, buff2, buff3);
else {
mutex_lock(&emu->context->mutex_trace);
printf_log(LOG_NONE, " return 0x%lX%s%s\n", R_RAX, buff2, buff3);
pthread_mutex_unlock(&emu->context->mutex_trace);
mutex_unlock(&emu->context->mutex_trace);
}
} else
w(emu, a);
}
@ -292,3 +309,16 @@ int GetTID()
{
return syscall(SYS_gettid);
}
void print_cycle_log(int loglevel) {
if(cycle_log) {
printf_log(LOG_INFO, "Last calls\n");
int j = (my_context->current_line+1)%cycle_log;
for (int i=0; i<cycle_log; ++i) {
int k = (i+j)%cycle_log;
if(my_context->log_call[k][0]) {
printf_log(loglevel, "%s => return %s\n", my_context->log_call[k], my_context->log_ret[k]);
}
}
}
}

View File

@ -96,7 +96,7 @@
*
****************************************************************************/
// This has been heavily modified to fit rvtrans purpose...
// This has been heavily modified to fit box64 purpose...
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@ -1093,6 +1093,14 @@ uint16_t shrd16 (x64emu_t *emu, uint16_t d, uint16_t fill, uint8_t s)
CONDITIONAL_SET_FLAG(res & 0x8000, F_SF);
CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
CLEAR_FLAG(F_OF);
#if 0
res = 0;
CLEAR_FLAG(F_CF);
CLEAR_FLAG(F_OF);
SET_FLAG(F_ZF);
CLEAR_FLAG(F_SF);
CLEAR_FLAG(F_PF);
#endif
}
return (uint16_t)res;
}
@ -1181,8 +1189,9 @@ uint16_t sbb16(x64emu_t *emu, uint16_t d, uint16_t s)
if (ACCESS_FLAG(F_CF))
res = d - s - 1;
else
else {
res = d - s;
}
CONDITIONAL_SET_FLAG(res & 0x8000, F_SF);
CONDITIONAL_SET_FLAG((res & 0xffff) == 0, F_ZF);
CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
@ -1203,8 +1212,9 @@ uint32_t sbb32(x64emu_t *emu, uint32_t d, uint32_t s)
if (ACCESS_FLAG(F_CF))
res = d - s - 1;
else
else {
res = d - s;
}
CONDITIONAL_SET_FLAG(res & 0x80000000, F_SF);
CONDITIONAL_SET_FLAG(!res, F_ZF);
CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);
@ -1225,8 +1235,9 @@ uint64_t sbb64(x64emu_t *emu, uint64_t d, uint64_t s)
if (ACCESS_FLAG(F_CF))
res = d - s - 1;
else
else {
res = d - s;
}
CONDITIONAL_SET_FLAG(res & 0x8000000000000000LL, F_SF);
CONDITIONAL_SET_FLAG(!res, F_ZF);
CONDITIONAL_SET_FLAG(PARITY(res & 0xff), F_PF);

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __X64PRIMOP_H_
#define __X64PRIMOP_H_
@ -92,88 +89,106 @@ uint64_t cmp64 (x64emu_t *emu, uint64_t d, uint64_t s);
uint8_t daa8 (x64emu_t *emu, uint8_t d);
uint8_t das8 (x64emu_t *emu, uint8_t d);
#define CF_SAV() \
if(emu->df>=d_dec8 && emu->df<=d_inc64) { \
emu->df_sav = d_none; \
} else if(emu->df<d_dec8i || emu->df>d_inc64i) {\
emu->df_sav = emu->df; \
emu->op1_sav = emu->op1; \
emu->res_sav = emu->res; \
}
static inline uint8_t dec8(x64emu_t *emu, uint8_t d)
{
CF_SAV();
emu->res.u8 = d - 1;
emu->op1.u8 = d;
emu->df = d_dec8;
emu->df = d_dec8i;
return emu->res.u8;
}
static inline uint16_t dec16(x64emu_t *emu, uint16_t d)
{
CF_SAV();
emu->res.u16 = d - 1;
emu->op1.u16 = d;
emu->df = d_dec16;
emu->df = d_dec16i;
return emu->res.u16;
}
static inline uint32_t dec32(x64emu_t *emu, uint32_t d)
{
CF_SAV();
emu->res.u32 = d - 1;
emu->op1.u32 = d;
emu->df = d_dec32;
emu->df = d_dec32i;
return emu->res.u32;
}
static inline uint64_t dec64(x64emu_t *emu, uint64_t d)
{
CF_SAV();
emu->res.u64 = d - 1;
emu->op1.u64 = d;
emu->df = d_dec64;
emu->df = d_dec64i;
return emu->res.u64;
}
static inline uint8_t inc8(x64emu_t *emu, uint8_t d)
{
CF_SAV();
emu->res.u8 = d + 1;
emu->op1.u8 = d;
emu->df = d_inc8;
emu->df = d_inc8i;
return emu->res.u8;
}
static inline uint16_t inc16(x64emu_t *emu, uint16_t d)
{
CF_SAV();
emu->res.u16 = d + 1;
emu->op1.u16 = d;
emu->df = d_inc16;
emu->df = d_inc16i;
return emu->res.u16;
}
static inline uint32_t inc32(x64emu_t *emu, uint32_t d)
{
if(emu->df == d_shr32) {
/*if(emu->df == d_shr32) {
// workaround for some wine trickery
uint32_t cnt = emu->op2.u32;
if (cnt > 0) {
uint32_t cc = emu->op1.u32 & (1 << (cnt - 1));
CONDITIONAL_SET_FLAG(cc, F_CF);
}
}
}*/
CF_SAV();
emu->res.u32 = d + 1;
emu->op1.u32 = d;
emu->df = d_inc32;
emu->df = d_inc32i;
return emu->res.u32;
}
static inline uint64_t inc64(x64emu_t *emu, uint64_t d)
{
if(emu->df == d_shr64) {
/*if(emu->df == d_shr64) {
// workaround for some wine trickery
uint64_t cnt = emu->op2.u64;
if (cnt > 0) {
uint64_t cc = emu->op1.u64 & (1LL << (cnt - 1));
CONDITIONAL_SET_FLAG(cc, F_CF);
}
}
}*/
CF_SAV();
emu->res.u64 = d + 1;
emu->op1.u64 = d;
emu->df = d_inc64;
emu->df = d_inc64i;
return emu->res.u64;
}
#undef CF_SAV
static inline uint8_t or8(x64emu_t *emu, uint8_t d, uint8_t s)
{

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -10,8 +7,9 @@
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -19,9 +17,12 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "signals.h"
#ifdef DYNAREC
#include "../dynarec/native_lock.h"
#endif
#include "modrm.h"
@ -48,7 +49,8 @@ int Run(x64emu_t *emu, int step)
return 0;
if(addr==0) {
emu->quit = 1;
printf_log(LOG_INFO, "Ask to run at NULL, quit silently\n");
printf_log(LOG_INFO, "%04d|Ask to run at NULL, quit silently\n", GetTID());
print_cycle_log(LOG_INFO);
return 0;
}
//ref opcode: http://ref.x64asm.net/geek32.html#xA1
@ -57,6 +59,14 @@ int Run(x64emu_t *emu, int step)
x64emurun:
while(1) {
#ifdef HAVE_TRACE
__builtin_prefetch((void*)addr, 0, 0);
emu->prev2_ip = emu->old_ip;
if(my_context->dec && (
(trace_end == 0)
|| ((addr >= trace_start) && (addr < trace_end))) )
PrintTrace(emu, addr, 0);
#endif
emu->old_ip = addr;
opcode = F8;
@ -126,10 +136,11 @@ x64emurun:
case 0x0F: /* More instructions */
switch(rep) {
case 1:
if(!(addr = RunF20F(emu, rex, addr))) {
if(!(addr = RunF20F(emu, rex, addr, &step))) {
unimp = 1;
goto fini;
}
if(step==2) STEP2;
break;
case 2:
if(!(addr = RunF30F(emu, rex, addr))) {
@ -138,10 +149,11 @@ x64emurun:
}
break;
default:
if(!(addr = Run0F(emu, rex, addr))) {
if(!(addr = Run0F(emu, rex, addr, &step))) {
unimp = 1;
goto fini;
}
if(step==2) STEP2;
break;
}
if(emu->quit) {
@ -156,7 +168,7 @@ x64emurun:
GO(0x30, xor) /* XOR 0x30 -> 0x35 */
#undef GO
case 0x2E: /* segments are ignored */
case 0x2E: /* segments are ignored */
case 0x36: /* SS: (ignored) */
break;
@ -321,7 +333,7 @@ x64emurun:
GOCOND(0x70
, tmp8s = F8S; CHECK_FLAGS(emu);
, addr += tmp8s;
,
,,STEP2
) /* Jxx Ib */
case 0x80: /* GRP Eb,Ib */
@ -404,6 +416,18 @@ x64emurun:
break;
case 0x86: /* XCHG Eb,Gb */
nextop = F8;
#ifdef DYNAREC
GETEB(0);
GETGB;
if(MODREG) { // reg / reg: no lock
tmp8u = GB;
GB = EB->byte[0];
EB->byte[0] = tmp8u;
} else {
GB = native_lock_xchg_b(EB, GB);
}
// dynarec use need it's own mecanism
#else
GETEB(0);
GETGB;
if(!MODREG)
@ -413,9 +437,31 @@ x64emurun:
EB->byte[0] = tmp8u;
if(!MODREG)
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0x87: /* XCHG Ed,Gd */
nextop = F8;
#ifdef DYNAREC
GETED(0);
GETGD;
if(MODREG) {
if(rex.w) {
tmp64u = GD->q[0];
GD->q[0] = ED->q[0];
ED->q[0] = tmp64u;
} else {
tmp32u = GD->dword[0];
GD->q[0] = ED->dword[0];
ED->q[0] = tmp32u;
}
} else {
if(rex.w) {
GD->q[0] = native_lock_xchg_dd(ED, GD->q[0]);
} else {
GD->q[0] = native_lock_xchg_d(ED, GD->dword[0]);
}
}
#else
GETED(0);
GETGD;
if(!MODREG)
@ -434,6 +480,7 @@ x64emurun:
}
if(!MODREG)
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0x88: /* MOV Eb,Gb */
nextop = F8;
@ -1220,7 +1267,7 @@ x64emurun:
addr += tmp8s;
STEP2
break;
case 0xE3: /* JECXZ */
case 0xE3: /* JRCXZ */
tmp8s = F8S;
if(!R_RCX)
addr += tmp8s;
@ -1361,10 +1408,12 @@ x64emurun:
case 4: /* MUL EAX,Ed */
mul32_eax(emu, ED->dword[0]);
emu->regs[_AX].dword[1] = 0;
emu->regs[_DX].dword[1] = 0;
break;
case 5: /* IMUL EAX,Ed */
imul32_eax(emu, ED->dword[0]);
emu->regs[_AX].dword[1] = 0;
emu->regs[_DX].dword[1] = 0;
break;
case 6: /* DIV Ed */
div32(emu, ED->dword[0]);

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,14 +17,18 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "my_cpuid.h"
#include "bridge.h"
#include "signals.h"
#ifdef DYNAREC
#include "custommem.h"
#include "../dynarec/native_lock.h"
#endif
#include "modrm.h"
uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
{
uint8_t opcode;
uint8_t nextop;
@ -36,6 +37,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
int32_t tmp32s, tmp32s2;
uint32_t tmp32u, tmp32u2;
uint64_t tmp64u, tmp64u2;
int64_t tmp64s;
reg64_t *oped, *opgd;
sse_regs_t *opex, *opgx, eax1;
mmx87_regs_t *opem, *opgm, eam1;
@ -192,32 +194,38 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGM;
GM->sd[1] = EX->f[1];
GM->sd[0] = EX->f[0];
if(isnanf(EX->f[1]) || isinff(EX->f[1]) || EX->f[1]>0x7fffffff)
GM->sd[1] = 0x80000000;
else
GM->sd[1] = EX->f[1];
if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>0x7fffffff)
GM->sd[0] = 0x80000000;
else
GM->sd[0] = EX->f[0];
break;
case 0x2D: /* CVTPS2PI Gm, Ex */
// rounding should be done; and indefinite integer should also be assigned if overflow or NaN/Inf
nextop = F8;
GETEX(0);
GETGM;
switch((emu->mxcsr>>13)&3) {
case ROUND_Nearest:
GM->sd[1] = floorf(EX->f[1]+0.5f);
GM->sd[0] = floorf(EX->f[0]+0.5f);
break;
case ROUND_Down:
GM->sd[1] = floorf(EX->f[1]);
GM->sd[0] = floorf(EX->f[0]);
break;
case ROUND_Up:
GM->sd[1] = ceilf(EX->f[1]);
GM->sd[0] = ceilf(EX->f[0]);
break;
case ROUND_Chop:
GM->sd[1] = EX->f[1];
GM->sd[0] = EX->f[0];
break;
}
for(int i=1; i>=0; --i)
if(isnanf(EX->f[i]) || isinff(EX->f[i]) || EX->f[i]>0x7fffffff)
GM->sd[i] = 0x80000000;
else
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GM->sd[i] = nearbyintf(EX->f[i]);
break;
case ROUND_Down:
GM->sd[i] = floorf(EX->f[i]);
break;
case ROUND_Up:
GM->sd[i] = ceilf(EX->f[i]);
break;
case ROUND_Chop:
GM->sd[i] = EX->f[i];
break;
}
break;
case 0x2E: /* UCOMISS Gx, Ex */
// same for now
@ -303,6 +311,25 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
}
break;
case 0xF0: /* MOVBE Gd, Ed*/
nextop = F8;
GETGD;
GETED(0);
if(rex.w)
GD->q[0] = __builtin_bswap64(ED->q[0]);
else
GD->q[0] = __builtin_bswap32(ED->dword[0]);
break;
case 0xF1: /* MOVBE Ed, Gd*/
nextop = F8;
GETGD;
GETED(0);
if(rex.w)
ED->q[0] = __builtin_bswap64(GD->q[0]);
else
ED->q[0] = __builtin_bswap32(GD->dword[0]);
break;
default:
return 0;
}
@ -342,6 +369,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
CHECK_FLAGS(emu);
, if(rex.w) {GD->q[0] = ED->q[0]; } else {GD->q[0] = ED->dword[0];}
, if(!rex.w) GD->dword[1] = 0;
,
) /* 0x40 -> 0x4F CMOVxx Gd,Ed */ // conditional move, no sign
case 0x50: /* MOVMSKPS Gd, Ex */
@ -748,13 +776,14 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GOCOND(0x80
, tmp32s = F32S; CHECK_FLAGS(emu);
, addr += tmp32s;
,
,,STEP3
) /* 0x80 -> 0x8F Jxx */
GOCOND(0x90
, nextop = F8; CHECK_FLAGS(emu);
GETEB(0);
, EB->byte[0]=1;
, EB->byte[0]=0;
,
) /* 0x90 -> 0x9F SETxx Eb */
case 0xA2: /* CPUID */
@ -809,12 +838,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETED(0);
GETGD;
tmp32s = GD->sdword[0];
tmp8u=tmp32s&(rex.w?63:31);
tmp32s >>= (rex.w?6:5);
tmp64s = rex.w?GD->sq[0]:GD->sdword[0];
tmp8u=tmp64s&(rex.w?63:31);
tmp64s >>= (rex.w?6:5);
if(!MODREG)
{
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp32s<<(rex.w?3:2)));
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));
}
if(rex.w) {
if(ED->q[0] & (1LL<<tmp8u))
@ -875,10 +904,18 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
fpu_fxrstor32(emu, ED);
break;
case 2: /* LDMXCSR Md */
emu->mxcsr = ED->dword[0];
emu->mxcsr.x32 = ED->dword[0];
if(box64_sse_flushto0)
applyFlushTo0(emu);
break;
case 3: /* STMXCSR Md */
ED->dword[0] = emu->mxcsr;
ED->dword[0] = emu->mxcsr.x32;
break;
case 7: /* CLFLUSH Ed */
#ifdef DYNAREC
if(box64_dynarec)
cleanDBFromAddressRange((uintptr_t)ED, 8, 0);
#endif
break;
default:
return 0;
@ -917,12 +954,14 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
}
} else {
cmp32(emu, R_EAX, ED->dword[0]);
R_RAX = ED->dword[0]; // to erase upper part of RAX
if(ACCESS_FLAG(F_ZF)) {
if(MODREG)
ED->q[0] = GD->dword[0];
else
ED->dword[0] = GD->dword[0];
R_RAX = R_EAX; // to erase upper part of RAX
} else {
R_RAX = ED->dword[0];
}
}
break;
@ -931,12 +970,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETED(0);
GETGD;
tmp32s = GD->sdword[0];
tmp8u=tmp32s&(rex.w?63:31);
tmp32s >>= (rex.w?6:5);
tmp64s = rex.w?GD->sq[0]:GD->sdword[0];
tmp8u=tmp64s&(rex.w?63:31);
tmp64s >>= (rex.w?6:5);
if(!MODREG)
{
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp32s<<(rex.w?3:2)));
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));
}
if(rex.w) {
if(ED->q[0] & (1LL<<tmp8u)) {
@ -1061,12 +1100,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETED(0);
GETGD;
tmp32s = GD->sdword[0];
tmp8u=tmp32s&(rex.w?63:31);
tmp32s >>= (rex.w?6:5);
tmp64s = rex.w?GD->sq[0]:GD->sdword[0];
tmp8u=tmp64s&(rex.w?63:31);
tmp64s >>= (rex.w?6:5);
if(!MODREG)
{
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp32s<<(rex.w?3:2)));
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));
}
if(rex.w) {
if(ED->q[0] & (1LL<<tmp8u))
@ -1220,14 +1259,14 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 0xC4: /* PINSRW Gm,Ew,Ib */
nextop = F8;
GETED(0);
GETED(1);
GETGM;
tmp8u = F8;
GM->uw[tmp8u&3] = ED->word[0]; // only low 16bits
break;
case 0xC5: /* PEXTRW Gw,Em,Ib */
nextop = F8;
GETEM(0);
GETEM(1);
GETGD;
tmp8u = F8;
GD->q[0] = EM->uw[tmp8u&3]; // 16bits extract, 0 extended
@ -1342,7 +1381,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEM(0);
GETGD;
GD->dword[0] = 0;
GD->q[0] = 0;
for (int i=0; i<8; ++i)
if(EM->ub[i]&0x80)
GD->dword[0] |= (1<<i);
@ -1421,7 +1460,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEM(0);
GETGM;
if(EM->q>15)
tmp8u = 16;
tmp8u = 15;
else
tmp8u = EM->ub[0];
for(int i=0; i<4; ++i)

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -10,8 +7,9 @@
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -19,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -121,7 +119,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
GETEX_OFFS(0, tlsdata);
GETGX;
GX->q[0] = EX->q[0];
if((nextop&0xC0)!=0xC0) {
if(!MODREG) {
// EX is not a register
GX->q[1] = 0;
}
@ -131,7 +129,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
GETEX_OFFS(0, tlsdata);
GETGX;
GX->ud[0] = EX->ud[0];
if((nextop&0xC0)!=0xC0) {
if(!MODREG) {
// EX is not a register (reg to reg only move 31:0)
GX->ud[1] = GX->ud[2] = GX->ud[3] = 0;
}
@ -165,7 +163,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
}
break;
case 0x29: /* MOVAPS Ex,Gx */
case 0x29: /* MOVAPS FS:Ex,Gx */
switch(rep) {
case 0:
nextop = F8;
@ -181,7 +179,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
case 0x59:
switch(rep) {
case 2: /* MULSS Gx, Ex */
case 2: /* MULSS Gx, FS:Ex */
nextop = F8;
GETEX_OFFS(0, tlsdata);
GETGX;
@ -192,10 +190,23 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
return 0;
}
break;
case 0x5A:
switch(rep) {
case 2: /* CVTSS2SD Gx, FS:Ex */
nextop = F8;
GETEX_OFFS(0, tlsdata);
GETGX;
GX->d[0] = EX->f[0];
break;
default:
return 0;
}
break;
case 0x6F:
switch(rep) {
case 2: /* MOVDQU Gx, Ex */
case 2: /* MOVDQU Gx, FS:Ex */
nextop = F8;
GETEX_OFFS(0, tlsdata);
GETGX;
@ -207,21 +218,33 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
}
break;
case 0xAF: /* IMUL Gd,Ed */
nextop = F8;
GETED_OFFS(0, tlsdata);
GETGD;
if(rex.w)
GD->q[0] = imul64(emu, GD->q[0], ED->q[0]);
else
GD->q[0] = imul32(emu, GD->dword[0], ED->dword[0]);
case 0xAF:
switch(rep) {
case 0: /* IMUL Gd, FS:Ed */
nextop = F8;
GETED_OFFS(0, tlsdata);
GETGD;
if(rex.w)
GD->q[0] = imul64(emu, GD->q[0], ED->q[0]);
else
GD->q[0] = imul32(emu, GD->dword[0], ED->dword[0]);
break;
default:
return 0;
}
break;
case 0xB6: /* MOVZX Gd,Eb */
nextop = F8;
GETEB_OFFS(0, tlsdata);
GETGD;
GD->q[0] = EB->byte[0];
case 0xB6:
switch(rep) {
case 0: /* MOVZX Gd, FS:Eb */
nextop = F8;
GETEB_OFFS(0, tlsdata);
GETGD;
GD->q[0] = EB->byte[0];
break;
default:
return 0;
}
break;
default:
@ -256,7 +279,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
break;
case 0x63: /* MOVSXD Gd,Ed */
case 0x63: /* MOVSXD Gd, FS:Ed */
nextop = F8;
GETED_OFFS(0, tlsdata);
GETGD;
@ -270,7 +293,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
break;
case 0x66:
return Run6664(emu, rex, addr);
return Run6664(emu, rex, seg, addr);
case 0x80: /* GRP Eb,Ib */
nextop = F8;
@ -297,7 +320,7 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
tmp32s = F8S;
}
if(rex.w) {
tmp64u = (uint64_t)tmp32s;
tmp64u = (uint64_t)(int64_t)tmp32s;
switch((nextop>>3)&7) {
case 0: ED->q[0] = add64(emu, ED->q[0], tmp64u); break;
case 1: ED->q[0] = or64(emu, ED->q[0], tmp64u); break;
@ -335,13 +358,13 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
}
break;
case 0x88: /* MOV Eb,Gb */
case 0x88: /* MOV FS:Eb,Gb */
nextop = F8;
GETEB_OFFS(0, tlsdata);
GETGB;
EB->byte[0] = GB;
break;
case 0x89: /* MOV Ed,Gd */
case 0x89: /* MOV FS:Ed,Gd */
nextop = F8;
GETED_OFFS(0, tlsdata);
GETGD;
@ -355,13 +378,13 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
ED->dword[0] = GD->dword[0];
}
break;
case 0x8A: /* MOV Gb,Eb */
case 0x8A: /* MOV Gb, FS:Eb */
nextop = F8;
GETEB_OFFS(0, tlsdata);
GETGB;
GB = EB->byte[0];
break;
case 0x8B: /* MOV Gd,Ed */
case 0x8B: /* MOV Gd, FS:Ed */
nextop = F8;
GETED_OFFS(0, tlsdata);
GETGD;
@ -371,12 +394,12 @@ uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
GD->q[0] = ED->dword[0];
break;
case 0xC6: /* MOV Eb,Ib */
case 0xC6: /* MOV FS:Eb, Ib */
nextop = F8;
GETEB_OFFS(1, tlsdata);
EB->byte[0] = F8;
break;
case 0xC7: /* MOV Ed,Id */
case 0xC7: /* MOV FS:Ed, Id */
nextop = F8;
GETED_OFFS(4, tlsdata);
if(rex.w)

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,8 +17,11 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#ifdef DYNAREC
#include "../dynarec/native_lock.h"
#endif
#include "modrm.h"
@ -133,7 +133,9 @@ uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
break;
case 0x64: /* FS: */
return Run6664(emu, rex, addr);
return Run6664(emu, rex, _FS, addr);
case 0x65: /* GS: */
return Run6664(emu, rex, _GS, addr);
case 0x69: /* IMUL Gw,Ew,Iw */
nextop = F8;

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -10,8 +7,9 @@
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -19,8 +17,9 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
static uint8_t ff_mult(uint8_t a, uint8_t b)
@ -55,6 +54,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
int32_t tmp32s;
uint32_t tmp32u;
uint64_t tmp64u;
int64_t tmp64s;
float tmpf;
#ifndef NOALIGN
int is_nan;
@ -198,10 +198,10 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGM;
switch((emu->mxcsr>>13)&3) {
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GM->sd[0] = floor(EX->d[0]+0.5);
GM->sd[1] = floor(EX->d[1]+0.5);
GM->sd[0] = nearbyint(EX->d[0]);
GM->sd[1] = nearbyint(EX->d[1]);
break;
case ROUND_Down:
GM->sd[0] = floor(EX->d[0]);
@ -348,6 +348,16 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
}
break;
case 0x10: /* PBLENDVB Gx, Ex */
nextop = F8;
GETEX(0);
GETGX;
for (int i=0; i<16; ++i) {
if(emu->xmm[0].ub[i]&0x80)
GX->ub[i] = EX->ub[i];
}
break;
case 0x14: /* BLENDVPS Gx, Ex */
nextop = F8;
GETEX(0);
@ -435,6 +445,27 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GX->sq[i] = EX->sd[i];
break;
case 0x29: /* PCMPEQQ Gx, Ex */
nextop = F8;
GETEX(0);
GETGX;
for(int i=1; i>=0; --i)
GX->sq[i] = (GX->sq[i]==EX->sq[i])?-1LL:0LL;
break;
case 0x2B: /* PACKUSDW Gx, Ex */
nextop = F8;
GETEX(0);
GETGX;
for(int i=3; i>=0; --i)
GX->uw[i] = ((GX->sd[i]<0)?0:(GX->sd[i]>65535)?65535:GX->sd[i]);
if(GX==EX)
GX->q[1] = GX->q[0];
else
for(int i=0; i<4; ++i)
GX->uw[i+4] = ((EX->sd[i]<0)?0:(EX->sd[i]>65535)?65535:EX->sd[i]);
break;
case 0x30: /* PMOVZXBW Gx, Ex */
nextop = F8;
GETEX(0);
@ -542,6 +573,14 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
if(GX->ud[i]<EX->ud[i])
GX->ud[i] = EX->ud[i];
break;
case 0x40: /* PMULLD Gx, Ex */
nextop = F8;
GETEX(0);
GETGX;
for(int i=0; i<4; ++i)
if(GX->ud[i]<EX->ud[i])
GX->ud[i] *= EX->ud[i];
break;
case 0xDB: /* AESIMC Gx, Ex */
nextop = F8;
@ -659,18 +698,18 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETGX;
tmp8u = F8; // ignoring bit 3 interupt thingy
if(tmp8u&4)
tmp8u = (emu->mxcsr>>13)&3;
tmp8u = emu->mxcsr.f.MXCSR_RC;
else
tmp8u &= 3;
switch(tmp8u) {
case ROUND_Nearest:
GX->f[0] = floor(EX->f[0]+0.5);
GX->f[0] = nearbyintf(EX->f[0]);
break;
case ROUND_Down:
GX->f[0] = floor(EX->f[0]);
GX->f[0] = floorf(EX->f[0]);
break;
case ROUND_Up:
GX->f[0] = ceil(EX->f[0]);
GX->f[0] = ceilf(EX->f[0]);
break;
case ROUND_Chop:
GX->f[0] = EX->f[0];
@ -683,12 +722,12 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETGX;
tmp8u = F8; // ignoring bit 3 interupt thingy
if(tmp8u&4)
tmp8u = (emu->mxcsr>>13)&3;
tmp8u = emu->mxcsr.f.MXCSR_RC;
else
tmp8u &= 3;
switch(tmp8u) {
case ROUND_Nearest:
GX->d[0] = floor(EX->d[0]+0.5);
GX->d[0] = nearbyint(EX->d[0]);
break;
case ROUND_Down:
GX->d[0] = floor(EX->d[0]);
@ -753,12 +792,12 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETGX;
tmp8u = F8;
ED->dword[0] = GX->ud[tmp8u&3];
if(MODREG && rex.w) ED->dword[1] = 0;
if(MODREG) ED->dword[1] = 0;
break;
case 0x20: // PINSRB GX, ED, u8
nextop = F8;
GETED(1);
GETED(1); // It's ED, and not EB
GETGX;
tmp8u = F8;
GX->ub[tmp8u&0xf] = ED->byte[0];
@ -798,6 +837,25 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GX->f[i] = (tmp8u&(1<<i))?tmpf:0.0f;
break;
case 0x44: /* PCLMULQDQ Gx, Ex, Ib */
nextop = F8;
GETEX(1);
GETGX;
tmp8u = F8;
{
int g = (tmp8u&1)?1:0;
int e = (tmp8u&0b10000)?1:0;
__int128 result = 0;
__int128 op2 = EX->q[e];
for (int i=0; i<64; ++i)
if(GX->q[g]&(1LL<<i))
result ^= (op2<<i);
GX->q[0] = result&0xffffffffffffffffLL;
GX->q[1] = (result>>64)&0xffffffffffffffffLL;
}
break;
case 0xDF: // AESKEYGENASSIST Gx, Ex, u8
nextop = F8;
GETEX(1);
@ -830,14 +888,14 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEW(0);
GETGW;
, if(rex.w) GW->q[0] = EW->q[0]; else GW->word[0] = EW->word[0];
,
,,
) /* 0x40 -> 0x4F CMOVxx Gw,Ew */ // conditional move, no sign
case 0x50: /* MOVMSKPD Gd, Ex */
nextop = F8;
GETEX(0);
GETGD;
GD->dword[0] = 0;
GD->q[0] = 0;
for(int i=0; i<2; ++i)
GD->dword[0] |= ((EX->q[i]>>63)&1)<<i;
break;
@ -923,32 +981,24 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGX;
switch((emu->mxcsr>>13)&3) {
case ROUND_Nearest:
GX->sd[0] = floorf(EX->f[0]+0.5f);
GX->sd[1] = floorf(EX->f[1]+0.5f);
GX->sd[2] = floorf(EX->f[2]+0.5f);
GX->sd[3] = floorf(EX->f[3]+0.5f);
break;
case ROUND_Down:
GX->sd[0] = floorf(EX->f[0]);
GX->sd[1] = floorf(EX->f[1]);
GX->sd[2] = floorf(EX->f[2]);
GX->sd[3] = floorf(EX->f[3]);
break;
case ROUND_Up:
GX->sd[0] = ceilf(EX->f[0]);
GX->sd[1] = ceilf(EX->f[1]);
GX->sd[2] = ceilf(EX->f[2]);
GX->sd[3] = ceilf(EX->f[3]);
break;
case ROUND_Chop:
GX->sd[0] = EX->f[0];
GX->sd[1] = EX->f[1];
GX->sd[2] = EX->f[2];
GX->sd[3] = EX->f[3];
break;
}
for(int i=0; i<4; ++i)
if(isnanf(EX->f[i]) || isinff(EX->f[i]) || EX->f[i]>0x7fffffff || EX->f[i]<-0x80000000)
GX->sd[i] = 0x80000000;
else
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GX->sd[i] = nearbyintf(EX->f[i]);
break;
case ROUND_Down:
GX->sd[i] = floorf(EX->f[i]);
break;
case ROUND_Up:
GX->sd[i] = ceilf(EX->f[i]);
break;
case ROUND_Chop:
GX->sd[i] = EX->f[i];
break;
}
break;
case 0x5C: /* SUBPD Gx, Ex */
nextop = F8;
@ -1186,6 +1236,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 4: /* PSRAW Ex, Ib */
tmp8u = F8;
if(tmp8u>15) tmp8u=15;
for (int i=0; i<8; ++i) EX->sw[i] >>= tmp8u;
break;
case 6: /* PSLLW Ex, Ib */
@ -1212,10 +1263,8 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 4: /* PSRAD Ex, Ib */
tmp8u = F8;
if(tmp8u>31) {
for (int i=0; i<4; ++i) EX->sd[i] = (EX->sd[i]<0)?-1:0;
} else
for (int i=0; i<4; ++i) EX->sd[i] >>= tmp8u;
if(tmp8u>31) tmp8u=31;
for (int i=0; i<4; ++i) EX->sd[i] >>= tmp8u;
break;
case 6: /* PSLLD Ex, Ib */
tmp8u = F8;
@ -1327,7 +1376,31 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
#endif
}
break;
case 0x7D: /* HSUBPD Gx, Ex */
nextop = F8;
GETEX(0);
GETGX;
#ifndef NOALIGN
is_nan = isnan(GX->d[0]) || isnan(GX->d[1]);
#endif
GX->d[0] -= GX->d[1];
#ifndef NOALIGN
if(!is_nan && isnan(GX->d[0]))
GX->d[0] = -NAN;
#endif
if(EX==GX) {
GX->d[1] = GX->d[0];
} else {
#ifndef NOALIGN
is_nan = isnan(EX->d[0]) || isnan(EX->d[1]);
#endif
GX->d[1] = EX->d[0] - EX->d[1];
#ifndef NOALIGN
if(!is_nan && isnan(GX->d[1]))
GX->d[1] = -NAN;
#endif
}
break;
case 0x7E: /* MOVD Ed, Gx */
nextop = F8;
GETED(0);
@ -1493,13 +1566,19 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEB(0);
GETGW;
GW->word[0] = EB->byte[0];
if(rex.w)
GW->q[0] = EB->byte[0];
else
GW->word[0] = EB->byte[0];
break;
case 0xB7: /* MOVZX Gw,Ew */
nextop = F8;
GETEW(0);
GETGW;
GW->word[0] = EW->word[0];
if(rex.w)
GW->q[0] = EW->word[0];
else
GW->word[0] = EW->word[0];
break;
case 0xBA:
@ -1507,7 +1586,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
switch((nextop>>3)&7) {
case 4: /* BT Ew,Ib */
CHECK_FLAGS(emu);
GETEW(0);
GETEW(1);
GETGW;
tmp8u = F8;
if(rex.w) {
@ -1526,7 +1605,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 5: /* BTS Ew, Ib */
CHECK_FLAGS(emu);
GETEW(0);
GETEW(1);
GETGW;
tmp8u = F8;
if(rex.w) {
@ -1549,7 +1628,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 6: /* BTR Ew, Ib */
CHECK_FLAGS(emu);
GETEW(0);
GETEW(1);
GETGW;
tmp8u = F8;
if(rex.w) {
@ -1570,7 +1649,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 7: /* BTC Ew, Ib */
CHECK_FLAGS(emu);
GETEW(0);
GETEW(1);
GETGW;
tmp8u = F8;
if(rex.w) {
@ -1598,12 +1677,12 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEW(0);
GETGW;
tmp32s = rex.w?GW->sdword[0]:GW->sword[0];
tmp8u=tmp32s&(rex.w?63:15);
tmp32s >>= (rex.w?6:4);
tmp64s = rex.w?GW->sq[0]:GW->sword[0];
tmp8u=tmp64s&(rex.w?63:15);
tmp64s >>= (rex.w?6:4);
if(!MODREG)
{
EW=(reg64_t*)(((uintptr_t)(EW))+(tmp32s<<(rex.w?3:1)));
EW=(reg64_t*)(((uintptr_t)(EW))+(tmp64s<<(rex.w?3:1)));
}
if(rex.w) {
if(EW->q[0] & (1LL<<tmp8u))
@ -1730,7 +1809,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEX(1);
GETGD;
tmp8u = F8;
GD->dword[0] = EX->uw[tmp8u&7]; // 16bits extract, 0 extended
GD->q[0] = EX->uw[tmp8u&7]; // 16bits extract, 0 extended
break;
case 0xC6: /* SHUFPD Gx, Ex, Ib */
nextop = F8;
@ -1903,7 +1982,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGX;
tmp8u=(EX->q[0]>15)?16:EX->ub[0];
tmp8u=(EX->q[0]>15)?15:EX->ub[0];
for (int i=0; i<8; ++i)
GX->sw[i] >>= tmp8u;
break;
@ -1911,13 +1990,9 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGX;
tmp8u=(EX->q[0]>31)?32:EX->ub[0];
if(tmp8u>31)
for (int i=0; i<4; ++i)
GX->sd[i] = (GX->sd[i]<0)?-1:0;
else
for (int i=0; i<4; ++i)
GX->sd[i] >>= tmp8u;
tmp8u=(EX->q[0]>31)?31:EX->ub[0];
for (int i=0; i<4; ++i)
GX->sd[i] >>= tmp8u;
break;
case 0xE3: /* PAVGW Gx, Ex */
nextop = F8;
@ -1948,8 +2023,14 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGX;
GX->sd[0] = EX->d[0];
GX->sd[1] = EX->d[1];
if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff)
GX->sd[0] = 0x80000000;
else
GX->sd[0] = EX->d[0];
if(isnan(EX->d[1]) || isinf(EX->d[1]) || EX->d[1]>0x7fffffff)
GX->sd[1] = 0x80000000;
else
GX->sd[1] = EX->d[1];
GX->q[1] = 0;
break;
case 0xE7: /* MOVNTDQ Ex, Gx */
@ -1988,10 +2069,8 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGX;
GX->ud[0] |= EX->ud[0];
GX->ud[1] |= EX->ud[1];
GX->ud[2] |= EX->ud[2];
GX->ud[3] |= EX->ud[3];
GX->q[0] |= EX->q[0];
GX->q[1] |= EX->q[1];
break;
case 0xEC: /* PADDSB Gx,Ex */
nextop = F8;
@ -2051,7 +2130,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
if(EX->q[0]>63)
{GX->q[0] = GX->q[1] = 0;}
else
{tmp8u=EX->q[0]; for (int i=0; i<2; ++i) GX->q[i] <<= tmp8u;}
{tmp8u=EX->ub[0]; for (int i=0; i<2; ++i) GX->q[i] <<= tmp8u;}
break;
case 0xF4: /* PMULUDQ Gx,Ex */
nextop = F8;

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -10,8 +7,9 @@
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -19,18 +17,18 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
uintptr_t Run6664(x64emu_t *emu, rex_t rex, uintptr_t addr)
uintptr_t Run6664(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr)
{
uint8_t opcode;
uint8_t nextop;
reg64_t *oped, *opgd;
sse_regs_t *opex, *opgx;
uintptr_t tlsdata = GetFSBaseEmu(emu);
uintptr_t tlsdata = GetSegmentBaseEmu(emu, seg);
opcode = F8;
// REX prefix before the F0 are ignored
@ -45,13 +43,12 @@ uintptr_t Run6664(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 0x0F:
opcode = F8;
switch(opcode) {
case 0xD6: /* MOVQ Ex,Gx */
case 0x11: /* MOVUPD Ex, Gx */
nextop = F8;
GETEX_OFFS(0, tlsdata);
GETGX;
EX->q[0] = GX->q[0];
if(MODREG)
EX->q[1] = 0;
memcpy(EX, GX, 16); // unaligned...
break;
case 0x2E: /* UCOMISD Gx, Ex */
@ -73,6 +70,15 @@ uintptr_t Run6664(x64emu_t *emu, rex_t rex, uintptr_t addr)
CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF);
break;
case 0xD6: /* MOVQ Ex,Gx */
nextop = F8;
GETEX_OFFS(0, tlsdata);
GETGX;
EX->q[0] = GX->q[0];
if(MODREG)
EX->q[1] = 0;
break;
default:
return 0;
}

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -10,8 +7,9 @@
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -19,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,8 +17,11 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#ifdef DYNAREC
#include "dynarec/native_lock.h"
#endif
#include "modrm.h"
@ -34,6 +34,9 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
int64_t tmp64s;
uint64_t tmp64u, tmp64u2;
reg64_t *oped, *opgd;
#ifdef USE_CAS
uint64_t tmpcas;
#endif
opcode = F8;
// REX prefix before the F0 are ignored
@ -55,6 +58,18 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEW(0);
GETGW;
#ifdef DYNAREC
do {
tmp16u = native_lock_read_h(EW);
cmp16(emu, R_AX, tmp16u);
if(ACCESS_FLAG(F_ZF)) {
tmp32s = native_lock_write_h(EW, GW->word[0]);
} else {
R_AX = tmp16u;
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
cmp16(emu, R_AX, EW->word[0]);
if(ACCESS_FLAG(F_ZF)) {
@ -63,12 +78,37 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
R_AX = EW->word[0];
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xC1: /* XADD Gw,Ew */
nextop = F8;
GETEW(0);
GETGW;
#ifdef DYNAREC
if(rex.w) {
do {
tmp64u = native_lock_read_dd(ED);
tmp64u2 = add64(emu, tmp64u, GD->q[0]);
} while(native_lock_write_dd(ED, tmp64u2));
GD->q[0] = tmp64u;
} else {
if(((uintptr_t)ED)&1) {
do {
tmp16u = ED->word[0] & ~0xff;
tmp16u |= native_lock_read_h(ED);
tmp16u2 = add16(emu, tmp16u, GD->word[0]);
} while(native_lock_write_h(ED, tmp16u2&0xff));
ED->word[0] = tmp16u2;
} else {
do {
tmp16u = native_lock_read_h(ED);
tmp16u2 = add16(emu, tmp16u, GD->word[0]);
} while(native_lock_write_h(ED, tmp16u2));
}
GD->word[0] = tmp16u;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
tmp64u = add64(emu, ED->q[0], GD->q[0]);
@ -80,12 +120,48 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->word[0] = tmp16u;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
return 0;
}
break;
#ifdef DYNAREC
#define GO(B, OP) \
case B+1: \
nextop = F8; \
GETEW(0); \
GETGW; \
if(rex.w) { \
do { \
tmp64u = native_lock_read_dd(ED); \
tmp64u = OP##64(emu, tmp64u, GD->q[0]); \
} while (native_lock_write_dd(ED, tmp64u)); \
} else { \
do { \
tmp16u = native_lock_read_h(ED); \
tmp16u = OP##16(emu, tmp16u, GW->word[0]); \
} while (native_lock_write_h(ED, tmp16u)); \
} \
break; \
case B+3: \
nextop = F8; \
GETEW(0); \
GETGW; \
if(rex.w) \
GD->q[0] = OP##64(emu, GD->q[0], ED->q[0]); \
else \
GW->word[0] = OP##16(emu, GW->word[0], EW->word[0]);\
break; \
case B+5: \
if(rex.w) \
R_RAX = OP##64(emu, R_RAX, F32S64); \
else \
R_AX = OP##16(emu, R_AX, F16); \
break;
#else
#define GO(B, OP) \
case B+1: \
nextop = F8; \
@ -95,10 +171,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
if(rex.w) \
ED->q[0] = OP##64(emu, ED->q[0], GD->q[0]); \
else \
if(MODREG) \
ED->q[0] = OP##32(emu, ED->dword[0], GD->dword[0]); \
else \
EW->word[0] = OP##16(emu, EW->word[0], GW->word[0]); \
EW->word[0] = OP##16(emu, EW->word[0], GW->word[0]);\
pthread_mutex_unlock(&emu->context->mutex_lock); \
break; \
case B+3: \
@ -120,6 +193,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
R_AX = OP##16(emu, R_AX, F16); \
pthread_mutex_unlock(&emu->context->mutex_lock); \
break;
#endif
GO(0x00, add) /* ADD 0x00 -> 0x05 */
GO(0x08, or) /* OR 0x08 -> 0x0D */
GO(0x10, adc) /* ADC 0x10 -> 0x15 */
@ -135,6 +209,30 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETED((opcode==0x83)?1:2);
tmp64s = (opcode==0x83)?(F8S):(F16S);
tmp64u = (uint64_t)tmp64s;
#ifdef DYNAREC
if(MODREG)
switch((nextop>>3)&7) {
case 0: ED->word[0] = add16(emu, ED->word[0], tmp64u); break;
case 1: ED->word[0] = or16(emu, ED->word[0], tmp64u); break;
case 2: ED->word[0] = adc16(emu, ED->word[0], tmp64u); break;
case 3: ED->word[0] = sbb16(emu, ED->word[0], tmp64u); break;
case 4: ED->word[0] = and16(emu, ED->word[0], tmp64u); break;
case 5: ED->word[0] = sub16(emu, ED->word[0], tmp64u); break;
case 6: ED->word[0] = xor16(emu, ED->word[0], tmp64u); break;
case 7: cmp16(emu, ED->word[0], tmp64u); break;
}
else
switch((nextop>>3)&7) {
case 0: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = add16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
case 1: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = or16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
case 2: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = adc16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
case 3: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sbb16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
case 4: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = and16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
case 5: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = sub16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
case 6: do { tmp16u2 = native_lock_read_h(ED); tmp16u2 = xor16(emu, tmp16u2, tmp64u);} while(native_lock_write_h(ED, tmp16u2)); break;
case 7: cmp16(emu, ED->word[0], tmp64u); break;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
switch((nextop>>3)&7) {
case 0: ED->word[0] = add16(emu, ED->word[0], tmp64u); break;
@ -147,6 +245,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 7: cmp16(emu, ED->word[0], tmp64u); break;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xFF: /* GRP 5 Ed */
@ -154,6 +253,38 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETED(0);
switch((nextop>>3)&7) {
case 0: /* INC Ed */
#ifdef DYNAREC
if(rex.w)
if(((uintptr_t)ED)&7) {
// unaligned
do {
tmp64u = ED->q[0] & 0xffffffffffffff00LL;
tmp64u |= native_lock_read_b(ED);
tmp64u = inc64(emu, tmp64u);
} while(native_lock_write_b(ED, tmp64u&0xff));
ED->q[0] = tmp64u;
}
else
do {
tmp64u = native_lock_read_dd(ED);
} while(native_lock_write_dd(ED, inc64(emu, tmp64u)));
else {
if((uintptr_t)ED&1) {
//meh.
do {
tmp16u = ED->word[0];
tmp16u &=~0xff;
tmp16u |= native_lock_read_b(ED);
tmp16u = inc16(emu, tmp16u);
} while(native_lock_write_b(ED, tmp16u&0xff));
ED->word[0] = tmp16u;
} else {
do {
tmp16u = native_lock_read_h(ED);
} while(native_lock_write_h(ED, inc16(emu, tmp16u)));
}
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
ED->q[0] = inc64(emu, ED->q[0]);
@ -161,8 +292,30 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->word[0] = inc16(emu, ED->word[0]);
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 1: /* DEC Ed */
#ifdef DYNAREC
if(rex.w)
if(((uintptr_t)ED)&7) {
// unaligned
do {
tmp64u = ED->q[0] & 0xffffffffffffff00LL;
tmp64u |= native_lock_read_b(ED);
tmp64u = dec64(emu, tmp64u);
} while(native_lock_write_b(ED, tmp64u&0xff));
ED->q[0] = tmp64u;
}
else
do {
tmp64u = native_lock_read_dd(ED);
} while(native_lock_write_dd(ED, dec64(emu, tmp64u)));
else {
do {
tmp16u = native_lock_read_h(ED);
} while(native_lock_write_h(ED, dec16(emu, tmp16u)));
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
ED->q[0] = dec64(emu, ED->q[0]);
@ -170,6 +323,7 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->word[0] = dec16(emu, ED->word[0]);
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
printf_log(LOG_NONE, "Illegal Opcode 0xF0 0xFF 0x%02X 0x%02X\n", nextop, PK(0));

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -397,20 +394,22 @@ uintptr_t Run67(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
case 4: /* MUL EAX,Ed */
mul32_eax(emu, ED->dword[0]);
emu->regs[_AX].dword[1] = 0;
emu->regs[_DX].dword[1] = 0;
break;
case 5: /* IMUL EAX,Ed */
imul32_eax(emu, ED->dword[0]);
emu->regs[_AX].dword[1] = 0;
emu->regs[_DX].dword[1] = 0;
break;
case 6: /* DIV Ed */
div32(emu, ED->dword[0]);
emu->regs[_AX].dword[1] = 0;
emu->regs[_DX].dword[1] = 0;
//emu->regs[_AX].dword[1] = 0;
//emu->regs[_DX].dword[1] = 0;
break;
case 7: /* IDIV Ed */
idiv32(emu, ED->dword[0]);
emu->regs[_AX].dword[1] = 0;
emu->regs[_DX].dword[1] = 0;
//emu->regs[_AX].dword[1] = 0;
//emu->regs[_DX].dword[1] = 0;
break;
}
}

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,8 +17,12 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "signals.h"
#ifdef DYNAREC
#include "../dynarec/native_lock.h"
#endif
#include "modrm.h"
@ -43,40 +44,67 @@ uintptr_t Run670F(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)
switch(opcode) {
case 0x2E: /* UCOMISS Gx, Ex */
case 0x2E:
// same for now
case 0x2F: /* COMISS Gx, Ex */
if(rep) {
return 0;
case 0x2F:
switch(rep) {
case 0: /* (U)COMISS Gx, Ex */
RESET_FLAGS(emu);
nextop = F8;
GETEX32(0);
GETGX;
if(isnan(GX->f[0]) || isnan(EX->f[0])) {
SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF);
} else if(isgreater(GX->f[0], EX->f[0])) {
CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
} else if(isless(GX->f[0], EX->f[0])) {
CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF);
} else {
SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
}
CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF);
break;
default:
return 0;
}
RESET_FLAGS(emu);
nextop = F8;
GETEX32(0);
GETGX;
if(isnan(GX->f[0]) || isnan(EX->f[0])) {
SET_FLAG(F_ZF); SET_FLAG(F_PF); SET_FLAG(F_CF);
} else if(isgreater(GX->f[0], EX->f[0])) {
CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
} else if(isless(GX->f[0], EX->f[0])) {
CLEAR_FLAG(F_ZF); CLEAR_FLAG(F_PF); SET_FLAG(F_CF);
} else {
SET_FLAG(F_ZF); CLEAR_FLAG(F_PF); CLEAR_FLAG(F_CF);
}
CLEAR_FLAG(F_OF); CLEAR_FLAG(F_AF); CLEAR_FLAG(F_SF);
break;
case 0x6F: /* MOVQ Gm, Em */
nextop = F8;
GETEM32(0);
GETGM;
GM->q = EM->q;
case 0x6F:
switch(rep) {
case 0: /* MOVQ Gm, Em */
nextop = F8;
GETEM32(0);
GETGM;
GM->q = EM->q;
break;
default:
return 0;
}
break;
case 0x7F: /* MOVQ Em, Gm */
nextop = F8;
GETEM32(0);
GETGM;
EM->q = GM->q;
case 0x7F:
switch(rep) {
case 0: /* MOVQ Em, Gm */
nextop = F8;
GETEM32(0);
GETGM;
EM->q = GM->q;
break;
default:
return 0;
}
break;
case 0xB9:
switch(rep) {
case 0: /* UD1 Ed */
nextop = F8;
GETED32(0);
emit_signal(emu, SIGILL, (void*)R_RIP, 0);
break;
default:
return 0;
}
break;
default:

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,8 +17,12 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#ifdef DYNAREC
#include "../dynarec/native_lock.h"
#endif
#include "modrm.h"
uintptr_t Run6766(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr)

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"

View File

@ -1,22 +1,27 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_TRACE
#include <unistd.h>
#include <sys/syscall.h>
#endif
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64run_private.h"
#include "x64emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "x64run.h"
#include "librarian.h"
#include "elfloader.h"
#ifdef HAVE_TRACE
#include "x64trace.h"
#endif
#include "x64tls.h"
#include "bridge.h"
@ -46,6 +51,7 @@ int32_t EXPORT my___libc_start_main(x64emu_t* emu, int *(main) (int, char * *, c
} else {
RunElfInit(my_context->elfs[0], emu);
}
MarkElfInitDone(my_context->elfs[0]);
printf_log(LOG_DEBUG, "Transfert to main(%d, %p, %p)=>%p from __libc_start_main\n", my_context->argc, my_context->argv, my_context->envv, main);
// call main and finish
Push64(emu, GetRBP(emu)); // set frame pointer
@ -64,12 +70,19 @@ int32_t EXPORT my___libc_start_main(x64emu_t* emu, int *(main) (int, char * *, c
SetRBP(emu, Pop64(emu)); // restore RBP
emu->quit = 1; // finished!
}
return 0;
return (int)GetEAX(emu);
}
const char* GetNativeName(void* p)
{
static char buff[500] = {0};
#ifdef HAVE_TRACE
{
const char* n = getBridgeName(p);
if(n)
return n;
}
#endif
Dl_info info;
if(dladdr(p, &info)==0) {
const char *ret = GetNameOffset(my_context->maplib, p);
@ -926,7 +939,34 @@ void UpdateFlags(x64emu_t *emu)
case d_rcr32:
case d_rcr64:
case d_unknown:
printf_log(LOG_NONE, "RVTrans: %p trying to evaluate Unknown defered Flags\n", (void*)R_RIP);
printf_log(LOG_NONE, "Box64: %p trying to evaluate Unknown defered Flags\n", (void*)R_RIP);
break;
case d_dec8i:
case d_dec16i:
case d_dec32i:
case d_dec64i:
case d_inc8i:
case d_inc16i:
case d_inc32i:
case d_inc64i:
{
defered_flags_t df = emu->df - (d_dec8i - d_dec8);
if(emu->df_sav!=d_none) {
// compute CF
multiuint_t op1 = emu->op1;
multiuint_t res = emu->res;
emu->df = emu->df_sav;
emu->op1 = emu->op1_sav;
emu->res = emu->res_sav;
emu->df_sav = d_none;
UpdateFlags(emu);
emu->op1 = op1;
emu->res = res;
}
emu->df = df;
}
UpdateFlags(emu);
break;
}
RESET_FLAGS(emu);
@ -980,6 +1020,82 @@ void printFunctionAddr(uintptr_t nextaddr, const char* text)
}
}
#ifdef HAVE_TRACE
extern uint64_t start_cnt;
#define PK(a) (*(uint8_t*)(ip+a))
#define PK32(a) (*(int32_t*)((uint8_t*)(ip+a)))
#define PK64(a) (*(int64_t*)((uint8_t*)(ip+a)))
void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec)
{
if(start_cnt) --start_cnt;
if(!start_cnt && my_context->dec && (
(trace_end == 0)
|| ((ip >= trace_start) && (ip < trace_end))) ) {
int tid = syscall(SYS_gettid);
mutex_lock(&my_context->mutex_trace);
#ifdef DYNAREC
if((my_context->trace_tid != tid) || (my_context->trace_dynarec!=dynarec)) {
printf_log(LOG_NONE, "Thread %04d| (%s) |\n", tid, dynarec?"dyn":"int");
my_context->trace_tid = tid;
my_context->trace_dynarec = dynarec;
}
#else
(void)dynarec;
if(my_context->trace_tid != tid) {
printf_log(LOG_NONE, "Thread %04d|\n", tid);
my_context->trace_tid = tid;
}
#endif
printf_log(LOG_NONE, "%s", DumpCPURegs(emu, ip));
if(R_RIP==0) {
printf_log(LOG_NONE, "Running at NULL address\n");
mutex_unlock(&my_context->mutex_trace);
return;
}
if(PK(0)==0xcc && PK(1)=='S' && PK(2)=='C') {
uint64_t a = *(uint64_t*)(ip+3);
if(a==0) {
printf_log(LOG_NONE, "%p: Exit x86emu\n", (void*)ip);
} else {
printf_log(LOG_NONE, "%p: Native call to %p => %s\n", (void*)ip, (void*)a, GetNativeName(*(void**)(ip+11)));
}
} else {
printf_log(LOG_NONE, "%s", DecodeX64Trace(my_context->dec, ip));
uint8_t peek = PK(0);
rex_t rex = {0};
if(peek>=0x40 && peek<=0x4f) {
rex.rex = peek;
ip++;
peek = PK(0);
}
if(peek==0xC3 || peek==0xC2 || (peek==0xF3 && PK(1)==0xC3)) {
printf_log(LOG_NONE, " => %p", *(void**)(R_RSP));
printFunctionAddr(*(uintptr_t*)(R_RSP), "=> ");
} else if(peek==0x57 && rex.b) {
printf_log(LOG_NONE, " => STACK_TOP: %p", *(void**)(R_RSP));
printFunctionAddr(ip, "here: ");
} else if(peek==0x55 || peek==0x53) {
printFunctionAddr(*(uintptr_t*)(R_RSP), " STACK_TOP: ");
} else if(peek==0xF3 && PK(1)==0x0F && PK(2)==0x1E && PK(3)==0xFA) {
printFunctionAddr(*(uintptr_t*)(R_RSP), " STACK_TOP: ");
} else if(peek==0xE8) { // Call
uintptr_t nextaddr = ip + 5 + PK64(1);
printFunctionAddr(nextaddr, "=> ");
} else if(peek==0xFF) {
if(PK(1)==0x25) {
uintptr_t nextaddr = ip + 6 + PK64(2);
printFunctionAddr(nextaddr, "=> ");
}
}
printf_log(LOG_NONE, "\n");
}
mutex_unlock(&my_context->mutex_trace);
}
}
#endif
static uint8_t F8(uintptr_t* addr) {
uint8_t ret = *(uint8_t*)*addr;
*addr+=1;

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __X86RUN_PRIVATE_H_
#define __X86RUN_PRIVATE_H_
@ -67,11 +64,11 @@ void UpdateFlags(x64emu_t *emu);
#define CHECK_FLAGS(emu) if(emu->df) UpdateFlags(emu)
#define RESET_FLAGS(emu) emu->df = d_none
uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step);
uintptr_t Run64(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr);
uintptr_t Run66(x64emu_t *emu, rex_t rex, int rep, uintptr_t addr);
uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t Run6664(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t Run6664(x64emu_t *emu, rex_t rex, int seg, uintptr_t addr);
uintptr_t Run66D9(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t Run66DD(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr);
@ -88,7 +85,7 @@ uintptr_t RunDD(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t RunDE(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr);
uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step);
uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr);
void x64Syscall(x64emu_t *emu);
@ -103,4 +100,8 @@ uintptr_t GetSegmentBaseEmu(x64emu_t* emu, int seg);
const char* GetNativeName(void* p);
#ifdef HAVE_TRACE
void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec);
#endif
#endif //__X86RUN_PRIVATE_H_

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -119,76 +116,36 @@ uintptr_t RunD8(x64emu_t *emu, rex_t rex, uintptr_t addr)
switch((nextop>>3)&7) {
case 0: /* FADD ST0, float */
GETED(0);
if(!(((uintptr_t)ED)&3))
ST0.d += *(float*)ED;
else {
memcpy(&f, ED, sizeof(float));
ST0.d += f;
}
ST0.d += *(float*)ED;
break;
case 1: /* FMUL ST0, float */
GETED(0);
if(!(((uintptr_t)ED)&3))
ST0.d *= *(float*)ED;
else {
memcpy(&f, ED, sizeof(float));
ST0.d *= f;
}
ST0.d *= *(float*)ED;
break;
case 2: /* FCOM ST0, float */
GETED(0);
if(!(((uintptr_t)ED)&3))
fpu_fcom(emu, *(float*)ED);
else {
memcpy(&f, ED, sizeof(float));
fpu_fcom(emu, f);
}
fpu_fcom(emu, *(float*)ED);
break;
case 3: /* FCOMP */
GETED(0);
if(!(((uintptr_t)ED)&3))
fpu_fcom(emu, *(float*)ED);
else {
memcpy(&f, ED, sizeof(float));
fpu_fcom(emu, f);
}
fpu_fcom(emu, *(float*)ED);
fpu_do_pop(emu);
break;
case 4: /* FSUB ST0, float */
GETED(0);
if(!(((uintptr_t)ED)&3))
ST0.d -= *(float*)ED;
else {
memcpy(&f, ED, sizeof(float));
ST0.d -= f;
}
ST0.d -= *(float*)ED;
break;
case 5: /* FSUBR ST0, float */
GETED(0);
if(!(((uintptr_t)ED)&3))
ST0.d = *(float*)ED - ST0.d;
else {
memcpy(&f, ED, sizeof(float));
ST0.d = f - ST0.d;
}
ST0.d = *(float*)ED - ST0.d;
break;
case 6: /* FDIV ST0, float */
GETED(0);
if(!(((uintptr_t)ED)&3))
ST0.d /= *(float*)ED;
else {
memcpy(&f, ED, sizeof(float));
ST0.d /= f;
}
ST0.d /= *(float*)ED;
break;
case 7: /* FDIVR ST0, float */
GETED(0);
if(!(((uintptr_t)ED)&3))
ST0.d = *(float*)ED / ST0.d;
else {
memcpy(&f, ED, sizeof(float));
ST0.d = f / ST0.d;
}
ST0.d = *(float*)ED / ST0.d;
break;
default:
return 0;

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -235,30 +232,15 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 0: /* FLD ST0, Ed float */
GETED(0);
fpu_do_push(emu);
if(!(((uintptr_t)ED)&3))
ST0.d = *(float*)ED;
else {
memcpy(&f, ED, sizeof(float));
ST0.d = f;
}
ST0.d = *(float*)ED;
break;
case 2: /* FST Ed, ST0 */
GETED(0);
if(!(((uintptr_t)ED)&3))
*(float*)ED = ST0.d;
else {
f = ST0.d;
memcpy(ED, &f, sizeof(float));
}
*(float*)ED = ST0.d;
break;
case 3: /* FSTP Ed, ST0 */
GETED(0);
if(!(((uintptr_t)ED)&3))
*(float*)ED = ST0.d;
else {
f = ST0.d;
memcpy(ED, &f, sizeof(float));
}
*(float*)ED = ST0.d;
fpu_do_pop(emu);
break;
case 4: /* FLDENV m */
@ -268,9 +250,8 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 5: /* FLDCW Ew */
GETEW(0);
emu->cw = EW->word[0];
emu->cw.x16 = EW->word[0];
// do something with cw?
emu->round = (fpu_round_t)((emu->cw >> 10) & 3);
break;
case 6: /* FNSTENV m */
// warning, incomplete
@ -282,7 +263,7 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 7: /* FNSTCW Ew */
GETEW(0);
EW->word[0] = emu->cw;
EW->word[0] = emu->cw.x16;
break;
default:
return 0;

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -136,15 +133,15 @@ uintptr_t RunDB(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 1: /* FISTTP Ed, ST0 */
GETED(0);
tmp32s = ST0.d; // TODO: Handling of FPU Exception
if(tmp32s==0x7fffffff && isgreater(ST0.d, (double)(int32_t)0x7fffffff))
tmp32s = 0x80000000;
if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, (double)(int32_t)0x80000000) || !isfinite(ST0.d))
ED->sdword[0] = 0x80000000;
else
ED->sdword[0] = ST0.d;
fpu_do_pop(emu);
ED->sdword[0] = tmp32s;
break;
case 2: /* FIST Ed, ST0 */
GETED(0);
if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, -(double)(int32_t)0x7fffffff) || !isfinite(ST0.d))
if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, (double)(int32_t)0x80000000) || !isfinite(ST0.d))
ED->sdword[0] = 0x80000000;
else {
volatile int32_t tmp = fpu_round(emu, ST0.d); // tmp to avoid BUS ERROR
@ -153,7 +150,7 @@ uintptr_t RunDB(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 3: /* FISTP Ed, ST0 */
GETED(0);
if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, -(double)(int32_t)0x7fffffff) || !isfinite(ST0.d))
if(isgreater(ST0.d, (double)(int32_t)0x7fffffff) || isless(ST0.d, (double)(int32_t)0x80000000) || !isfinite(ST0.d))
ED->sdword[0] = 0x80000000;
else {
volatile int32_t tmp = fpu_round(emu, ST0.d); // tmp to avoid BUS ERROR

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -117,37 +114,20 @@ uintptr_t RunDD(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 0: /* FLD double */
GETED(0);
fpu_do_push(emu);
if(!(((uintptr_t)ED)&7))
ST0.d = *(double*)ED;
else {
memcpy(&ST0.d, ED, sizeof(double));
}
ST0.d = *(double*)ED;
break;
case 1: /* FISTTP ED qword */
GETED(0);
if(!(((uintptr_t)ED)&7))
*(int64_t*)ED = ST0.d;
else {
int64_t i64 = ST0.d;
memcpy(ED, &i64, sizeof(int64_t));
}
*(int64_t*)ED = ST0.d;
fpu_do_pop(emu);
break;
case 2: /* FST double */
GETED(0);
if(!(((uintptr_t)ED)&7))
*(double*)ED = ST0.d;
else {
memcpy(ED, &ST0.d, sizeof(double));
}
*(double*)ED = ST0.d;
break;
case 3: /* FSTP double */
GETED(0);
if(!(((uintptr_t)ED)&7))
*(double*)ED = ST0.d;
else {
memcpy(ED, &ST0.d, sizeof(double));
}
*(double*)ED = ST0.d;
fpu_do_pop(emu);
break;
case 4: /* FRSTOR m108byte */

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -132,14 +129,14 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr)
break;
case 2: /* FIST Ew, ST0 */
GETEW(0);
if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x7fff) || !isfinite(ST0.d))
if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x8000) || !isfinite(ST0.d))
EW->sword[0] = 0x8000;
else
EW->sword[0] = fpu_round(emu, ST0.d);
break;
case 3: /* FISTP Ew, ST0 */
GETEW(0);
if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x7fff) || !isfinite(ST0.d))
if(isgreater(ST0.d, (double)(int32_t)0x7fff) || isless(ST0.d, -(double)(int32_t)0x8000) || !isfinite(ST0.d))
EW->sword[0] = 0x8000;
else
EW->sword[0] = fpu_round(emu, ST0.d);
@ -168,7 +165,7 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr)
if(STll(0).sref==ST(0).sq)
ED->sq[0] = STll(0).sq;
else {
if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, -(double)(int64_t)0x7fffffffffffffffLL) || !isfinite(ST0.d))
if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d))
ED->sq[0] = 0x8000000000000000LL;
else
ED->sq[0] = fpu_round(emu, ST0.d);

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,9 +17,12 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "my_cpuid.h"
#include "bridge.h"
#ifdef DYNAREC
#include "../dynarec/native_lock.h"
#endif
#include "modrm.h"
@ -36,6 +36,9 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
int64_t tmp64s;
uint64_t tmp64u, tmp64u2;
reg64_t *oped, *opgd;
#ifdef USE_CAS
uint64_t tmpcas;
#endif
opcode = F8;
// REX prefix before the F0 are ignored
@ -46,6 +49,60 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
}
switch(opcode) {
#ifdef DYNAREC
#define GO(B, OP) \
case B+0: \
nextop = F8; \
GETEB(0); \
GETGB; \
do { \
tmp8u = native_lock_read_b(EB); \
tmp8u = OP##8(emu, tmp8u, GB); \
} while (native_lock_write_b(EB, tmp8u)); \
break; \
case B+1: \
nextop = F8; \
GETED(0); \
GETGD; \
if(rex.w) { \
do { \
tmp64u = native_lock_read_dd(ED); \
tmp64u = OP##64(emu, tmp64u, GD->q[0]); \
} while (native_lock_write_dd(ED, tmp64u)); \
} else { \
do { \
tmp32u = native_lock_read_d(ED); \
tmp32u = OP##32(emu, tmp32u, GD->dword[0]); \
} while (native_lock_write_d(ED, tmp32u)); \
if(MODREG) \
ED->dword[1] = 0; \
} \
break; \
case B+2: \
nextop = F8; \
GETEB(0); \
GETGB; \
GB = OP##8(emu, GB, EB->byte[0]); \
break; \
case B+3: \
nextop = F8; \
GETED(0); \
GETGD; \
if(rex.w) \
GD->q[0] = OP##64(emu, GD->q[0], ED->q[0]); \
else \
GD->q[0] = OP##32(emu, GD->dword[0], ED->dword[0]); \
break; \
case B+4: \
R_AL = OP##8(emu, R_AL, F8); \
break; \
case B+5: \
if(rex.w) \
R_RAX = OP##64(emu, R_RAX, F32S64); \
else \
R_RAX = OP##32(emu, R_EAX, F32); \
break;
#else
#define GO(B, OP) \
case B+0: \
nextop = F8; \
@ -101,6 +158,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
R_RAX = OP##32(emu, R_EAX, F32); \
pthread_mutex_unlock(&emu->context->mutex_lock); \
break;
#endif
GO(0x00, add) /* ADD 0x00 -> 0x05 */
GO(0x08, or) /* OR 0x08 -> 0x0D */
GO(0x10, adc) /* ADC 0x10 -> 0x15 */
@ -118,11 +176,59 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETED(0);
GETGD;
tmp8u = GD->byte[0];
tmp64s = rex.w?GD->sq[0]:GD->sdword[0];
tmp8u=tmp64s&(rex.w?63:31);
tmp64s >>= (rex.w?6:5);
if(!MODREG)
{
ED=(reg64_t*)(((uint32_t*)(ED))+(tmp8u>>5));
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));
}
#ifdef DYNAREC
if(rex.w) {
tmp8u&=63;
if(MODREG) {
if(ED->q[0] & (1LL<<tmp8u))
SET_FLAG(F_CF);
else {
ED->q[0] |= (1LL<<tmp8u);
CLEAR_FLAG(F_CF);
}
} else
do {
tmp64u = native_lock_read_dd(ED);
if(tmp64u & (1LL<<tmp8u)) {
SET_FLAG(F_CF);
tmp32s = 0;
} else {
tmp64u |= (1LL<<tmp8u);
CLEAR_FLAG(F_CF);
tmp32s = native_lock_write_dd(ED, tmp64u);
}
} while(tmp32s);
} else {
tmp8u&=31;
if(MODREG) {
if(ED->dword[0] & (1<<tmp8u))
SET_FLAG(F_CF);
else {
ED->dword[0] |= (1<<tmp8u);
CLEAR_FLAG(F_CF);
}
ED->dword[1] = 0;
} else
do {
tmp32u = native_lock_read_d(ED);
if(tmp32u & (1<<tmp8u)) {
SET_FLAG(F_CF);
tmp32s = 0;
} else {
tmp32u |= (1<<tmp8u);
CLEAR_FLAG(F_CF);
tmp32s = native_lock_write_d(ED, tmp32u);
}
} while(tmp32s);
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
tmp8u&=63;
@ -144,6 +250,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[1] = 0;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xB0: /* CMPXCHG Eb,Gb */
@ -151,6 +258,18 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETGB;
GETEB(0);
#ifdef DYNAREC
do {
tmp8u = native_lock_read_b(EB);
cmp8(emu, R_AL, tmp8u);
if(ACCESS_FLAG(F_ZF)) {
tmp32s = native_lock_write_b(EB, GB);
} else {
R_AL = tmp8u;
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
cmp8(emu, R_AL, EB->byte[0]);
if(ACCESS_FLAG(F_ZF)) {
@ -159,11 +278,55 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
R_AL = EB->byte[0];
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xB1: /* CMPXCHG Ed,Gd */
nextop = F8;
GETED(0);
GETGD;
#ifdef DYNAREC
if(rex.w)
if(((uintptr_t)ED)&7) {
do {
tmp64u = ED->q[0] & ~0xffLL;
tmp64u |= native_lock_read_b(ED);
cmp64(emu, R_RAX, tmp64u);
if(ACCESS_FLAG(F_ZF)) {
tmp32s = native_lock_write_b(ED, GD->q[0]&0xff);
if(!tmp32s)
ED->q[0] = GD->q[0];
} else {
R_RAX = tmp64u;
tmp32s = 0;
}
} while(tmp32s);
} else
do {
tmp64u = native_lock_read_dd(ED);
cmp64(emu, R_RAX, tmp64u);
if(ACCESS_FLAG(F_ZF)) {
tmp32s = native_lock_write_dd(ED, GD->q[0]);
} else {
R_RAX = tmp64u;
tmp32s = 0;
}
} while(tmp32s);
else {
do {
tmp32u = native_lock_read_d(ED);
cmp32(emu, R_EAX, tmp32u);
if(ACCESS_FLAG(F_ZF)) {
tmp32s = native_lock_write_d(ED, GD->dword[0]);
} else {
R_EAX = tmp32u;
tmp32s = 0;
}
} while(tmp32s);
emu->regs[_AX].dword[1] = 0;
if(MODREG)
ED->dword[1] = 0;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
cmp64(emu, R_RAX, ED->q[0]);
@ -184,6 +347,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[1] = 0;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xB3: /* BTR Ed,Gd */
@ -191,12 +355,43 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETED(0);
GETGD;
tmp8u = GD->byte[0];
tmp64s = rex.w?GD->sq[0]:GD->sdword[0];
tmp8u=tmp64s&(rex.w?63:31);
tmp64s >>= (rex.w?6:5);
if(!MODREG)
{
ED=(reg64_t*)(((uint32_t*)(ED))+(tmp8u>>5));
ED=(reg64_t*)(((uintptr_t)(ED))+(tmp64s<<(rex.w?3:2)));
}
tmp8u&=rex.w?63:31;
#ifdef DYNAREC
if(rex.w)
do {
tmp64u = native_lock_read_dd(ED);
if(tmp64u & (1LL<<tmp8u)) {
SET_FLAG(F_CF);
tmp64u ^= (1LL<<tmp8u);
tmp32s = native_lock_write_dd(ED, tmp64u);
} else {
CLEAR_FLAG(F_CF);
tmp32s = 0;
}
} while(tmp32s);
else {
do {
tmp32u = native_lock_read_d(ED);
if(tmp32u & (1<<tmp8u)) {
SET_FLAG(F_CF);
tmp32u ^= (1<<tmp8u);
tmp32s = native_lock_write_d(ED, tmp32u);
} else {
CLEAR_FLAG(F_CF);
tmp32s = 0;
}
} while(tmp32s);
if(MODREG)
ED->dword[1] = 0;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
if(ED->q[0] & (1<<tmp8u)) {
@ -214,6 +409,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[1] = 0;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xBA:
@ -241,6 +437,35 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
CHECK_FLAGS(emu);
GETED(1);
tmp8u = F8;
#ifdef DYNAREC
if(rex.w) {
tmp8u&=63;
do {
tmp64u = native_lock_read_dd(ED);
if(tmp64u & (1LL<<tmp8u)) {
SET_FLAG(F_CF);
tmp32s = 0;
} else {
tmp64u ^= (1LL<<tmp8u);
tmp32s = native_lock_write_dd(ED, tmp64u);
CLEAR_FLAG(F_CF);
}
} while(tmp32s);
} else {
tmp8u&=31;
do {
tmp32u = native_lock_read_d(ED);
if(tmp32u & (1<<tmp8u)) {
SET_FLAG(F_CF);
tmp32s = 0;
} else {
tmp32u ^= (1<<tmp8u);
tmp32s = native_lock_write_d(ED, tmp32u);
CLEAR_FLAG(F_CF);
}
} while(tmp32s);
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
tmp8u&=63;
@ -260,11 +485,41 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
}
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 6: /* BTR Ed, Ib */
CHECK_FLAGS(emu);
GETED(1);
tmp8u = F8;
#ifdef DYNAREC
if(rex.w) {
do {
tmp8u&=63;
tmp64u = native_lock_read_dd(ED);
if(tmp64u & (1LL<<tmp8u)) {
SET_FLAG(F_CF);
tmp64u ^= (1LL<<tmp8u);
tmp32s = native_lock_write_dd(ED, tmp64u);
} else {
tmp32s = 0;
CLEAR_FLAG(F_CF);
}
} while(tmp32s);
} else {
tmp8u&=31;
do {
tmp32u = native_lock_read_d(ED);
if(tmp32u & (1<<tmp8u)) {
SET_FLAG(F_CF);
tmp32u ^= (1<<tmp8u);
tmp32s = native_lock_write_d(ED, tmp32u);
} else {
CLEAR_FLAG(F_CF);
tmp32s = 0;
}
} while(tmp32s);
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
tmp8u&=63;
@ -282,11 +537,37 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
CLEAR_FLAG(F_CF);
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 7: /* BTC Ed, Ib */
CHECK_FLAGS(emu);
GETED(1);
tmp8u = F8;
#ifdef DYNAREC
if(rex.w) {
tmp8u&=63;
do {
tmp64u = native_lock_read_dd(ED);
if(tmp64u & (1LL<<tmp8u))
SET_FLAG(F_CF);
else
CLEAR_FLAG(F_CF);
tmp64u ^= (1LL<<tmp8u);
tmp32s = native_lock_write_dd(ED, tmp64u);
} while(tmp32s);
} else {
tmp8u&=31;
do {
tmp32u = native_lock_read_d(ED);
if(tmp32u & (1<<tmp8u))
SET_FLAG(F_CF);
else
CLEAR_FLAG(F_CF);
tmp32u ^= (1<<tmp8u);
tmp32s = native_lock_write_d(ED, tmp32u);
} while(tmp32s);
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
tmp8u&=63;
@ -304,6 +585,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[0] ^= (1<<tmp8u);
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
@ -315,16 +597,50 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEB(0);
GETGB;
#ifdef DYNAREC
do {
tmp8u = native_lock_read_b(EB);
tmp8u2 = add8(emu, tmp8u, GB);
} while(native_lock_write_b(EB, tmp8u2));
GB = tmp8u;
#else
pthread_mutex_lock(&emu->context->mutex_lock);
tmp8u = add8(emu, EB->byte[0], GB);
GB = EB->byte[0];
EB->byte[0] = tmp8u;
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xC1: /* XADD Gd,Ed */
nextop = F8;
GETED(0);
GETGD;
#ifdef DYNAREC
if(rex.w) {
do {
tmp64u = native_lock_read_dd(ED);
tmp64u2 = add64(emu, tmp64u, GD->q[0]);
} while(native_lock_write_dd(ED, tmp64u2));
GD->q[0] = tmp64u;
} else {
if(((uintptr_t)ED)&3) {
do {
tmp32u = ED->dword[0] & ~0xff;
tmp32u |= native_lock_read_b(ED);
tmp32u2 = add32(emu, tmp32u, GD->dword[0]);
} while(native_lock_write_b(ED, tmp32u2&0xff));
ED->dword[0] = tmp32u2;
} else {
do {
tmp32u = native_lock_read_d(ED);
tmp32u2 = add32(emu, tmp32u, GD->dword[0]);
} while(native_lock_write_d(ED, tmp32u2));
}
GD->q[0] = tmp32u;
if(MODREG)
ED->dword[1] = 0;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
tmp64u = add64(emu, ED->q[0], GD->q[0]);
@ -339,6 +655,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[0] = tmp32u;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xC7: /* CMPXCHG8B Gq */
@ -348,6 +665,34 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 1:
CHECK_FLAGS(emu);
GETGD;
#ifdef DYNAREC
if(rex.w)
do {
native_lock_read_dq(&tmp64u, &tmp64u2, ED);
if(R_RAX == tmp64u && R_RDX == tmp64u2) {
SET_FLAG(F_ZF);
tmp32s = native_lock_write_dq(R_RBX, R_RCX, ED);
} else {
CLEAR_FLAG(F_ZF);
R_RAX = tmp64u;
R_RDX = tmp64u2;
tmp32s = 0;
}
} while(tmp32s);
else
do {
tmp64u = native_lock_read_dd(ED);
if((R_EAX == (tmp64u&0xffffffff)) && (R_EDX == ((tmp64u>>32)&0xffffffff))) {
SET_FLAG(F_ZF);
tmp32s = native_lock_write_dd(ED, R_EBX|(((uint64_t)R_ECX)<<32));
} else {
CLEAR_FLAG(F_ZF);
R_RAX = tmp64u&0xffffffff;
R_RDX = (tmp64u>>32)&0xffffffff;
tmp32s = 0;
}
} while(tmp32s);
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
tmp64u = ED->q[0];
@ -375,6 +720,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
}
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
return 0;
@ -393,6 +739,18 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEB(1);
tmp8u = F8;
#ifdef DYNAREC
switch((nextop>>3)&7) {
case 0: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = add8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
case 1: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = or8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
case 2: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = adc8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
case 3: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = sbb8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
case 4: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = and8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
case 5: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = sub8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
case 6: do { tmp8u2 = native_lock_read_b(EB); tmp8u2 = xor8(emu, tmp8u2, tmp8u);} while(native_lock_write_b(EB, tmp8u2)); break;
case 7: cmp8(emu, EB->byte[0], tmp8u); break;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
switch((nextop>>3)&7) {
case 0: EB->byte[0] = add8(emu, EB->byte[0], tmp8u); break;
@ -405,6 +763,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
case 7: cmp8(emu, EB->byte[0], tmp8u); break;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0x81: /* GRP Ed,Id */
case 0x83: /* GRP Ed,Ib */
@ -415,6 +774,43 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
tmp64u = (uint64_t)tmp64s;
} else
tmp64u = F32S64;
#ifdef DYNAREC
if(rex.w) {
switch((nextop>>3)&7) {
case 0: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = add64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
case 1: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = or64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
case 2: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = adc64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
case 3: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = sbb64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
case 4: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = and64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
case 5: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = sub64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
case 6: do { tmp64u2 = native_lock_read_dd(ED); tmp64u2 = xor64(emu, tmp64u2, tmp64u);} while(native_lock_write_dd(ED, tmp64u2)); break;
case 7: cmp64(emu, ED->q[0], tmp64u); break;
}
} else {
if(MODREG)
switch((nextop>>3)&7) {
case 0: ED->q[0] = add32(emu, ED->dword[0], tmp64u); break;
case 1: ED->q[0] = or32(emu, ED->dword[0], tmp64u); break;
case 2: ED->q[0] = adc32(emu, ED->dword[0], tmp64u); break;
case 3: ED->q[0] = sbb32(emu, ED->dword[0], tmp64u); break;
case 4: ED->q[0] = and32(emu, ED->dword[0], tmp64u); break;
case 5: ED->q[0] = sub32(emu, ED->dword[0], tmp64u); break;
case 6: ED->q[0] = xor32(emu, ED->dword[0], tmp64u); break;
case 7: cmp32(emu, ED->dword[0], tmp64u); break;
}
else
switch((nextop>>3)&7) {
case 0: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = add32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
case 1: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = or32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
case 2: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = adc32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
case 3: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = sbb32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
case 4: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = and32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
case 5: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = sub32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
case 6: do { tmp32u2 = native_lock_read_d(ED); tmp32u2 = xor32(emu, tmp32u2, tmp64u);} while(native_lock_write_d(ED, tmp32u2)); break;
case 7: cmp32(emu, ED->dword[0], tmp64u); break;
}
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
switch((nextop>>3)&7) {
@ -452,9 +848,59 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
}
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0x86: /* XCHG Eb,Gb */
nextop = F8;
#ifdef DYNAREC
GETEB(0);
GETGB;
if(MODREG) { // reg / reg: no lock
tmp8u = GB;
GB = EB->byte[0];
EB->byte[0] = tmp8u;
} else {
do {
tmp8u = native_lock_read_b(EB);
} while(native_lock_write_b(EB, GB));
GB = tmp8u;
}
#else
GETEB(0);
GETGB;
if(!MODREG)
pthread_mutex_lock(&emu->context->mutex_lock); // XCHG always LOCK (but when accessing memory only)
tmp8u = GB;
GB = EB->byte[0];
EB->byte[0] = tmp8u;
if(!MODREG)
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0x87: /* XCHG Ed,Gd */
nextop = F8;
#ifdef DYNAREC
GETED(0);
GETGD;
if(MODREG) {
if(rex.w) {
tmp64u = GD->q[0];
GD->q[0] = ED->q[0];
ED->q[0] = tmp64u;
} else {
tmp32u = GD->dword[0];
GD->q[0] = ED->dword[0];
ED->q[0] = tmp32u;
}
} else {
if(rex.w) {
GD->q[0] = native_lock_xchg_dd(ED, GD->q[0]);
} else {
GD->dword[0] = native_lock_xchg_d(ED, GD->dword[0]);
}
}
#else
GETED(0);
GETGD;
pthread_mutex_lock(&emu->context->mutex_lock);
@ -471,6 +917,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[0] = tmp32u;
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 0xF6: /* GRP3 Eb(,Ib) */
@ -479,20 +926,93 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEB((tmp8u<2)?1:0);
switch(tmp8u) {
case 2: /* NOT Eb */
#ifdef DYNAREC
do {
tmp8u2 = native_lock_read_b(EB);
tmp8u2 = not8(emu, tmp8u2);
} while(native_lock_write_b(EB, tmp8u2));
#else
pthread_mutex_lock(&emu->context->mutex_lock);
EB->byte[0] = not8(emu, EB->byte[0]);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
return 0;
}
break;
case 0xFE: /* GRP 5 Eb */
nextop = F8;
GETED(0);
switch((nextop>>3)&7) {
case 0: /* INC Eb */
#ifdef DYNAREC
do {
tmp8u = native_lock_read_b(ED);
} while(native_lock_write_b(ED, inc8(emu, tmp8u)));
#else
pthread_mutex_lock(&emu->context->mutex_lock);
ED->byte[0] = inc8(emu, ED->byte[0]);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 1: /* DEC Ed */
#ifdef DYNAREC
do {
tmp8u = native_lock_read_b(ED);
} while(native_lock_write_b(ED, dec8(emu, tmp8u)));
#else
pthread_mutex_lock(&emu->context->mutex_lock);
ED->byte[0] = dec8(emu, ED->byte[0]);
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
printf_log(LOG_NONE, "Illegal Opcode 0xF0 0xFE 0x%02X 0x%02X\n", nextop, PK(0));
emu->quit=1;
emu->error |= ERR_ILLEGAL;
break;
}
break;
case 0xFF: /* GRP 5 Ed */
nextop = F8;
GETED(0);
switch((nextop>>3)&7) {
case 0: /* INC Ed */
#ifdef DYNAREC
if(rex.w)
if(((uintptr_t)ED)&7) {
// unaligned
do {
tmp64u = ED->q[0] & 0xffffffffffffff00LL;
tmp64u |= native_lock_read_b(ED);
tmp64u = inc64(emu, tmp64u);
} while(native_lock_write_b(ED, tmp64u&0xff));
ED->q[0] = tmp64u;
}
else
do {
tmp64u = native_lock_read_dd(ED);
} while(native_lock_write_dd(ED, inc64(emu, tmp64u)));
else {
if((uintptr_t)ED&3) {
//meh.
do {
tmp32u = ED->dword[0];
tmp32u &=~0xff;
tmp32u |= native_lock_read_b(ED);
tmp32u = inc32(emu, tmp32u);
} while(native_lock_write_b(ED, tmp32u&0xff));
ED->dword[0] = tmp32u;
} else {
do {
tmp32u = native_lock_read_d(ED);
} while(native_lock_write_d(ED, inc32(emu, tmp32u)));
}
if(MODREG) ED->dword[1] = 0;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
ED->q[0] = inc64(emu, ED->q[0]);
@ -503,8 +1023,31 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[0] = inc32(emu, ED->dword[0]);
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
case 1: /* DEC Ed */
#ifdef DYNAREC
if(rex.w)
if(((uintptr_t)ED)&7) {
// unaligned
do {
tmp64u = ED->q[0] & 0xffffffffffffff00LL;
tmp64u |= native_lock_read_b(ED);
tmp64u = dec64(emu, tmp64u);
} while(native_lock_write_b(ED, tmp64u&0xff));
ED->q[0] = tmp64u;
}
else
do {
tmp64u = native_lock_read_dd(ED);
} while(native_lock_write_dd(ED, dec64(emu, tmp64u)));
else {
do {
tmp32u = native_lock_read_d(ED);
} while(native_lock_write_d(ED, dec32(emu, tmp32u)));
if(MODREG) ED->dword[1] = 0;
}
#else
pthread_mutex_lock(&emu->context->mutex_lock);
if(rex.w) {
ED->q[0] = dec64(emu, ED->q[0]);
@ -515,6 +1058,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
ED->dword[0] = dec32(emu, ED->dword[0]);
}
pthread_mutex_unlock(&emu->context->mutex_lock);
#endif
break;
default:
printf_log(LOG_NONE, "Illegal Opcode 0xF0 0xFF 0x%02X 0x%02X\n", nextop, PK(0));

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,12 +17,12 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr)
uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
{
uint8_t opcode;
uint8_t nextop;
@ -48,7 +45,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEX(0);
GETGX;
GX->q[0] = EX->q[0];
if((nextop&0xC0)!=0xC0) {
if(!MODREG) {
// EX is not a register
GX->q[1] = 0;
}
@ -87,9 +84,15 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEX(0);
GETGD;
if(rex.w)
GD->sq[0] = EX->d[0];
if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffffffffffffLL)
GD->q[0] = 0x8000000000000000LL;
else
GD->sq[0] = EX->d[0];
else {
GD->sdword[0] = EX->d[0];
if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff)
GD->dword[0] = 0x80000000;
else
GD->sdword[0] = EX->d[0];
GD->dword[1] = 0;
}
break;
@ -98,35 +101,41 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEX(0);
GETGD;
if(rex.w) {
switch((emu->mxcsr>>13)&3) {
case ROUND_Nearest:
GD->q[0] = floor(EX->d[0]+0.5);
break;
case ROUND_Down:
GD->q[0] = floor(EX->d[0]);
break;
case ROUND_Up:
GD->q[0] = ceil(EX->d[0]);
break;
case ROUND_Chop:
GD->q[0] = EX->d[0];
break;
}
if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffffffffffffLL)
GD->q[0] = 0x8000000000000000LL;
else
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GD->sq[0] = nearbyint(EX->d[0]);
break;
case ROUND_Down:
GD->sq[0] = floor(EX->d[0]);
break;
case ROUND_Up:
GD->sq[0] = ceil(EX->d[0]);
break;
case ROUND_Chop:
GD->sq[0] = EX->d[0];
break;
}
} else {
switch((emu->mxcsr>>13)&3) {
case ROUND_Nearest:
GD->sdword[0] = floor(EX->d[0]+0.5);
break;
case ROUND_Down:
GD->sdword[0] = floor(EX->d[0]);
break;
case ROUND_Up:
GD->sdword[0] = ceil(EX->d[0]);
break;
case ROUND_Chop:
GD->sdword[0] = EX->d[0];
break;
}
if(isnan(EX->d[0]) || isinf(EX->d[0]) || EX->d[0]>0x7fffffff)
GD->dword[0] = 0x80000000;
else
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GD->sdword[0] = nearbyint(EX->d[0]);
break;
case ROUND_Down:
GD->sdword[0] = floor(EX->d[0]);
break;
case ROUND_Up:
GD->sdword[0] = ceil(EX->d[0]);
break;
case ROUND_Chop:
GD->sdword[0] = EX->d[0];
break;
}
GD->dword[1] = 0;
}
break;
@ -260,7 +269,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GOCOND(0x80
, tmp32s = F32S; CHECK_FLAGS(emu);
, addr += tmp32s;
,
,,STEP3
) /* 0x80 -> 0x8F Jxx */
case 0xC2: /* CMPSD Gx, Ex, Ib */
@ -303,10 +312,10 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr)
nextop = F8;
GETEX(0);
GETGX;
switch((emu->mxcsr>>13)&3) {
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GX->sd[0] = floor(EX->d[0]+0.5);
GX->sd[1] = floor(EX->d[1]+0.5);
GX->sd[0] = nearbyint(EX->d[0]);
GX->sd[1] = nearbyint(EX->d[1]);
break;
case ROUND_Down:
GX->sd[0] = floor(EX->d[0]);

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
@ -12,7 +9,7 @@
#include <unistd.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -20,7 +17,7 @@
#include "x64primop.h"
#include "x64trace.h"
#include "x87emu_private.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "bridge.h"
#include "modrm.h"
@ -93,9 +90,15 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEX(0);
GETGD;
if (rex.w)
GD->sq[0] = EX->f[0];
if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>0x7fffffffffffffffLL)
GD->q[0] = 0x8000000000000000LL;
else
GD->sq[0] = EX->f[0];
else {
GD->sdword[0] = EX->f[0];
if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>0x7fffffff)
GD->dword[0] = 0x80000000;
else
GD->sdword[0] = EX->f[0];
GD->dword[1] = 0;
}
break;
@ -104,35 +107,41 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
GETEX(0);
GETGD;
if(rex.w) {
switch((emu->mxcsr>>13)&3) {
case ROUND_Nearest:
GD->sq[0] = floorf(EX->f[0]+0.5f);
break;
case ROUND_Down:
GD->sq[0] = floorf(EX->f[0]);
break;
case ROUND_Up:
GD->sq[0] = ceilf(EX->f[0]);
break;
case ROUND_Chop:
GD->sq[0] = EX->f[0];
break;
}
if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>0x7fffffffffffffffLL)
GD->q[0] = 0x8000000000000000LL;
else
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GD->sq[0] = nearbyintf(EX->f[0]);
break;
case ROUND_Down:
GD->sq[0] = floorf(EX->f[0]);
break;
case ROUND_Up:
GD->sq[0] = ceilf(EX->f[0]);
break;
case ROUND_Chop:
GD->sq[0] = EX->f[0];
break;
}
} else {
switch((emu->mxcsr>>13)&3) {
case ROUND_Nearest:
GD->sdword[0] = floorf(EX->f[0]+0.5f);
break;
case ROUND_Down:
GD->sdword[0] = floorf(EX->f[0]);
break;
case ROUND_Up:
GD->sdword[0] = ceilf(EX->f[0]);
break;
case ROUND_Chop:
GD->sdword[0] = EX->f[0];
break;
}
if(isnanf(EX->f[0]) || isinff(EX->f[0]) || EX->f[0]>0x7fffffff)
GD->dword[0] = 0x80000000;
else
switch(emu->mxcsr.f.MXCSR_RC) {
case ROUND_Nearest:
GD->sdword[0] = nearbyintf(EX->f[0]);
break;
case ROUND_Down:
GD->sdword[0] = floorf(EX->f[0]);
break;
case ROUND_Up:
GD->sdword[0] = ceilf(EX->f[0]);
break;
case ROUND_Chop:
GD->sdword[0] = EX->f[0];
break;
}
GD->dword[1] = 0;
}
break;
@ -272,7 +281,6 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
if(rex.w) {
tmp64u = ED->q[0];
if(tmp64u) {
CLEAR_FLAG(F_ZF);
tmp8u = 0;
while(!(tmp64u&(1LL<<tmp8u))) ++tmp8u;
GD->q[0] = tmp8u;
@ -286,16 +294,15 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
} else {
tmp32u = ED->dword[0];
if(tmp32u) {
CLEAR_FLAG(F_ZF);
tmp8u = 0;
while(!(tmp32u&(1<<tmp8u))) ++tmp8u;
GD->dword[0] = tmp8u;
GD->q[0] = tmp8u;
CONDITIONAL_SET_FLAG(tmp8u==0, F_ZF);
CLEAR_FLAG(F_CF);
} else {
CLEAR_FLAG(F_ZF);
SET_FLAG(F_CF);
GD->dword[0] = 32;
GD->q[0] = 32;
}
}
break;

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <stdint.h>
#include <stdio.h>
@ -23,7 +20,7 @@
#include <poll.h>
#include "debug.h"
#include "rvtransstack.h"
#include "box64stack.h"
#include "x64emu.h"
#include "x64run.h"
#include "x64emu_private.h"
@ -31,7 +28,7 @@
//#include "x64primop.h"
#include "x64trace.h"
//#include "myalign.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "callback.h"
#include "signals.h"
#include "x64tls.h"
@ -58,9 +55,13 @@ void* my_mmap64(x64emu_t* emu, void *addr, unsigned long length, int prot, int f
int my_munmap(x64emu_t* emu, void* addr, unsigned long length);
int my_mprotect(x64emu_t* emu, void *addr, unsigned long len, int prot);
void* my_mremap(x64emu_t* emu, void* old_addr, size_t old_size, size_t new_size, int flags, void* new_addr);
#ifndef NO_ALIGN
#ifndef NOALIGN
int32_t my_epoll_ctl(x64emu_t* emu, int32_t epfd, int32_t op, int32_t fd, void* event);
int32_t my_epoll_wait(x64emu_t* emu, int32_t epfd, void* events, int32_t maxevents, int32_t timeout);
int32_t my_epoll_pwait(x64emu_t* emu, int32_t epfd, void* events, int32_t maxevents, int32_t timeout, const sigset_t *sigmask);
pid_t my_vfork(x64emu_t* emu);
#endif
int32_t my_fcntl(x64emu_t* emu, int32_t a, int32_t b, void* c);
// cannot include <fcntl.h>, it conflict with some asm includes...
#ifndef O_NONBLOCK
@ -84,6 +85,9 @@ scwrap_t syscallwrap[] = {
//{ 4, __NR_stat, 2 }, // Need to align struct stat
//{ 5, __NR_fstat, 2},
//{ 6, __NR_lstat, 2},
#ifdef __NR_poll
{ 7, __NR_poll, 3},
#endif
{ 8, __NR_lseek, 3},
//{ 9, __NR_mmap, 6}, // wrapped to track mmap
//{ 10, __NR_mprotect, 3}, // same
@ -92,6 +96,8 @@ scwrap_t syscallwrap[] = {
//{ 13, __NR_rt_sigaction, 4}, // wrapped to use my_ version
{ 14, __NR_rt_sigprocmask, 4},
{ 16, __NR_ioctl, 3},
{ 17, __NR_pread64, 4},
{ 18, __NR_pwrite64, 4},
{ 20, __NR_writev, 3},
#ifdef __NR_access
{ 21, __NR_access, 2},
@ -99,11 +105,16 @@ scwrap_t syscallwrap[] = {
#ifdef __NR_pipe
{ 22, __NR_pipe, 1},
#endif
{ 24, __NR_sched_yield, 0},
#ifdef __NR_select
{ 23, __NR_select, 5},
#endif
//{ 25, __NR_mremap, 5}, // wrapped to track protection
{ 27, __NR_mincore, 3},
{ 28, __NR_madvise, 3},
#ifdef __NR_dup2
{ 33, __NR_dup2, 2},
#endif
{ 35, __NR_nanosleep, 2},
{ 39, __NR_getpid, 0},
{ 41, __NR_socket, 3},
@ -113,19 +124,24 @@ scwrap_t syscallwrap[] = {
{ 45, __NR_recvfrom, 6},
{ 46, __NR_sendmsg, 3},
{ 47, __NR_recvmsg, 3},
{ 51, __NR_getsockname, 3},
{ 52, __NR_getpeername, 3},
{ 53, __NR_socketpair, 4},
{ 54, __NR_setsockopt, 5},
{ 55, __NR_getsockopt, 5},
//{56, __NR_clone, 5},
#ifdef __NR_fork
{ 57, __NR_fork, 0 }, // should wrap this one, because of the struct pt_regs (the only arg)?
#endif
#ifdef __NR_vfork
{58, __NR_vfork, 0},
#endif
//{58, __NR_vfork, 0},
{ 60, __NR_exit, 1}, // Nees wrapping?
{ 61, __NR_wait4, 4},
{ 62, __NR_kill, 2 },
//{ 63, __NR_uname, 1}, // Needs wrapping, use old_utsname
{ 66, __NR_semctl, 4},
//{ 72, __NR_fnctl, 3}, // Needs wrapping, and not always defined anyway
{ 73, __NR_flock, 2},
{ 74, __NR_fsync, 1},
#ifdef __NR_getdents
{ 78, __NR_getdents, 3},
#endif
@ -143,6 +159,7 @@ scwrap_t syscallwrap[] = {
{ 96, __NR_gettimeofday, 2},
{ 97, __NR_getrlimit, 2},
{ 101, __NR_ptrace, 4},
{ 112, __NR_setsid, 0},
{ 118, __NR_getresuid, 3},
{ 120, __NR_getresgid, 3},
{ 125, __NR_capget, 2},
@ -162,6 +179,11 @@ scwrap_t syscallwrap[] = {
{ 202, __NR_futex, 6},
{ 203, __NR_sched_setaffinity, 3},
{ 204, __NR_sched_getaffinity, 3},
{ 206, __NR_io_setup, 2},
{ 207, __NR_io_destroy, 1},
{ 208, __NR_io_getevents, 4},
{ 209, __NR_io_submit, 3},
{ 210, __NR_io_cancel, 3},
#ifdef __NR_epoll_create
{ 213, __NR_epoll_create, 1},
#endif
@ -172,22 +194,36 @@ scwrap_t syscallwrap[] = {
{ 229, __NR_clock_getres, 2},
{ 230, __NR_clock_nanosleep, 4},
{ 231, __NR_exit_group, 1},
#ifdef __NR_epoll_wait
#if defined(__NR_epoll_wait) && defined(NOALIGN)
{ 232, __NR_epoll_wait, 4},
#endif
#if defined(__NR_epoll_ctl) && defined(NOALIGN)
{ 233, __NR_epoll_ctl, 4},
#endif
{ 234, __NR_tgkill, 3},
{ 247, __NR_waitid, 5},
#ifdef __NR_inotify_init
{ 253, __NR_inotify_init, 0}, //0xFD
#endif
{ 254, __NR_inotify_add_watch, 3},
{ 255, __NR_inotify_rm_watch, 2},
#ifdef NOALIGN
{ 257, __NR_openat, 4},
#endif
{ 258, __NR_mkdirat, 3},
//{ 262, __NR_fstatat, 4},
{ 263, __NR_unlinkat, 3},
#ifdef __NR_renameat
{ 264, __NR_renameat, 4},
#endif
{ 267, __NR_readlinkat, 4},
{ 270, __NR_pselect6, 6},
{ 272, __NR_unshare, 1},
{ 273, __NR_set_robust_list, 2},
{ 274, __NR_get_robust_list, 3},
#ifdef NOALIGN
{ 281, __NR_epoll_pwait, 6},
#endif
#ifdef _NR_eventfd
{ 284, __NR_eventfd, 1},
#endif
@ -196,11 +232,12 @@ scwrap_t syscallwrap[] = {
{ 292, __NR_dup3, 3},
{ 293, __NR_pipe2, 2},
{ 294, __NR_inotify_init1, 1},
{ 297, __NR_rt_tgsigqueueinfo, 4},
{ 298, __NR_perf_event_open, 5},
{ 302, __NR_prlimit64, 4},
{ 309, __NR_getcpu, 3}, // need wrapping?
{ 315, __NR_sched_getattr, 4},
{ 317, __NR_seccomp, 3},
//{ 317, __NR_seccomp, 3},
{ 318, __NR_getrandom, 3},
{ 319, __NR_memfd_create, 2},
{ 324, __NR_membarrier, 2},
@ -245,21 +282,31 @@ ssize_t DirentFromDirent64(void* dest, void* source, ssize_t count)
{
nat_linux_dirent64_t *src = (nat_linux_dirent64_t*)source;
x86_linux_dirent_t *dst = (x86_linux_dirent_t*)dest;
ssize_t ret = count;
x86_linux_dirent_t *old = NULL;
ssize_t ret = 0;
while(count>0) {
dst->d_ino = src->d_ino;
dst->d_reclen = src->d_reclen+1;
strcpy(dst->d_name, src->d_name);
dst->d_off = src->d_off?(src->d_off+1):0;
*(uint8_t*)((uintptr_t)dst + dst->d_reclen -2) = 0;
*(uint8_t*)((uintptr_t)dst + dst->d_reclen -1) = src->d_type;
ssize_t sz = src->d_reclen+sizeof(x86_linux_dirent_t)-sizeof(nat_linux_dirent64_t)+2;
if(sz>=count) {
dst->d_ino = src->d_ino;
dst->d_reclen = sz;
ret+=sz;
strcpy(dst->d_name, src->d_name);
dst->d_off = src->d_off?(src->d_off+sizeof(x86_linux_dirent_t)-sizeof(nat_linux_dirent64_t)+2):0;
*(uint8_t*)((uintptr_t)dst + dst->d_reclen -2) = 0;
*(uint8_t*)((uintptr_t)dst + dst->d_reclen -1) = src->d_type;
count -= src->d_reclen;
ret += 1;
src = (nat_linux_dirent64_t*)(((uintptr_t)src) + src->d_reclen);
dst = (x86_linux_dirent_t*)(((uintptr_t)dst) + dst->d_reclen);
count -= src->d_reclen;
ret += 1;
old = dst;
src = (nat_linux_dirent64_t*)(((uintptr_t)src) + src->d_reclen);
dst = (x86_linux_dirent_t*)(((uintptr_t)dst) + dst->d_reclen);
} else {
if(old)
old->d_off = 0;
count = 0;
}
}
return ret;
return (count<0)?count:ret;
}
#endif
@ -308,7 +355,22 @@ void EXPORT x64Syscall(x64emu_t *emu)
{
RESET_FLAGS(emu);
uint32_t s = R_EAX; // EAX? (syscalls only go up to 547 anyways)
printf_log(LOG_DEBUG, "%p: Calling syscall 0x%02X (%d) %p %p %p %p %p %p", (void*)R_RIP, s, s, (void*)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_R10, (void*)R_R8, (void*)R_R9);
int log = 0;
char t_buff[256] = "\0";
char t_buffret[128] = "\0";
char buff2[64] = "\0";
char* buff = NULL;
char* buffret = NULL;
if(box64_log>=LOG_DEBUG || cycle_log) {
log = 1;
buff = cycle_log?my_context->log_call[my_context->current_line]:t_buff;
buffret = cycle_log?my_context->log_ret[my_context->current_line]:t_buffret;
if(cycle_log)
my_context->current_line = (my_context->current_line+1)%cycle_log;
snprintf(buff, 255, "%04d|%p: Calling syscall 0x%02X (%d) %p %p %p %p %p %p", GetTID(), (void*)R_RIP, s, s, (void*)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_R10, (void*)R_R8, (void*)R_R9);
if(!cycle_log)
printf_log(LOG_NONE, "%s", buff);
}
// check wrapper first
int cnt = sizeof(syscallwrap) / sizeof(scwrap_t);
for (int i=0; i<cnt; i++) {
@ -317,8 +379,8 @@ void EXPORT x64Syscall(x64emu_t *emu)
switch(syscallwrap[i].nbpars) {
case 0: *(int64_t*)&R_RAX = syscall(sc); break;
case 1: *(int64_t*)&R_RAX = syscall(sc, R_RDI); break;
case 2: if(s==33) {printf_dump(LOG_DEBUG, " => sys_access(\"%s\", %ld)\n", (char*)R_RDI, R_RSI);}; *(int64_t*)&R_RAX = syscall(sc, R_RDI, R_RSI); break;
case 3: *(int64_t*)&R_RAX = syscall(sc, R_RDI, R_RSI, R_RDX); break;
case 2: if(s==33) {if(log) snprintf(buff2, 63, " [sys_access(\"%s\", %ld)]", (char*)R_RDI, R_RSI);}; *(int64_t*)&R_RAX = syscall(sc, R_RDI, R_RSI); break;
case 3: if(s==42) {if(log) snprintf(buff2, 63, " [sys_connect(%d, %p[type=%d], %d)]", R_EDI, (void*)R_RSI, *(unsigned short*)R_RSI, R_EDX);}; if(s==258) {if(log) snprintf(buff2, 63, " [sys_mkdirat(%d, %s, 0x%x]", R_EDI, (char*)R_RSI, R_EDX);}; *(int64_t*)&R_RAX = syscall(sc, R_RDI, R_RSI, R_RDX); break;
case 4: *(int64_t*)&R_RAX = syscall(sc, R_RDI, R_RSI, R_RDX, R_R10); break;
case 5: *(int64_t*)&R_RAX = syscall(sc, R_RDI, R_RSI, R_RDX, R_R10, R_R8); break;
case 6: *(int64_t*)&R_RAX = syscall(sc, R_RDI, R_RSI, R_RDX, R_R10, R_R8, R_R9); break;
@ -327,44 +389,75 @@ void EXPORT x64Syscall(x64emu_t *emu)
emu->quit = 1;
return;
}
printf_log(LOG_DEBUG, " => 0x%x\n", R_EAX);
if(R_EAX==0xffffffff && errno>0)
R_RAX = (uint64_t)-errno;
if(log) snprintf(buffret, 127, "0x%x%s", R_EAX, buff2);
if(log && !cycle_log) printf_log(LOG_NONE, "=> %s\n", buffret);
return;
}
}
switch (s) {
case 0: // sys_read
*(int64_t*)&R_RAX = read((int)R_EDI, (void*)R_RSI, (size_t)R_RDX);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 1: // sys_write
*(int64_t*)&R_RAX = write((int)R_EDI, (void*)R_RSI, (size_t)R_RDX);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 2: // sys_open
if(s==5) {printf_log(LOG_DEBUG, " => sys_open(\"%s\", %d, %d)", (char*)R_RDI, of_convert(R_ESI), R_EDX);};
if(s==5) {if (log) snprintf(buff2, 63, " [sys_open(\"%s\", %d, %d)]", (char*)R_RDI, of_convert(R_ESI), R_EDX);};
//*(int64_t*)&R_RAX = open((void*)R_EDI, of_convert(R_ESI), R_EDX);
*(int64_t*)&R_RAX = my_open(emu, (void*)R_RDI, of_convert(R_ESI), R_EDX);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 3: // sys_close
*(int64_t*)&R_RAX = close((int)R_EDI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 4: // sys_stat
*(int64_t*)&R_RAX = my_stat(emu, (void*)R_RDI, (void*)R_RSI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 5: // sys_fstat
*(int64_t*)&R_RAX = my_fstat(emu, (int)R_EDI, (void*)R_RSI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 6: // sys_lstat
*(int64_t*)&R_RAX = my_lstat(emu, (void*)R_RDI, (void*)R_RSI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#ifndef __NR_poll
case 7: // sys_poll
*(int64_t*)&R_RAX = poll((struct pollfd*)R_RDI, (nfds_t)R_RSI, (int)R_EDX);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
case 9: // sys_mmap
R_RAX = (uintptr_t)my_mmap64(emu, (void*)R_RDI, R_RSI, (int)R_EDX, (int)R_R10d, (int)R_R8d, R_R9);
break;
case 10: // sys_mprotect
*(int64_t*)&R_RAX = my_mprotect(emu, (void*)R_RDI, R_RSI, (int)R_EDX);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 11: // sys_munmap
*(int64_t*)&R_RAX = my_munmap(emu, (void*)R_RDI, R_RSI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 13: // sys_rt_sigaction
#if 1
R_RAX = (int64_t)my_syscall_rt_sigaction(emu, (int)R_EDI, (const x64_sigaction_restorer_t *)R_RSI, (x64_sigaction_restorer_t *)R_RDX, (size_t)R_R10);
#else
{
x64_sigaction_t n ={0};
x64_sigaction_t o = {0};
@ -384,93 +477,125 @@ void EXPORT x64Syscall(x64emu_t *emu)
memcpy(&p->sa_mask, &o.sa_mask, R_R10);
}
}
#endif
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#ifndef __NR_access
case 21: // sys_access
*(int64_t*)&R_RAX = access((void*)R_RDI, R_ESI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef __NR_pipe
case 22:
*(int64_t*)&R_RAX = pipe((void*)R_RDI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef __NR_select
case 23: // sys_select
R_EAX = (uint32_t)select(R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_R10, (void*)R_R8);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
case 25: // sys_mremap
R_RAX = (uintptr_t)my_mremap(emu, (void*)R_RDI, R_RSI, R_RDX, R_R10d, (void*)R_R8);
break;
#ifndef __NR_dup2
case 33: // sys_dup2
R_EAX = (uint32_t)dup2((int)R_EDI, (int)R_ESI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
case 56: // sys_clone
// x86_64 raw syscall is long clone(unsigned long flags, void *stack, int *parent_tid, int *child_tid, unsigned long tls);
// so flags=R_RDI, stack=R_RSI, parent_tid=R_RDX, child_tid=R_R10, tls=R_R8
if(R_RSI)
{
void* stack_base = (void*)R_RSI;
int stack_size = 0;
if(!R_RSI) {
// allocate a new stack...
int currstack = 0;
if((R_RSP>=(uintptr_t)emu->init_stack) && (R_RSP<=((uintptr_t)emu->init_stack+emu->size_stack)))
currstack = 1;
stack_size = (currstack)?emu->size_stack:(1024*1024);
stack_base = mmap(NULL, stack_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN, -1, 0);
// copy value from old stack to new stack
if(currstack)
memcpy(stack_base, emu->init_stack, stack_size);
else {
int size_to_copy = (uintptr_t)emu->init_stack + emu->size_stack - (R_RSP);
memcpy(stack_base+stack_size-size_to_copy, (void*)R_RSP, size_to_copy);
if((R_EDI&~0xff)==0x4100) {
// this is a case of vfork...
R_EAX = my_vfork(emu);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
} else {
if(R_RSI)
{
void* stack_base = (void*)R_RSI;
int stack_size = 0;
if(!R_RSI) {
// allocate a new stack...
int currstack = 0;
if((R_RSP>=(uintptr_t)emu->init_stack) && (R_RSP<=((uintptr_t)emu->init_stack+emu->size_stack)))
currstack = 1;
stack_size = (currstack && emu->size_stack)?emu->size_stack:(1024*1024);
stack_base = mmap(NULL, stack_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN, -1, 0);
// copy value from old stack to new stack
if(currstack)
memcpy(stack_base, emu->init_stack, stack_size);
else {
int size_to_copy = (uintptr_t)emu->init_stack + emu->size_stack - (R_RSP);
memcpy(stack_base+stack_size-size_to_copy, (void*)R_RSP, size_to_copy);
}
}
x64emu_t * newemu = NewX64Emu(emu->context, R_RIP, (uintptr_t)stack_base, stack_size, (R_RSI)?0:1);
SetupX64Emu(newemu);
CloneEmu(newemu, emu);
Push64(newemu, 0);
PushExit(newemu);
void* mystack = NULL;
if(my_context->stack_clone_used) {
mystack = box_malloc(1024*1024); // stack for own process... memory leak, but no practical way to remove it
} else {
if(!my_context->stack_clone)
my_context->stack_clone = box_malloc(1024*1024);
mystack = my_context->stack_clone;
my_context->stack_clone_used = 1;
}
int64_t ret = clone(clone_fn, (void*)((uintptr_t)mystack+1024*1024), R_RDI, newemu, R_RDX, R_R8, R_R10);
R_RAX = (uint64_t)ret;
}
x64emu_t * newemu = NewX64Emu(emu->context, R_RIP, (uintptr_t)stack_base, stack_size, (R_RSI)?0:1);
SetupX64Emu(newemu);
CloneEmu(newemu, emu);
Push64(newemu, 0);
PushExit(newemu);
void* mystack = NULL;
if(my_context->stack_clone_used) {
mystack = box_malloc(1024*1024); // stack for own process... memory leak, but no practical way to remove it
} else {
if(!my_context->stack_clone)
my_context->stack_clone = box_malloc(1024*1024);
mystack = my_context->stack_clone;
my_context->stack_clone_used = 1;
}
int64_t ret = clone(clone_fn, (void*)((uintptr_t)mystack+1024*1024), R_RDI, newemu, R_RDX, R_R8, R_R10);
R_RAX = (uint64_t)ret;
else
#ifdef NOALIGN
R_RAX = (uint64_t)syscall(__NR_clone, R_RDI, R_RSI, R_RDX, R_R10, R_R8);
#else
R_RAX = (uint64_t)syscall(__NR_clone, R_RDI, R_RSI, R_RDX, R_R8, R_R10); // invert R_R8/R_R10 on Aarch64 and most other
#endif
}
else
#ifdef NOALIGN
R_RAX = (uint64_t)syscall(__NR_clone, R_RDI, R_RSI, R_RDX, R_R10, R_R8);
#else
R_RAX = (uint64_t)syscall(__NR_clone, R_RDI, R_RSI, R_RDX, R_R8, R_R10); // invert R_R8/R_R10 on Aarch64 and most other
#endif
break;
#ifndef __NR_fork
case 57:
R_RAX = fork();
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef __NR_vfork
case 58: // vfork
{
int64_t r = vfork();
int64_t r = my_vfork(emu);
R_RAX = r;
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
}
break;
#endif
case 63: //uname
{
old_utsname_t *old = (old_utsname_t*)R_RDI;
struct utsname uts;
R_RAX = uname(&uts);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
memcpy(old, &uts, sizeof(*old)); // old_uts is just missing a field from new_uts
strcpy(old->machine, "x86_64");
}
break;
case 72: //fcntl
R_RAX = (uint64_t)my_fcntl(emu, (int)R_EDI, (int)R_ESI, (void*)R_RDX);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#ifndef __NR_getdents
case 78:
{
@ -479,67 +604,122 @@ void EXPORT x64Syscall(x64emu_t *emu)
ssize_t ret = syscall(__NR_getdents64, R_EDI, d64, count);
ret = DirentFromDirent64((void*)R_RSI, d64, ret);
R_RAX = (uint64_t)ret;
if(ret==-1)
R_RAX = (uint64_t)-errno;
}
break;
#endif
#ifndef __NR_rename
case 82: // sys_rename
*(int64_t*)&R_RAX = rename((void*)R_RDI, (void*)R_RSI);
*(int64_t*)&R_RAX = rename((void*)R_RDI, (void*)R_RSI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef __NR_mkdir
case 83: // sys_mkdir
*(int64_t*)&R_RAX = mkdir((void*)R_RDI, R_ESI);
break;
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef __NR_unlink
case 87: //sys_unlink
*(int64_t*)&R_RAX = unlink((void*)R_RDI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
case 89: // sys_readlink
R_RAX = (ssize_t)my_readlink(emu,(void*)R_RDI, (void*)R_RSI, (size_t)R_RDX);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 131: // sys_sigaltstack
*(int64_t*)&R_RAX = my_sigaltstack(emu, (void*)R_RDI, (void*)R_RSI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
case 158: // sys_arch_prctl
*(int64_t*)&R_RAX = my_arch_prctl(emu, (int)R_EDI, (void*)R_RSI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#ifndef __NR_time
case 201: // sys_time
R_RAX = (uintptr_t)time((void*)R_RDI);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#if !defined(__NR_epoll_wait) && !defined(NO_ALIGN)
#if !defined(__NR_epoll_wait) || !defined(NOALIGN)
case 232:
R_RAX = my_epoll_wait(emu, (int)R_EDI, (void*)R_RSI, (int)R_EDX, (int)R_R8d);
R_RAX = my_epoll_wait(emu, (int)R_EDI, (void*)R_RSI, (int)R_EDX, (int)R_R10d);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#if !defined(__NR_epoll_ctl) || !defined(NOALIGN)
case 233:
R_EAX = my_epoll_ctl(emu, (int)R_EDI, (int)R_ESI, (int)R_EDX, (void*)R_R10);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef __NR_inotify_init
case 253:
R_EAX = (int)syscall(__NR_inotify_init1, 0);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef NOALIGN
case 257:
R_EAX = (int)syscall(__NR_openat, (int)R_EDI, (void*)R_RSI, of_convert((int)R_EDX), R_R10d);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
case 262:
R_EAX = (uint64_t)(int64_t)my_fstatat(emu, (int)R_RDI, (char*)R_RSI, (void*)R_RDX, (int)R_R10d);
R_EAX = (uint32_t)my_fstatat(emu, (int)R_RDI, (char*)R_RSI, (void*)R_RDX, (int)R_R10d);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#ifndef __NR_renameat
case 264:
R_EAX = (uint32_t)renameat((int)R_RDI, (const char*)R_RSI, (int)R_EDX, (const char*)R_R10);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
#ifndef NOALIGN
case 281: // sys_epool_pwait
R_EAX = (uint32_t)my_epoll_pwait(emu, (int)R_EDI, (void*)R_RSI, (int)R_EDX, (int)R_R10d, (void*)R_R8);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
case 317: // sys_seccomp
R_RAX = 0; // ignoring call
break;
case 334: // It is helpeful to run static binary
R_RAX = -1;
errno = ENOSYS;
R_RAX = -ENOSYS;
break;
#ifndef __NR_fchmodat4
case 434:
*(int64_t*)R_RAX = fchmodat((int)R_EDI, (void*)R_RSI, (mode_t)R_RDX, (int)R_R10d);
break;
#endif
#ifndef __NR_fchmodat4
case 434:
*(int64_t*)R_RAX = fchmodat((int)R_EDI, (void*)R_RSI, (mode_t)R_RDX, (int)R_R10d);
if(R_EAX==0xffffffff)
R_RAX = (uint64_t)-errno;
break;
#endif
default:
printf_log(LOG_INFO, "Error: Unsupported Syscall 0x%02Xh (%d)\n", s, s);
emu->quit = 1;
emu->error |= ERR_UNIMPL;
return;
}
printf_log(LOG_DEBUG, " => 0x%lx\n", R_RAX);
if(log) snprintf(buffret, 127, "0x%lx%s", R_RAX, buff2);
if(log && !cycle_log) printf_log(LOG_NONE, "=> %s\n", buffret);
}
#define stack(n) (R_RSP+8+n)
@ -589,13 +769,21 @@ uintptr_t EXPORT my_syscall(x64emu_t *emu)
return (uint64_t)(int64_t)my_fstat(emu, (int)R_ESI, (void*)R_RDX);
case 6: // sys_lstat
return (uint64_t)(int64_t)my_lstat(emu, (void*)R_RSI, (void*)R_RDX);
#ifndef __NR_poll
case 7: // sys_poll
return (uint64_t)(int64_t)poll((struct pollfd*)R_RSI, (nfds_t)R_RDX, (int)R_ECX);
break;
#endif
case 9: // sys_mmap
return (uintptr_t)my_mmap64(emu, (void*)R_RSI, R_RDX, (int)R_RCX, (int)R_R8d, (int)R_R9, i64(0));
case 10: // sys_mprotect
return (uint64_t)(int64_t)my_mprotect(emu, (void*)R_RSI, R_RDX, (int)R_ECX);
return (uint64_t)my_mprotect(emu, (void*)R_RSI, R_RDX, (int)R_ECX);
case 11: // sys_munmap
return (uint64_t)(int64_t)my_munmap(emu, (void*)R_RSI, R_RDX);
return (uint64_t)my_munmap(emu, (void*)R_RSI, R_RDX);
case 13: // sys_rt_sigaction
#if 1
return my_syscall_rt_sigaction(emu, (int)R_ESI, (const x64_sigaction_restorer_t *)R_RDX, (x64_sigaction_restorer_t *)R_RCX, (size_t)R_R8);
#else
{
x64_sigaction_t n ={0};
x64_sigaction_t o = {0};
@ -616,6 +804,7 @@ uintptr_t EXPORT my_syscall(x64emu_t *emu)
}
return ret;
}
#endif
#ifndef __NR_access
case 21: // sys_access
return (uint64_t)(int64_t)access((void*)R_RSI, R_EDX);
@ -677,14 +866,16 @@ uintptr_t EXPORT my_syscall(x64emu_t *emu)
return (uintptr_t)syscall(__NR_clone, R_RSI, R_RDX, R_RCX, R_R9, R_R8); // invert R_R8/R_R9 on Aarch64 and most other
#endif
break;
#ifndef __NR_dup2
case 33:
return dup2((int)R_ESI, (int)R_EDX);
#endif
#ifndef __NR_fork
case 57:
return fork();
#endif
#ifndef __NR_vfork
case 58: // vfork
return vfork();
#endif
return my_vfork(emu);
case 63: //uname
{
old_utsname_t *old = (old_utsname_t*)R_RSI;
@ -694,6 +885,9 @@ uintptr_t EXPORT my_syscall(x64emu_t *emu)
strcpy(old->machine, "x86_64");
return ret;
}
case 72: //fcntl
R_RAX = (uint64_t)my_fcntl(emu, (int)R_ESI, (int)R_EDX, (void*)R_RCX);
break;
#ifndef __NR_getdents
case 78:
{
@ -722,17 +916,38 @@ uintptr_t EXPORT my_syscall(x64emu_t *emu)
case 201: // sys_time
return (uintptr_t)time((void*)R_RSI);
#endif
#if !defined(__NR_epoll_wait) && !defined(NO_ALIGN)
#if !defined(__NR_epoll_wait) || !defined(NOALIGN)
case 232:
R_RAX = my_epoll_wait(emu, (int)R_ESI, (void*)R_RDX, (int)R_ECX, (int)R_R8d);
break;
#endif
#if !defined(__NR_epoll_ctl) || !defined(NOALIGN)
case 233:
R_EAX = my_epoll_ctl(emu, (int)R_ESI, (int)R_EDX, (int)R_ECX, (void*)R_R8);
break;
#endif
#ifndef __NR_inotify_init
case 253:
return (int)syscall(__NR_inotify_init1, 0);
#endif
#ifndef NOALIGN
case 257:
R_EAX = (int)syscall(__NR_openat, (int)R_ESI, (void*)R_RDX, of_convert((int)R_ECX), R_R8d);
break;
#endif
case 262:
return (uint64_t)(int64_t)my_fstatat(emu, (int)R_RSI, (char*)R_RDX, (void*)R_RCX, (int)R_R8d);
#ifndef __NR_renameat
case 264:
return (uint64_t)(int64_t)renameat((int)R_RSI, (const char*)R_RDX, (int)R_ECX, (const char*)R_R8);
#endif
#ifndef NOALIGN
case 281: // sys_epool_pwait
return (uint64_t)(int64_t)my_epoll_pwait(emu, (int)R_ESI, (void*)R_RDX, (int)R_ECX, (int)R_R8d, (void*)R_R9);
break;
#endif
case 317: // sys_seccomp
return 0; // ignoring call
#ifndef __NR_fchmodat4
case 434:
return (int)fchmodat((int)R_ESI, (void*)R_RDX, (mode_t)R_RCX, (int)R_R8d);

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
// Handling of TLS calls, include x86 specifi set_thread_area
#include <unistd.h>
#include <string.h>
@ -9,7 +6,7 @@
#include <stdlib.h>
#include "debug.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "x64emu.h"
#include "x64emu_private.h"
#include "x64tls.h"
@ -155,6 +152,7 @@ int my_arch_prctl(x64emu_t *emu, int code, void* addr)
my_context->segtls[3].limit = 0;
my_context->segtls[3].present = 1;
pthread_setspecific(my_context->segtls[3].key, (void*)my_context->segtls[3].base);
ResetSegmentsCache(emu);
return 0;
case ARCH_GET_FS:
*(void**)addr = GetSegmentBase(emu->segs[_FS]);
@ -185,25 +183,25 @@ int my_arch_prctl(x64emu_t *emu, int code, void* addr)
uintptr_t ssp_base 0x78
.... padding .... 0x200?
*/
static int sizeDTS(rvtranscontext_t* context)
static int sizeDTS(box64context_t* context)
{
return ((context->elfsize+0xff)&~0xff)*16;
}
static int sizeTLSData(int s)
{
uint32_t mask = rvtrans_nogtk?0xffff:0x1fff;
uint32_t mask = 0xffff/*box64_nogtk?0xffff:0x1fff*/; // x86_64 does the mapping per 64K blocks, so it makes sense to have it this large
return (s+mask)&~mask;
}
static tlsdatasize_t* setupTLSData(rvtranscontext_t* context)
static tlsdatasize_t* setupTLSData(box64context_t* context)
{
// Setup the GS segment:
int dtssize = sizeDTS(context);
int datasize = sizeTLSData(context->tlssize);
void *ptr_oversized = (char*)malloc(dtssize+POS_TLS+datasize);
void *ptr_oversized = (char*)box_malloc(dtssize+POS_TLS+datasize);
void *ptr = (void*)((uintptr_t)ptr_oversized + datasize);
memcpy((void*)((uintptr_t)ptr-context->tlssize), context->tlsdata, context->tlssize);
tlsdatasize_t *data = (tlsdatasize_t*)calloc(1, sizeof(tlsdatasize_t));
tlsdatasize_t *data = (tlsdatasize_t*)box_calloc(1, sizeof(tlsdatasize_t));
data->data = ptr;
data->tlssize = context->tlssize;
data->ptr = ptr_oversized;
@ -229,17 +227,17 @@ static tlsdatasize_t* setupTLSData(rvtranscontext_t* context)
return data;
}
void* fillTLSData(rvtranscontext_t *context)
static void* fillTLSData(box64context_t *context)
{
pthread_mutex_lock(&context->mutex_tls);
mutex_lock(&context->mutex_tls);
tlsdatasize_t *data = setupTLSData(context);
pthread_mutex_unlock(&context->mutex_tls);
mutex_unlock(&context->mutex_tls);
return data;
}
void* resizeTLSData(rvtranscontext_t *context, void* oldptr)
static void* resizeTLSData(box64context_t *context, void* oldptr)
{
pthread_mutex_lock(&context->mutex_tls);
mutex_lock(&context->mutex_tls);
tlsdatasize_t* oldata = (tlsdatasize_t*)oldptr;
if(sizeTLSData(oldata->tlssize)!=sizeTLSData(context->tlssize) || (oldata->n_elfs/0xff)!=(context->elfsize/0xff)) {
printf_log(LOG_INFO, "Warning, resizing of TLS occured! size: %d->%d / n_elfs: %d->%d\n", sizeTLSData(oldata->tlssize), sizeTLSData(context->tlssize), 1+(oldata->n_elfs/0xff), 1+(context->elfsize/0xff));
@ -247,7 +245,7 @@ void* resizeTLSData(rvtranscontext_t *context, void* oldptr)
// copy the relevent old part, in case something changed
memcpy((void*)((uintptr_t)data->data-oldata->tlssize), (void*)((uintptr_t)oldata->data-oldata->tlssize), oldata->tlssize);
// all done, update new size, free old pointer and exit
pthread_mutex_unlock(&context->mutex_tls);
mutex_unlock(&context->mutex_tls);
free_tlsdatasize(oldptr);
return data;
} else {
@ -269,19 +267,26 @@ void* resizeTLSData(rvtranscontext_t *context, void* oldptr)
}
oldata->n_elfs = context->elfsize;
}
pthread_mutex_unlock(&context->mutex_tls);
mutex_unlock(&context->mutex_tls);
return oldata;
}
}
tlsdatasize_t* getTLSData(box64context_t *context)
{
tlsdatasize_t* ptr = NULL;
if(!ptr)
if ((ptr = (tlsdatasize_t*)pthread_getspecific(context->tlskey)) == NULL) {
ptr = (tlsdatasize_t*)fillTLSData(context);
}
if(ptr->tlssize != context->tlssize)
ptr = (tlsdatasize_t*)resizeTLSData(context, ptr);
return ptr;
}
static void* GetSeg33Base()
{
tlsdatasize_t* ptr;
if ((ptr = (tlsdatasize_t*)pthread_getspecific(my_context->tlskey)) == NULL) {
ptr = (tlsdatasize_t*)fillTLSData(my_context);
}
if(ptr->tlssize != my_context->tlssize)
ptr = (tlsdatasize_t*)resizeTLSData(my_context, ptr);
tlsdatasize_t* ptr = getTLSData(my_context);
return ptr->data;
}

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
@ -8,7 +5,7 @@
#include "debug.h"
#include "x64trace.h"
#include "rvtranscontext.h"
#include "box64context.h"
#include "x86zydis.h"
#include "x64emu_private.h"
@ -40,7 +37,7 @@ typedef struct zydis_dec_s {
PFNZydisFormatterFormatInstruction ZydisFormatterFormatInstruction;
} zydis_dec_t;
int InitX64Trace(rvtranscontext_t *context)
int InitX64Trace(box64context_t *context)
{
if(context->zydis)
return 0;
@ -66,7 +63,7 @@ int InitX64Trace(rvtranscontext_t *context)
return 0;
}
void DeleteX64Trace(rvtranscontext_t *context)
void DeleteX64Trace(box64context_t *context)
{
if(!context->zydis)
return;
@ -76,7 +73,7 @@ void DeleteX64Trace(rvtranscontext_t *context)
context->zydis = NULL;
}
zydis_dec_t* InitX64TraceDecoder(rvtranscontext_t *context)
zydis_dec_t* InitX64TraceDecoder(box64context_t *context)
{
if(!context->zydis)
return NULL;
@ -105,7 +102,13 @@ const char* DecodeX64Trace(zydis_dec_t *dec, uintptr_t p)
sprintf(tmp, "%02X ", *((unsigned char*)p+i));
strcat(buff, tmp);
}
#if 0
const /*ZydisFormatterToken*/void* token;
dec->ZydisFormatterTokenizeInstruction(&dec->formatter, &dec->instruction, tmp, sizeof(tmp), p, &token);
dec->PrintTokenizedInstruction(token);
#else
dec->ZydisFormatterFormatInstruction(&dec->formatter, &dec->instruction, tmp, sizeof(tmp),p);
#endif
strcat(buff, tmp);
} else {
sprintf(buff, "Decoder failed @%p", (void*)p);

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef _X86_ZYDIS_H_
#define _X86_ZYDIS_H_

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
@ -26,7 +23,7 @@ void reset_fpu(x64emu_t* emu)
{
memset(emu->x87, 0, sizeof(emu->x87));
memset(emu->fpu_ld, 0, sizeof(emu->fpu_ld));
emu->cw = 0x37F;
emu->cw.x16 = 0x37F;
emu->sw.x16 = 0x0000;
emu->top = 0;
emu->fpu_stack = 0;
@ -86,6 +83,10 @@ void fpu_fbld(x64emu_t* emu, uint8_t* s) {
// long double (80bits) -> double (64bits)
void LD2D(void* ld, void* d)
{
if(box64_x87_no80bits) {
*(uint64_t*)d = *(uint64_t*)ld;
return;
}
FPU_t result;
#pragma pack(push, 1)
struct {
@ -93,7 +94,13 @@ void LD2D(void* ld, void* d)
int16_t b;
} val;
#pragma pack(pop)
#if 1
memcpy(&val, ld, 10);
#else
val.f.ud[0] = *(uint32_t*)ld;
val.f.ud[1] = *(uint32_t*)(char*)(ld+4);
val.b = *(int16_t*)((char*)ld+8);
#endif
int32_t exp64 = (((uint32_t)(val.b&0x7fff) - BIAS80) + BIAS64);
int32_t exp64final = exp64&0x7ff;
// do specific value first (0, infinite...)
@ -119,7 +126,7 @@ void LD2D(void* ld, void* d)
*(uint64_t*)d = result.q;
return;
}
if(((uint32_t)(val.b&0x7fff)==0) || (exp64<=0)) {
if(((uint32_t)(val.b&0x7fff)==0) || (exp64<-1074)) {
//if(val.f.q==0)
// zero
//if(val.f.q!=0)
@ -131,6 +138,18 @@ void LD2D(void* ld, void* d)
return;
}
if(exp64<=0 && val.f.q) {
// try to see if it can be a denormal
int one = -exp64-1022;
uint64_t r = 0;
if(val.b&0x8000)
r |= 0x8000000000000000L;
r |= val.f.q>>one;
*(uint64_t*)d = r;
return;
}
if(exp64>=0x7ff) {
// to big value...
result.d = HUGE_VAL;
@ -151,6 +170,10 @@ void LD2D(void* ld, void* d)
// double (64bits) -> long double (80bits)
void D2LD(void* d, void* ld)
{
if(box64_x87_no80bits) {
*(uint64_t*)ld = *(uint64_t*)d;
return;
}
#pragma pack(push, 1)
struct {
FPU_t f;
@ -187,6 +210,12 @@ void D2LD(void* d, void* ld)
if(exp80!=0){
mant80final |= 0x8000000000000000L;
exp80final += (BIAS80 - BIAS64);
} else if(mant80final!=0) {
// denormals -> normal
exp80final = BIAS80-1023;
int one = __builtin_clz(mant80final) + 1;
exp80final -= one;
mant80final<<=one;
}
}
val.b = ((int16_t)(sign80)<<15)| (int16_t)(exp80final);
@ -198,7 +227,9 @@ void D2LD(void* d, void* ld)
double FromLD(void* ld)
{
double ret = 0.0;
if(box64_x87_no80bits)
return *(double*)ld;
double ret; // cannot add = 0; it break factorio (issue when calling fmodl)
LD2D(ld, &ret);
return ret;
}
@ -207,7 +238,7 @@ double FromLD(void* ld)
long double LD2localLD(void* ld)
{
// local implementation may not be try Quad precision, but double-double precision, so simple way to keep the 80bits precision in the conversion
double ret;
double ret; // cannot add = 0; it break factorio (issue when calling fmodl)
LD2D(ld, &ret);
return ret;
}
@ -220,7 +251,7 @@ long double LD2localLD(void* ld)
void fpu_loadenv(x64emu_t* emu, char* p, int b16)
{
emu->cw = *(uint16_t*)p;
emu->cw.x16 = *(uint16_t*)p;
p+=(b16)?2:4;
emu->sw.x16 = *(uint16_t*)p;
emu->top = emu->sw.f.F87_TOP;
@ -238,7 +269,7 @@ void fpu_loadenv(x64emu_t* emu, char* p, int b16)
void fpu_savenv(x64emu_t* emu, char* p, int b16)
{
emu->sw.f.F87_TOP = emu->top&7;
*(uint16_t*)p = emu->cw;
*(uint16_t*)p = emu->cw.x16;
p+=2;
if(!b16) {*(uint16_t*)p = 0; p+=2;}
*(uint16_t*)p = emu->sw.x16;
@ -296,8 +327,9 @@ void fpu_fxsave32(x64emu_t* emu, void* ed)
if(top==0) // check if stack is full or empty, based on tag[0]
stack = (emu->p_regs[0].tag)?8:0;
emu->sw.f.F87_TOP = top;
p->ControlWord = emu->cw;
p->ControlWord = emu->cw.x16;
p->StatusWord = emu->sw.x16;
p->MxCsr = emu->mxcsr.x32;
uint8_t tags = 0;
for (int i=0; i<8; ++i)
tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1;
@ -325,8 +357,9 @@ void fpu_fxsave64(x64emu_t* emu, void* ed)
if(top==0) // check if stack is full or empty, based on tag[0]
stack = (emu->p_regs[0].tag)?8:0;
emu->sw.f.F87_TOP = top;
p->ControlWord = emu->cw;
p->ControlWord = emu->cw.x16;
p->StatusWord = emu->sw.x16;
p->MxCsr = emu->mxcsr.x32;
uint8_t tags = 0;
for (int i=0; i<8; ++i)
tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1;
@ -346,8 +379,11 @@ void fpu_fxsave64(x64emu_t* emu, void* ed)
void fpu_fxrstor32(x64emu_t* emu, void* ed)
{
xsave32_t *p = (xsave32_t*)ed;
emu->cw = p->ControlWord;
emu->cw.x16 = p->ControlWord;
emu->sw.x16 = p->StatusWord;
emu->mxcsr.x32 = p->MxCsr;
if(box64_sse_flushto0)
applyFlushTo0(emu);
emu->top = emu->sw.f.F87_TOP;
uint8_t tags = p->TagWord;
for(int i=0; i<8; ++i)
@ -366,8 +402,11 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed)
void fpu_fxrstor64(x64emu_t* emu, void* ed)
{
xsave64_t *p = (xsave64_t*)ed;
emu->cw = p->ControlWord;
emu->cw.x16 = p->ControlWord;
emu->sw.x16 = p->StatusWord;
emu->mxcsr.x32 = p->MxCsr;
if(box64_sse_flushto0)
applyFlushTo0(emu);
emu->top = emu->sw.f.F87_TOP;
uint8_t tags = p->TagWord;
for(int i=0; i<8; ++i)

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __X87RUN_PRIVATE_H_
#define __X87RUN_PRIVATE_H_
@ -110,7 +107,7 @@ static inline void fpu_fcomi(x64emu_t* emu, double b)
static inline double fpu_round(x64emu_t* emu, double d) {
if (!isfinite(d))
return d;
switch(emu->round) {
switch(emu->cw.f.C87_RD) {
case ROUND_Nearest:
return nearbyint(d);
case ROUND_Down:

View File

@ -1,6 +1,3 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __AUXVAL_H__
#define __AUXVAL_H__

7
src/include/bitutils.h Normal file
View File

@ -0,0 +1,7 @@
#ifndef __BITUTILS_H_
#define __BITUTILS_H_
#include <stdint.h>
int TrailingZeros64(uint64_t x);
#endif //__BITUTILS_H_

View File

@ -1,12 +1,12 @@
/* Copyright (c) 2018-2021 Sebastien Chevalier ("ptitSeb")
* Distributed under MIT license.
* See file LICENSE for detail or copy at https://opensource.org/licenses/mit */
#ifndef __RVTRANSCONTEXT_H_
#define __RVTRANSCONTEXT_H_
#ifndef __BOX64CONTEXT_H_
#define __BOX64CONTEXT_H_
#include <stdint.h>
#include <pthread.h>
#include "pathcoll.h"
#include "dictionnary.h"
#ifdef DYNAREC
#include "dynarec/native_lock.h"
#endif
typedef struct elfheader_s elfheader_t;
typedef struct cleanup_s cleanup_t;
@ -17,6 +17,7 @@ typedef struct lib_s lib_t;
typedef struct bridge_s bridge_t;
typedef struct dlprivate_s dlprivate_t;
typedef struct kh_symbolmap_s kh_symbolmap_t;
typedef struct kh_defaultversion_s kh_defaultversion_t;
typedef struct library_s library_t;
typedef struct linkmap_s linkmap_t;
typedef struct kh_threadstack_s kh_threadstack_t;
@ -26,10 +27,14 @@ typedef struct atfork_fnc_s {
uintptr_t child;
void* handle;
} atfork_fnc_t;
#ifdef DYNAREC
typedef struct dynablock_s dynablock_t;
typedef struct mmaplist_s mmaplist_t;
typedef struct kh_dynablocks_s kh_dynablocks_t;
#endif
#define DYNAMAP_SHIFT 16
#define JMPTABL_SHIFT 16
typedef void* (*procaddess_t)(const char* name);
typedef void* (*procaddress_t)(const char* name);
typedef void* (*vkprocaddess_t)(void* instance, const char* name);
#define MAX_SIGNAL 64
@ -46,13 +51,13 @@ void free_tlsdatasize(void* p);
typedef struct needed_libs_s {
int cap;
int size;
library_t **libs;
char** names;
library_t** libs;
} needed_libs_t;
void add_neededlib(needed_libs_t* needed, library_t* lib);
void free_neededlib(needed_libs_t* needed);
void add_dependedlib(needed_libs_t* depended, library_t* lib);
void free_dependedlib(needed_libs_t* depended);
needed_libs_t* new_neededlib(int n);
void add1_neededlib(needed_libs_t* needed);
typedef struct base_segment_s {
uintptr_t base;
@ -61,13 +66,11 @@ typedef struct base_segment_s {
pthread_key_t key;
} base_segment_t;
#define CYCLE_LOG 16
typedef struct box64context_s {
path_collection_t box64_path; // PATH env. variable
path_collection_t box64_ld_lib; // LD_LIBRARY_PATH env. variable
typedef struct rvtranscontext_s {
path_collection_t rvtrans_path; // PATH env. variable
path_collection_t rvtrans_ld_lib; // LD_LIBRARY_PATH env. variable
path_collection_t rvtrans_emulated_libs; // Collection of libs that should not be wrapped
path_collection_t box64_emulated_libs; // Collection of libs that should not be wrapped
int x64trace;
int trace_tid;
@ -75,7 +78,7 @@ typedef struct rvtranscontext_s {
uint32_t sel_serial; // will be increment each time selectors changes
zydis_t *zydis; // dlopen the zydis dissasembler
void* rvtranslib; // dlopen on rvtrans itself
void* box64lib; // dlopen on box64 itself
int argc;
char** argv;
@ -84,8 +87,9 @@ typedef struct rvtranscontext_s {
char** envv;
char* fullpath;
char* rvtranspath; // path of current rvtrans executable
char* box64path; // path of current box64 executable
char* box86path; // path of box86 executable (if present)
char* bashpath; // path of x86_64 bash (defined with BOX64_BASH or by running bash directly)
uint64_t stacksz;
size_t stackalign;
@ -96,7 +100,8 @@ typedef struct rvtranscontext_s {
int elfsize; // number of elf loaded
needed_libs_t neededlibs; // needed libs for main elf
needed_libs_t *neededlibs; // needed libs for main elf
needed_libs_t *preload;
uintptr_t ep; // entry point
@ -109,41 +114,39 @@ typedef struct rvtranscontext_s {
uintptr_t vsyscall; // vsyscall bridge value
uintptr_t vsyscalls[3]; // the 3 x86 VSyscall pseudo bridges (mapped at 0xffffffffff600000+)
dlprivate_t *dlprivate; // dlopen library map
kh_symbolmap_t *glwrappers; // the map of wrapper for glProcs (for GLX or SDL1/2)
kh_symbolmap_t *glmymap; // link to the mysymbolmap of libGL
procaddess_t glxprocaddress;
kh_symbolmap_t *alwrappers; // the map of wrapper for alGetProcAddress
kh_symbolmap_t *almymap; // link to the mysymbolmap if libOpenAL
kh_symbolmap_t *vkwrappers; // the map of wrapper for VulkanProcs (TODO: check SDL2)
kh_symbolmap_t *vkmymap; // link to the mysymbolmap of libGL
kh_defaultversion_t *globaldefver; // the global default version for symbols (the XXX@@vvvv of symbols)
kh_defaultversion_t *weakdefver; // the weak default version for symbols (the XXX@@vvvv of symbols)
vkprocaddess_t vkprocaddress;
pthread_mutex_t mutex_once;
pthread_mutex_t mutex_once2;
pthread_mutex_t mutex_trace;
#ifndef DYNAREC
pthread_mutex_t mutex_lock; // dynarec build will use their own mecanism
pthread_mutex_t mutex_trace;
pthread_mutex_t mutex_tls;
pthread_mutex_t mutex_thread;
pthread_mutex_t mutex_bridge;
#else
uint32_t mutex_dyndump;
uint32_t mutex_trace;
uint32_t mutex_tls;
uint32_t mutex_thread;
uint32_t mutex_bridge;
uintptr_t max_db_size; // the biggest (in x86_64 instructions bytes) built dynablock
int trace_dynarec;
#endif
library_t *libclib; // shortcut to libc library (if loaded, so probably yes)
library_t *sdl1lib; // shortcut to SDL1 library (if loaded)
void* sdl1allocrw;
void* sdl1freerw;
library_t *sdl1mixerlib;
library_t *sdl2lib; // shortcut to SDL2 library (if loaded)
void* sdl2allocrw;
void* sdl2freerw;
library_t *sdl2lib;
library_t *sdl2mixerlib;
library_t *x11lib;
library_t *zlib;
library_t *vorbisfile;
library_t *vorbis;
library_t *asound;
library_t *pulse;
library_t *d3dadapter9;
library_t *libglu;
linkmap_t *linkmap;
void* sdl1allocrw; // SDL1 AllocRW/FreeRW function
void* sdl1freerw;
void* sdl2allocrw; // SDL2 AllocRW/FreeRW function
void* sdl2freerw;
int deferedInit;
elfheader_t **deferedInitList;
@ -157,9 +160,9 @@ typedef struct rvtranscontext_s {
uintptr_t *auxval_start;
cleanup_t *cleanups; // atexit functions
int clean_sz;
int clean_cap;
cleanup_t *cleanups; // atexit functions
int clean_sz;
int clean_cap;
zydis_dec_t *dec; // trace
@ -182,22 +185,38 @@ typedef struct rvtranscontext_s {
int stack_clone_used;
// rolling logs
char* log_call[CYCLE_LOG];
char* log_ret[CYCLE_LOG];
char* *log_call;
char* *log_ret;
int current_line;
} rvtranscontext_t;
} box64context_t;
extern rvtranscontext_t *my_context; // global context
#ifndef DYNAREC
#define mutex_lock(A) pthread_mutex_lock(A)
#define mutex_trylock(A) pthread_mutex_trylock(A)
#define mutex_unlock(A) pthread_mutex_unlock(A)
#else
int GetTID();
#define mutex_lock(A) {uint32_t tid = (uint32_t)GetTID(); while(native_lock_storeifnull_d(A, tid)) sched_yield();}
#define mutex_trylock(A) native_lock_storeifnull_d(A, (uint32_t)GetTID())
#define mutex_unlock(A) native_lock_storeifref_d(A, 0, (uint32_t)GetTID())
#endif
rvtranscontext_t *NewRVTransContext(int argc);
void FreeRVTransContext(rvtranscontext_t** context);
extern box64context_t *my_context; // global context
box64context_t *NewBox64Context(int argc);
void FreeBox64Context(box64context_t** context);
// Cycle log handling
void freeCycleLog(box64context_t* ctx);
void initCycleLog(box64context_t* context);
void print_cycle_log(int loglevel);
// return the index of the added header
int AddElfHeader(rvtranscontext_t* ctx, elfheader_t* head);
int AddElfHeader(box64context_t* ctx, elfheader_t* head);
// return the tlsbase (negative) for the new TLS partition created (no partition index is stored in the context)
int AddTLSPartition(rvtranscontext_t* context, int tlssize);
int AddTLSPartition(box64context_t* context, int tlssize);
// defined in fact in threads.c
void thread_set_emu(x64emu_t* emu);
@ -208,4 +227,4 @@ int unlockMutex();
// relock the muxtex that were unlocked
void relockMutex(int locks);
#endif //__RVTRANSCONTEXT_H_
#endif //__BOX64CONTEXT_H_

Some files were not shown because too many files have changed in this diff Show More