From c1f5d9c203324b9435414e789c02394ca99c98f7 Mon Sep 17 00:00:00 2001 From: Shinichiro Hamaji Date: Mon, 9 Nov 2015 16:47:42 +0900 Subject: [PATCH] Use .KATI_RESTAT to reduce unnecessary rebuilds of .jar files The same optimization was done for binaries in https://android-review.googlesource.com/#/c/175250/ To create a TOC file from .jar files, this change introduces ijar, which is designed for this purpose. Only #include lines were modified from the original version. https://github.com/bazelbuild/bazel/tree/master/third_party/ijar Performance: $ m && touch frameworks/base/core/java/com/google/android/util/Procedure.java && time m Before: 4m30s (1580 targets) After: 3m57s (772 targets) Unfortunately, the improvement is small yet, but local experiments showed we can cut ~2 more minutes if the similar optimization is done for .dex files. Bug: 24597504 Change-Id: Id54953aea25e497c8ebbe783b03f13b488577c6a --- core/base_rules.mk | 16 +- core/config.mk | 4 + core/definitions.mk | 36 + core/java.mk | 2 + core/java_common.mk | 1 + core/prebuilt_internal.mk | 2 + tools/ijar/Android.mk | 14 + tools/ijar/LICENSE | 203 +++++ tools/ijar/README.txt | 120 +++ tools/ijar/classfile.cc | 1788 +++++++++++++++++++++++++++++++++++++ tools/ijar/common.h | 102 +++ tools/ijar/ijar.cc | 182 ++++ tools/ijar/zip.cc | 1031 +++++++++++++++++++++ tools/ijar/zip.h | 173 ++++ tools/ijar/zip_main.cc | 312 +++++++ 15 files changed, 3973 insertions(+), 13 deletions(-) create mode 100644 tools/ijar/Android.mk create mode 100644 tools/ijar/LICENSE create mode 100644 tools/ijar/README.txt create mode 100644 tools/ijar/classfile.cc create mode 100644 tools/ijar/common.h create mode 100644 tools/ijar/ijar.cc create mode 100644 tools/ijar/zip.cc create mode 100644 tools/ijar/zip.h create mode 100644 tools/ijar/zip_main.cc diff --git a/core/base_rules.mk b/core/base_rules.mk index 42dfc1b4b..5ee5edf82 100644 --- a/core/base_rules.mk +++ b/core/base_rules.mk @@ -223,25 +223,15 @@ LOCAL_INTERMEDIATE_TARGETS 
+= $(LOCAL_BUILT_MODULE) ########################################################### ## Create .toc files from shared objects to reduce unnecessary rebuild # .toc files have the list of external dynamic symbols without their addresses. -# For ninja build, .toc files will be updated only when the content of .toc -# files are changed. As .KATI_RESTAT is specified to .toc files, dependent -# binaries of a .toc file will be rebuilt only when the content of +# As .KATI_RESTAT is specified to .toc files and commit-change-for-toc is used, +# dependent binaries of a .toc file will be rebuilt only when the content of # the .toc file is changed. ########################################################### ifeq ($(LOCAL_MODULE_CLASS),SHARED_LIBRARIES) LOCAL_INTERMEDIATE_TARGETS += $(LOCAL_BUILT_MODULE).toc $(LOCAL_BUILT_MODULE).toc: $(LOCAL_BUILT_MODULE) -ifeq ($(BUILDING_WITH_NINJA),true) $(call $(PRIVATE_2ND_ARCH_VAR_PREFIX)$(PRIVATE_PREFIX)transform-shared-lib-to-toc,$<,$@.tmp) - $(hide) if cmp -s $@.tmp $@ ; then \ - rm $@.tmp ; \ - else \ - mv $@.tmp $@ ; \ - fi -else - @# make doesn't support restat. We always update .toc files so the dependents will always be updated too. - $(call $(PRIVATE_2ND_ARCH_VAR_PREFIX)$(PRIVATE_PREFIX)transform-shared-lib-to-toc,$<,$@) -endif + $(call commit-change-for-toc,$@) # Kati adds restat=1 to ninja. GNU make does nothing for this. .KATI_RESTAT: $(LOCAL_BUILT_MODULE).toc diff --git a/core/config.mk b/core/config.mk index 49810ee68..e615159da 100644 --- a/core/config.mk +++ b/core/config.mk @@ -540,6 +540,10 @@ ifndef TARGET_BUILD_APPS ZIPTIME := $(HOST_OUT_EXECUTABLES)/ziptime$(HOST_EXECUTABLE_SUFFIX) endif +# ijar converts a .jar file to a smaller .jar file which only has its +# interfaces. 
+IJAR := $(HOST_OUT_EXECUTABLES)/ijar$(BUILD_EXECUTABLE_SUFFIX) + # relocation packer RELOCATION_PACKER := prebuilts/misc/$(BUILD_OS)-$(HOST_PREBUILT_ARCH)/relocation_packer/relocation_packer diff --git a/core/definitions.mk b/core/definitions.mk index 0089865ab..b7a7f800f 100644 --- a/core/definitions.mk +++ b/core/definitions.mk @@ -1927,6 +1927,42 @@ define transform-jar-to-jack $(hide) rm $@.tmpjill.jack endef +# Moves $1.tmp to $1 if necessary. This is designed to be used with +# .KATI_RESTAT. For kati, this function doesn't update the timestamp +# of $1 when $1.tmp is identical to $1 so that ninja won't rebuild +# targets which depend on $1. For GNU make, this function simply +# moves $1.tmp to $1. +ifeq ($(BUILDING_WITH_NINJA),true) +define commit-change-for-toc +$(hide) if cmp -s $1.tmp $1 ; then \ + rm $1.tmp ; \ +else \ + mv $1.tmp $1 ; \ +fi +endef +else +define commit-change-for-toc +@# make doesn't support restat. We always update .toc files so the dependents will always be updated too. +$(hide) mv $1.tmp $1 +endef +endif + +## Rule to create a table of contents from a .jar file. +## Must be called with $(eval). +# $1: A .jar file +define _transform-jar-to-toc +$1.toc: $1 | $(IJAR) + @echo Generating TOC: $$@ + $(hide) $(IJAR) $$< $$@.tmp + $$(call commit-change-for-toc,$$@) +endef + +## Define a rule which generates .jar.toc and marks it as .KATI_RESTAT. +define define-jar-to-toc-rule +$(eval $(call _transform-jar-to-toc,$1)) +$(eval .KATI_RESTAT: $1.toc) +endef + # Invoke Jack to compile java from source to jack files without shrink or obfuscation. # diff --git a/core/java.mk b/core/java.mk index f9305df7e..9bdb77f45 100644 --- a/core/java.mk +++ b/core/java.mk @@ -456,6 +456,8 @@ $(full_classes_jar): $(full_classes_emma_jar) | $(ACP) @echo Copying: $@ $(hide) $(ACP) -fp $< $@ +$(call define-jar-to-toc-rule, $(full_classes_jar)) + # Run proguard if necessary, otherwise just copy the file. 
ifdef LOCAL_PROGUARD_ENABLED ifneq ($(filter-out full custom nosystem obfuscation optimization shrinktests,$(LOCAL_PROGUARD_ENABLED)),) diff --git a/core/java_common.mk b/core/java_common.mk index 9d810962d..b7f07ab18 100644 --- a/core/java_common.mk +++ b/core/java_common.mk @@ -147,6 +147,7 @@ endif # LOCAL_SDK_VERSION full_shared_java_libs := $(call java-lib-files,$(LOCAL_JAVA_LIBRARIES),$(LOCAL_IS_HOST_MODULE)) full_java_lib_deps := $(call java-lib-deps,$(LOCAL_JAVA_LIBRARIES),$(LOCAL_IS_HOST_MODULE)) +full_java_lib_deps := $(addsuffix .toc, $(full_java_lib_deps)) else # LOCAL_IS_HOST_MODULE diff --git a/core/prebuilt_internal.mk b/core/prebuilt_internal.mk index 3af71015e..0a8a7f83a 100644 --- a/core/prebuilt_internal.mk +++ b/core/prebuilt_internal.mk @@ -310,6 +310,8 @@ $(common_classes_jar) : $(my_src_jar) | $(ACP) $(common_javalib_jar) : $(common_classes_jar) | $(ACP) $(transform-prebuilt-to-target) +$(call define-jar-to-toc-rule, $(common_classes_jar)) + # make sure the classes.jar and javalib.jar are built before $(LOCAL_BUILT_MODULE) $(built_module) : $(common_javalib_jar) endif # TARGET JAVA_LIBRARIES diff --git a/tools/ijar/Android.mk b/tools/ijar/Android.mk new file mode 100644 index 000000000..356e109b7 --- /dev/null +++ b/tools/ijar/Android.mk @@ -0,0 +1,14 @@ +# Copyright 2015 The Android Open Source Project +# +# The rest of files in this directory comes from +# https://github.com/bazelbuild/bazel/tree/master/third_party/ijar + +LOCAL_PATH:= $(call my-dir) + +include $(CLEAR_VARS) +LOCAL_CPP_EXTENSION := cc +LOCAL_SRC_FILES := classfile.cc ijar.cc zip.cc +LOCAL_CFLAGS += -Wall +LOCAL_SHARED_LIBRARIES := libz-host +LOCAL_MODULE := ijar +include $(BUILD_HOST_EXECUTABLE) diff --git a/tools/ijar/LICENSE b/tools/ijar/LICENSE new file mode 100644 index 000000000..6b0b1270f --- /dev/null +++ b/tools/ijar/LICENSE @@ -0,0 +1,203 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, 
REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/tools/ijar/README.txt b/tools/ijar/README.txt new file mode 100644 index 000000000..d5a6a0fd7 --- /dev/null +++ b/tools/ijar/README.txt @@ -0,0 +1,120 @@ + +ijar: A tool for generating interface .jars from normal .jars +============================================================= + +Alan Donovan, 26 May 2007. + +Rationale: + + In order to improve the speed of compilation of Java programs in + Bazel, the output of build steps is cached. + + This works very nicely for C++ compilation: a compilation unit + includes a .cc source file and typically dozens of header files. + Header files change relatively infrequently, so the need for a + rebuild is usually driven by a change in the .cc file. 
Even after + syncing a slightly newer version of the tree and doing a rebuild, + many hits in the cache are still observed. + + In Java, by contrast, a compilation unit involves a set of .java + source files, plus a set of .jar files containing already-compiled + JVM .class files. Class files serve a dual purpose: from the JVM's + perspective, they are containers of executable code, but from the + compiler's perspective, they are interface definitions. The problem + here is that .jar files are very much more sensitive to change than + C++ header files, so even a change that is insignificant to the + compiler (such as the addition of a print statement to a method in a + prerequisite class) will cause the jar to change, and any code that + depends on this jar's interface will be recompiled unnecessarily. + + The purpose of ijar is to produce, from a .jar file, a much smaller, + simpler .jar file containing only the parts that are significant for + the purposes of compilation. In other words, an interface .jar + file. By changing one's compilation dependencies to be the interface + jar files, unnecessary recompilation is avoided when upstream + changes don't affect the interface. + +Details: + + ijar is a tool that reads a .jar file and emits a .jar file + containing only the parts that are relevant to Java compilation. + For example, it throws away: + + - Files whose name does not end in ".class". + - All executable method code. + - All private methods and fields. + - All constants and attributes except the minimal set necessary to + describe the class interface. + - All debugging information + (LineNumberTable, SourceFile, LocalVariableTables attributes). + + It also sets to zero the file modification times in the index of the + .jar file. + +Implementation: + + ijar is implemented in C++, and runs very quickly. 
For example + (when optimized) it takes only 530ms to process a 42MB + .jar file containing 5878 classes, resulting in an interface .jar + file of only 11.4MB in size. For more usual .jar sizes of a few + megabytes, a runtime of 50ms is typical. + + The implementation strategy is to mmap both the input jar and the + newly-created _interface.jar, and to scan through the former and + emit the latter in a single pass. There are a couple of locations + where some kind of "backpatching" is required: + + - in the .zip file format, for each file, the size field precedes + the data. We emit a zero but note its location, generate and emit + the stripped classfile, then poke the correct size into the + location. + + - for JVM .class files, the header (including the constant table) + precedes the body, but cannot be emitted before it because it's + not until we emit the body that we know which constants are + referenced and which are garbage. So we emit the body into a + temporary buffer, then emit the header to the output jar, followed + by the contents of the temp buffer. + + Also note that the zip file format has unnecessary duplication of + the index metadata: it has header+data for each file, then another + set of (similar) headers at the end. Rather than save the metadata + explicitly in some datastructure, we just record the addresses of + the already-emitted zip metadata entries in the output file, and + then read from there as necessary. + +Notes: + + This code has no dependency except on the STL and on zlib. + + Almost all of the getX/putX/ReadX/WriteX functions in the code + advance their first argument pointer, which is passed by reference. + + It's tempting to discard package-private classes and class members. + However, this would be incorrect because they are a necessary part + of the package interface, as a Java package is often compiled in + multiple stages. 
For example: in Bazel, both java tests and java + code inhabit the same Java package but are compiled separately. + +Assumptions: + + We assume that jar files are uncompressed v1.0 zip files (created + with 'jar c0f') with a zero general_purpose_bit_flag. + + We assume that javap/javac don't need the correct CRC checksums in + the .jar file. + + We assume that it's better simply to abort in the face of unknown + input than to risk leaving out something important from the output + (although in the case of annotations, it should be safe to ignore + ones we don't understand). + +TODO: + Maybe: ensure a canonical sort order is used for every list (jar + entries, class members, attributes, etc.) This isn't essential + because we can assume the compiler is deterministic and the order in + the source files changes little. Also, it would require two passes. :( + + Maybe: delete dynamically-allocated memory. + + Add (a lot) more tests. Include a test of idempotency. diff --git a/tools/ijar/classfile.cc b/tools/ijar/classfile.cc new file mode 100644 index 000000000..5d12cc2e0 --- /dev/null +++ b/tools/ijar/classfile.cc @@ -0,0 +1,1788 @@ +// Copyright 2001,2007 Alan Donovan. All rights reserved. +// +// Author: Alan Donovan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// classfile.cc -- classfile parsing and stripping. +// + +// TODO(adonovan) don't pass pointers by reference; this is not +// compatible with Google C++ style. + +// See README.txt for details. 
+// +// For definition of JVM class file format, see: +// Java SE 8 Edition: +// http://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4 + +#define __STDC_FORMAT_MACROS 1 +#define __STDC_LIMIT_MACROS 1 +#include // for PRIx32 +#include +#include +#include +#include + +#include +#include +#include + +#include "common.h" + +namespace devtools_ijar { + +// See Table 4.3 in JVM Spec. +enum CONSTANT { + CONSTANT_Class = 7, + CONSTANT_FieldRef = 9, + CONSTANT_Methodref = 10, + CONSTANT_Interfacemethodref = 11, + CONSTANT_String = 8, + CONSTANT_Integer = 3, + CONSTANT_Float = 4, + CONSTANT_Long = 5, + CONSTANT_Double = 6, + CONSTANT_NameAndType = 12, + CONSTANT_Utf8 = 1, + CONSTANT_MethodHandle = 15, + CONSTANT_MethodType = 16, + CONSTANT_InvokeDynamic = 18 +}; + +// See Tables 4.1, 4.4, 4.5 in JVM Spec. +enum ACCESS { + ACC_PUBLIC = 0x0001, + ACC_PRIVATE = 0x0002, + ACC_PROTECTED = 0x0004, + ACC_STATIC = 0x0008, + ACC_FINAL = 0x0010, + ACC_SYNCHRONIZED = 0x0020, + ACC_VOLATILE = 0x0040, + ACC_TRANSIENT = 0x0080, + ACC_INTERFACE = 0x0200, + ACC_ABSTRACT = 0x0400 +}; + +// See Table 4.7.20-A in Java 8 JVM Spec. +enum TARGET_TYPE { + // Targets for type parameter declarations (ElementType.TYPE_PARAMETER): + CLASS_TYPE_PARAMETER = 0x00, + METHOD_TYPE_PARAMETER = 0x01, + + // Targets for type uses that may be externally visible in classes and members + // (ElementType.TYPE_USE): + CLASS_EXTENDS = 0x10, + CLASS_TYPE_PARAMETER_BOUND = 0x11, + METHOD_TYPE_PARAMETER_BOUND = 0x12, + FIELD = 0x13, + METHOD_RETURN = 0x14, + METHOD_RECEIVER = 0x15, + METHOD_FORMAL_PARAMETER = 0x16, + THROWS = 0x17, + + // TARGET_TYPE >= 0x40 is reserved for type uses that occur only within code + // blocks. Ijar doesn't need to know about these. 
+}; + +struct Constant; + +// TODO(adonovan) these globals are unfortunate +static std::vector const_pool_in; // input constant pool +static std::vector const_pool_out; // output constant_pool +static std::set used_class_names; +static Constant * class_name; + +// Returns the Constant object, given an index into the input constant pool. +// Note: constant(0) == NULL; this invariant is exploited by the +// InnerClassesAttribute, inter alia. +inline Constant *constant(int idx) { + if (idx < 0 || (unsigned)idx >= const_pool_in.size()) { + fprintf(stderr, "Illegal constant pool index: %d\n", idx); + abort(); + } + return const_pool_in[idx]; +} + +/********************************************************************** + * * + * Constants * + * * + **********************************************************************/ + +// See sec.4.4 of JVM spec. +struct Constant { + + Constant(u1 tag) : + slot_(0), + tag_(tag) {} + + virtual ~Constant() {} + + // For UTF-8 string constants, returns the encoded string. + // Otherwise, returns an undefined string value suitable for debugging. + virtual std::string Display() = 0; + + virtual void Write(u1 *&p) = 0; + + // Called by slot() when a constant has been identified as required + // in the output classfile's constant pool. This is a hook allowing + // constants to register their dependency on other constants, by + // calling slot() on them in turn. + virtual void Keep() {} + + bool Kept() { + return slot_ != 0; + } + + // Returns the index of this constant in the output class's constant + // pool, assigning a slot if not already done. + u2 slot() { + if (slot_ == 0) { + Keep(); + slot_ = const_pool_out.size(); // BugBot's "narrowing" warning + // is bogus. The number of + // output constants can't exceed + // the number of input constants. 
+ if (slot_ == 0) { + fprintf(stderr, "Constant::slot() called before output phase.\n"); + abort(); + } + const_pool_out.push_back(this); + if (tag_ == CONSTANT_Long || tag_ == CONSTANT_Double) { + const_pool_out.push_back(NULL); + } + } + return slot_; + } + + u2 slot_; // zero => "this constant is unreachable garbage" + u1 tag_; +}; + +// Extracts class names from a signature and puts them into the global +// variable used_class_names. +// +// desc: the descriptor class names should be extracted from. +// p: the position where the extraction should start. +void ExtractClassNames(const std::string& desc, size_t* p); + +// See sec.4.4.1 of JVM spec. +struct Constant_Class : Constant +{ + Constant_Class(u2 name_index) : + Constant(CONSTANT_Class), + name_index_(name_index) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, constant(name_index_)->slot()); + } + + std::string Display() { + return constant(name_index_)->Display(); + } + + void Keep() { constant(name_index_)->slot(); } + + u2 name_index_; +}; + +// See sec.4.4.2 of JVM spec. +struct Constant_FMIref : Constant +{ + Constant_FMIref(u1 tag, + u2 class_index, + u2 name_type_index) : + Constant(tag), + class_index_(class_index), + name_type_index_(name_type_index) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, constant(class_index_)->slot()); + put_u2be(p, constant(name_type_index_)->slot()); + } + + std::string Display() { + return constant(class_index_)->Display() + "::" + + constant(name_type_index_)->Display(); + } + + void Keep() { + constant(class_index_)->slot(); + constant(name_type_index_)->slot(); + } + + u2 class_index_; + u2 name_type_index_; +}; + +// See sec.4.4.3 of JVM spec. 
+struct Constant_String : Constant +{ + Constant_String(u2 string_index) : + Constant(CONSTANT_String), + string_index_(string_index) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, constant(string_index_)->slot()); + } + + std::string Display() { + return "\"" + constant(string_index_)->Display() + "\""; + } + + void Keep() { constant(string_index_)->slot(); } + + u2 string_index_; +}; + +// See sec.4.4.4 of JVM spec. +struct Constant_IntegerOrFloat : Constant +{ + Constant_IntegerOrFloat(u1 tag, u4 bytes) : + Constant(tag), + bytes_(bytes) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u4be(p, bytes_); + } + + std::string Display() { return "int/float"; } + + u4 bytes_; +}; + +// See sec.4.4.5 of JVM spec. +struct Constant_LongOrDouble : Constant_IntegerOrFloat +{ + Constant_LongOrDouble(u1 tag, u4 high_bytes, u4 low_bytes) : + Constant_IntegerOrFloat(tag, high_bytes), + low_bytes_(low_bytes) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u4be(p, bytes_); + put_u4be(p, low_bytes_); + } + + std::string Display() { return "long/double"; } + + u4 low_bytes_; +}; + +// See sec.4.4.6 of JVM spec. +struct Constant_NameAndType : Constant +{ + Constant_NameAndType(u2 name_index, u2 descr_index) : + Constant(CONSTANT_NameAndType), + name_index_(name_index), + descr_index_(descr_index) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, constant(name_index_)->slot()); + put_u2be(p, constant(descr_index_)->slot()); + } + + std::string Display() { + return constant(name_index_)->Display() + "::" + + constant(descr_index_)->Display(); + } + + void Keep() { + constant(name_index_)->slot(); + constant(descr_index_)->slot(); + } + + u2 name_index_; + u2 descr_index_; +}; + +// See sec.4.4.7 of JVM spec. 
+struct Constant_Utf8 : Constant +{ + Constant_Utf8(u4 length, const u1 *utf8) : + Constant(CONSTANT_Utf8), + length_(length), + utf8_(utf8) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, length_); + put_n(p, utf8_, length_); + } + + std::string Display() { + return std::string((const char*) utf8_, length_); + } + + u4 length_; + const u1 *utf8_; +}; + +// See sec.4.4.8 of JVM spec. +struct Constant_MethodHandle : Constant +{ + Constant_MethodHandle(u1 reference_kind, u2 reference_index) : + Constant(CONSTANT_MethodHandle), + reference_kind_(reference_kind), + reference_index_(reference_index) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u1(p, reference_kind_); + put_u2be(p, reference_index_); + } + + std::string Display() { + return "Constant_MethodHandle::" + std::to_string(reference_kind_) + "::" + + constant(reference_index_)->Display(); + } + + u1 reference_kind_; + u2 reference_index_; +}; + +// See sec.4.4.9 of JVM spec. +struct Constant_MethodType : Constant +{ + Constant_MethodType(u2 descriptor_index) : + Constant(CONSTANT_MethodType), + descriptor_index_(descriptor_index) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, descriptor_index_); + } + + std::string Display() { + return "Constant_MethodType::" + constant(descriptor_index_)->Display(); + } + + u2 descriptor_index_; +}; + +// See sec.4.4.10 of JVM spec. 
+struct Constant_InvokeDynamic : Constant +{ + Constant_InvokeDynamic(u2 bootstrap_method_attr_index, u2 name_and_type_index) : + Constant(CONSTANT_InvokeDynamic), + bootstrap_method_attr_index_(bootstrap_method_attr_index), + name_and_type_index_(name_and_type_index) {} + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, bootstrap_method_attr_index_); + put_u2be(p, name_and_type_index_); + } + + std::string Display() { + return "Constant_InvokeDynamic::" + + std::to_string(bootstrap_method_attr_index_) + "::" + + constant(name_and_type_index_)->Display(); + } + + u2 bootstrap_method_attr_index_; + u2 name_and_type_index_; +}; + +/********************************************************************** + * * + * Attributes * + * * + **********************************************************************/ + +// See sec.4.7 of JVM spec. +struct Attribute { + + virtual ~Attribute() {} + virtual void Write(u1 *&p) = 0; + virtual void ExtractClassNames() {} + + void WriteProlog(u1 *&p, u2 length) { + put_u2be(p, attribute_name_->slot()); + put_u4be(p, length); + } + + Constant *attribute_name_; +}; + +// See sec.4.7.5 of JVM spec. +struct ExceptionsAttribute : Attribute { + + static ExceptionsAttribute* Read(const u1 *&p, Constant *attribute_name) { + ExceptionsAttribute *attr = new ExceptionsAttribute; + attr->attribute_name_ = attribute_name; + u2 number_of_exceptions = get_u2be(p); + for (int ii = 0; ii < number_of_exceptions; ++ii) { + attr->exceptions_.push_back(constant(get_u2be(p))); + } + return attr; + } + + void Write(u1 *&p) { + WriteProlog(p, exceptions_.size() * 2 + 2); + put_u2be(p, exceptions_.size()); + for (size_t ii = 0; ii < exceptions_.size(); ++ii) { + put_u2be(p, exceptions_[ii]->slot()); + } + } + + std::vector exceptions_; +}; + +// See sec.4.7.6 of JVM spec. 
+struct InnerClassesAttribute : Attribute { + + struct Entry { + Constant *inner_class_info; + Constant *outer_class_info; + Constant *inner_name; + u2 inner_class_access_flags; + }; + + virtual ~InnerClassesAttribute() { + for (size_t i = 0; i < entries_.size(); i++) { + delete entries_[i]; + } + } + + static InnerClassesAttribute* Read(const u1 *&p, Constant *attribute_name) { + InnerClassesAttribute *attr = new InnerClassesAttribute; + attr->attribute_name_ = attribute_name; + + u2 number_of_classes = get_u2be(p); + for (int ii = 0; ii < number_of_classes; ++ii) { + Entry *entry = new Entry; + entry->inner_class_info = constant(get_u2be(p)); + entry->outer_class_info = constant(get_u2be(p)); + entry->inner_name = constant(get_u2be(p)); + entry->inner_class_access_flags = get_u2be(p); + + attr->entries_.push_back(entry); + } + return attr; + } + + void Write(u1 *&p) { + std::set kept_entries; + // We keep an entry if the constant referring to the inner class is already + // kept. Then we mark its outer class and its class name as kept, too, then + // iterate until a fixed point is reached. 
+ int entry_count; + int iteration = 0; + + do { + entry_count = kept_entries.size(); + for (int i_entry = 0; i_entry < entries_.size(); ++i_entry) { + Entry* entry = entries_[i_entry]; + if (entry->inner_class_info->Kept() || + used_class_names.find(entry->inner_class_info->Display()) + != used_class_names.end() || + entry->outer_class_info == class_name || + entry->outer_class_info == NULL || + entry->inner_name == NULL) { + kept_entries.insert(i_entry); + + // These are zero for anonymous inner classes + if (entry->outer_class_info != NULL) { + entry->outer_class_info->slot(); + } + + if (entry->inner_name != NULL) { + entry->inner_name->slot(); + } + } + } + iteration += 1; + } while (entry_count != kept_entries.size()); + + if (kept_entries.size() == 0) { + return; + } + + WriteProlog(p, 2 + kept_entries.size() * 8); + put_u2be(p, kept_entries.size()); + + for (std::set::iterator it = kept_entries.begin(); + it != kept_entries.end(); + ++it) { + Entry *entry = entries_[*it]; + put_u2be(p, entry->inner_class_info == NULL + ? 0 + : entry->inner_class_info->slot()); + put_u2be(p, entry->outer_class_info == NULL + ? 0 + : entry->outer_class_info->slot()); + put_u2be(p, entry->inner_name == NULL + ? 0 + : entry->inner_name->slot()); + put_u2be(p, entry->inner_class_access_flags); + } + } + + std::vector entries_; +}; + +// See sec.4.7.7 of JVM spec. +// We preserve EnclosingMethod attributes to be able to identify local and +// anonymous classes. These classes will be stripped of most content, as they +// represent implementation details that shoudn't leak into the ijars. Omitting +// EnclosingMethod attributes can lead to type-checking failures in the presence +// of generics (see b/9070939). 
+struct EnclosingMethodAttribute : Attribute { + + static EnclosingMethodAttribute* Read(const u1 *&p, + Constant *attribute_name) { + EnclosingMethodAttribute *attr = new EnclosingMethodAttribute; + attr->attribute_name_ = attribute_name; + attr->class_ = constant(get_u2be(p)); + attr->method_ = constant(get_u2be(p)); + return attr; + } + + void Write(u1 *&p) { + WriteProlog(p, 4); + put_u2be(p, class_->slot()); + put_u2be(p, method_ == NULL ? 0 : method_->slot()); + } + + Constant *class_; + Constant *method_; +}; + +// See sec.4.7.16.1 of JVM spec. +// Used by AnnotationDefault and other attributes. +struct ElementValue { + virtual ~ElementValue() {} + virtual void Write(u1 *&p) = 0; + virtual void ExtractClassNames() {} + static ElementValue* Read(const u1 *&p); + u1 tag_; + u4 length_; +}; + +struct BaseTypeElementValue : ElementValue { + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, const_value_->slot()); + } + static BaseTypeElementValue *Read(const u1 *&p) { + BaseTypeElementValue *value = new BaseTypeElementValue; + value->const_value_ = constant(get_u2be(p)); + return value; + } + Constant *const_value_; +}; + +struct EnumTypeElementValue : ElementValue { + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, type_name_->slot()); + put_u2be(p, const_name_->slot()); + } + static EnumTypeElementValue *Read(const u1 *&p) { + EnumTypeElementValue *value = new EnumTypeElementValue; + value->type_name_ = constant(get_u2be(p)); + value->const_name_ = constant(get_u2be(p)); + return value; + } + Constant *type_name_; + Constant *const_name_; +}; + +struct ClassTypeElementValue : ElementValue { + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, class_info_->slot()); + } + + virtual void ExtractClassNames() { + size_t idx = 0; + devtools_ijar::ExtractClassNames(class_info_->Display(), &idx); + } + + static ClassTypeElementValue *Read(const u1 *&p) { + ClassTypeElementValue *value = new ClassTypeElementValue; + value->class_info_ = 
constant(get_u2be(p)); + return value; + } + Constant *class_info_; +}; + +struct ArrayTypeElementValue : ElementValue { + virtual ~ArrayTypeElementValue() { + for (size_t i = 0; i < values_.size(); i++) { + delete values_[i]; + } + } + + virtual void ExtractClassNames() { + for (int i = 0; i < values_.size(); i++) { + values_[i]->ExtractClassNames(); + } + } + + void Write(u1 *&p) { + put_u1(p, tag_); + put_u2be(p, values_.size()); + for (size_t ii = 0; ii < values_.size(); ++ii) { + values_[ii]->Write(p); + } + } + static ArrayTypeElementValue *Read(const u1 *&p) { + ArrayTypeElementValue *value = new ArrayTypeElementValue; + u2 num_values = get_u2be(p); + for (int ii = 0; ii < num_values; ++ii) { + value->values_.push_back(ElementValue::Read(p)); + } + return value; + } + std::vector values_; +}; + +// See sec.4.7.16 of JVM spec. +struct Annotation { + virtual ~Annotation() { + for (size_t i = 0; i < element_value_pairs_.size(); i++) { + delete element_value_pairs_[i]->element_value_; + delete element_value_pairs_[i]; + } + } + + void ExtractClassNames() { + for (size_t i = 0; i < element_value_pairs_.size(); i++) { + element_value_pairs_[i]->element_value_->ExtractClassNames(); + } + } + + void Write(u1 *&p) { + put_u2be(p, type_->slot()); + put_u2be(p, element_value_pairs_.size()); + for (size_t ii = 0; ii < element_value_pairs_.size(); ++ii) { + put_u2be(p, element_value_pairs_[ii]->element_name_->slot()); + element_value_pairs_[ii]->element_value_->Write(p); + } + } + static Annotation *Read(const u1 *&p) { + Annotation *value = new Annotation; + value->type_ = constant(get_u2be(p)); + u2 num_element_value_pairs = get_u2be(p); + for (int ii = 0; ii < num_element_value_pairs; ++ii) { + ElementValuePair *pair = new ElementValuePair; + pair->element_name_ = constant(get_u2be(p)); + pair->element_value_ = ElementValue::Read(p); + value->element_value_pairs_.push_back(pair); + } + return value; + } + Constant *type_; + struct ElementValuePair { + Constant 
*element_name_; + ElementValue *element_value_; + }; + std::vector element_value_pairs_; +}; + +// See sec 4.7.20 of Java 8 JVM Spec +// +// Each entry in the annotations table represents a single run-time visible +// annotation on a type used in a declaration or expression. The type_annotation +// structure has the following format: +// +// type_annotation { +// u1 target_type; +// union { +// type_parameter_target; +// supertype_target; +// type_parameter_bound_target; +// empty_target; +// method_formal_parameter_target; +// throws_target; +// localvar_target; +// catch_target; +// offset_target; +// type_argument_target; +// } target_info; +// type_path target_path; +// u2 type_index; +// u2 num_element_value_pairs; +// { +// u2 element_name_index; +// element_value value; +// } +// element_value_pairs[num_element_value_pairs]; +// } +// +struct TypeAnnotation { + virtual ~TypeAnnotation() { + delete target_info_; + delete type_path_; + delete annotation_; + } + + void ExtractClassNames() { + annotation_->ExtractClassNames(); + } + + void Write(u1 *&p) { + put_u1(p, target_type_); + target_info_->Write(p); + type_path_->Write(p); + annotation_->Write(p); + } + + static TypeAnnotation *Read(const u1 *&p) { + TypeAnnotation *value = new TypeAnnotation; + value->target_type_ = get_u1(p); + value->target_info_ = ReadTargetInfo(p, value->target_type_); + value->type_path_ = TypePath::Read(p); + value->annotation_ = Annotation::Read(p); + return value; + } + + struct TargetInfo { + virtual ~TargetInfo() {} + virtual void Write(u1 *&p) = 0; + }; + + struct TypeParameterTargetInfo : TargetInfo { + void Write(u1 *&p) { + put_u1(p, type_parameter_index_); + } + static TypeParameterTargetInfo *Read(const u1 *&p) { + TypeParameterTargetInfo *value = new TypeParameterTargetInfo; + value->type_parameter_index_ = get_u1(p); + return value; + } + u1 type_parameter_index_; + }; + + struct ClassExtendsInfo : TargetInfo { + void Write(u1 *&p) { + put_u2be(p, supertype_index_); + 
} + static ClassExtendsInfo *Read(const u1 *&p) { + ClassExtendsInfo *value = new ClassExtendsInfo; + value->supertype_index_ = get_u2be(p); + return value; + } + u2 supertype_index_; + }; + + struct TypeParameterBoundInfo : TargetInfo { + void Write(u1 *&p) { + put_u1(p, type_parameter_index_); + put_u1(p, bound_index_); + } + static TypeParameterBoundInfo *Read(const u1 *&p) { + TypeParameterBoundInfo *value = new TypeParameterBoundInfo; + value->type_parameter_index_ = get_u1(p); + value->bound_index_ = get_u1(p); + return value; + } + u1 type_parameter_index_; + u1 bound_index_; + }; + + struct EmptyInfo : TargetInfo { + void Write(u1 *&p) {} + static EmptyInfo *Read(const u1 *&p) { + return new EmptyInfo; + } + }; + + struct MethodFormalParameterInfo : TargetInfo { + void Write(u1 *&p) { + put_u1(p, method_formal_parameter_index_); + } + static MethodFormalParameterInfo *Read(const u1 *&p) { + MethodFormalParameterInfo *value = new MethodFormalParameterInfo; + value->method_formal_parameter_index_ = get_u1(p); + return value; + } + u1 method_formal_parameter_index_; + }; + + struct ThrowsTypeInfo : TargetInfo { + void Write(u1 *&p) { + put_u2be(p, throws_type_index_); + } + static ThrowsTypeInfo *Read(const u1 *&p) { + ThrowsTypeInfo *value = new ThrowsTypeInfo; + value->throws_type_index_ = get_u2be(p); + return value; + } + u2 throws_type_index_; + }; + + static TargetInfo *ReadTargetInfo(const u1 *&p, u1 target_type) { + switch (target_type) { + case CLASS_TYPE_PARAMETER: + case METHOD_TYPE_PARAMETER: + return TypeParameterTargetInfo::Read(p); + case CLASS_EXTENDS: + return ClassExtendsInfo::Read(p); + case CLASS_TYPE_PARAMETER_BOUND: + case METHOD_TYPE_PARAMETER_BOUND: + return TypeParameterBoundInfo::Read(p); + case FIELD: + case METHOD_RETURN: + case METHOD_RECEIVER: + return new EmptyInfo; + case METHOD_FORMAL_PARAMETER: + return MethodFormalParameterInfo::Read(p); + case THROWS: + return ThrowsTypeInfo::Read(p); + default: + fprintf(stderr, "Illegal 
type annotation target type: %d\n", + target_type); + abort(); + } + } + + struct TypePath { + void Write(u1 *&p) { + put_u1(p, path_.size()); + for (TypePathEntry entry : path_) { + put_u1(p, entry.type_path_kind_); + put_u1(p, entry.type_argument_index_); + } + } + static TypePath *Read(const u1 *&p) { + TypePath *value = new TypePath; + u1 path_length = get_u1(p); + for (int ii = 0; ii < path_length; ++ii) { + TypePathEntry entry; + entry.type_path_kind_ = get_u1(p); + entry.type_argument_index_ = get_u1(p); + value->path_.push_back(entry); + } + return value; + } + + struct TypePathEntry { + u1 type_path_kind_; + u1 type_argument_index_; + }; + std::vector path_; + }; + + u1 target_type_; + TargetInfo *target_info_; + TypePath *type_path_; + Annotation *annotation_; +}; + +struct AnnotationTypeElementValue : ElementValue { + virtual ~AnnotationTypeElementValue() { + delete annotation_; + } + + void Write(u1 *&p) { + put_u1(p, tag_); + annotation_->Write(p); + } + static AnnotationTypeElementValue *Read(const u1 *&p) { + AnnotationTypeElementValue *value = new AnnotationTypeElementValue; + value->annotation_ = Annotation::Read(p); + return value; + } + + Annotation *annotation_; +}; + +ElementValue* ElementValue::Read(const u1 *&p) { + const u1* start = p; + ElementValue *result; + u1 tag = get_u1(p); + if (tag != 0 && strchr("BCDFIJSZs", (char) tag) != NULL) { + result = BaseTypeElementValue::Read(p); + } else if ((char) tag == 'e') { + result = EnumTypeElementValue::Read(p); + } else if ((char) tag == 'c') { + result = ClassTypeElementValue::Read(p); + } else if ((char) tag == '[') { + result = ArrayTypeElementValue::Read(p); + } else if ((char) tag == '@') { + result = AnnotationTypeElementValue::Read(p); + } else { + fprintf(stderr, "Illegal element_value::tag: %d\n", tag); + abort(); + } + result->tag_ = tag; + result->length_ = p - start; + return result; +} + +// See sec.4.7.20 of JVM spec. 
+// We preserve AnnotationDefault attributes because they are required +// in order to make use of an annotation in new code. +struct AnnotationDefaultAttribute : Attribute { + virtual ~AnnotationDefaultAttribute() { + delete default_value_; + } + + static AnnotationDefaultAttribute* Read(const u1 *&p, + Constant *attribute_name) { + AnnotationDefaultAttribute *attr = new AnnotationDefaultAttribute; + attr->attribute_name_ = attribute_name; + attr->default_value_ = ElementValue::Read(p); + return attr; + } + + void Write(u1 *&p) { + WriteProlog(p, default_value_->length_); + default_value_->Write(p); + } + + virtual void ExtractClassNames() { + default_value_->ExtractClassNames(); + } + + ElementValue *default_value_; +}; + +// See sec.4.7.2 of JVM spec. +// We preserve ConstantValue attributes because they are required for +// compile-time constant propagation. +struct ConstantValueAttribute : Attribute { + + static ConstantValueAttribute* Read(const u1 *&p, Constant *attribute_name) { + ConstantValueAttribute *attr = new ConstantValueAttribute; + attr->attribute_name_ = attribute_name; + attr->constantvalue_ = constant(get_u2be(p)); + return attr; + } + + void Write(u1 *&p) { + WriteProlog(p, 2); + put_u2be(p, constantvalue_->slot()); + } + + Constant *constantvalue_; +}; + +// See sec.4.7.9 of JVM spec. +// We preserve Signature attributes because they are required by the +// compiler for type-checking of generics. 
+struct SignatureAttribute : Attribute { + + static SignatureAttribute* Read(const u1 *&p, Constant *attribute_name) { + SignatureAttribute *attr = new SignatureAttribute; + attr->attribute_name_ = attribute_name; + attr->signature_ = constant(get_u2be(p)); + return attr; + } + + void Write(u1 *&p) { + WriteProlog(p, 2); + put_u2be(p, signature_->slot()); + } + + virtual void ExtractClassNames() { + size_t signature_idx = 0; + devtools_ijar::ExtractClassNames(signature_->Display(), &signature_idx); + } + + Constant *signature_; +}; + +// See sec.4.7.15 of JVM spec. +// We preserve Deprecated attributes because they are required by the +// compiler to generate warning messages. +struct DeprecatedAttribute : Attribute { + + static DeprecatedAttribute* Read(const u1 *&p, Constant *attribute_name) { + DeprecatedAttribute *attr = new DeprecatedAttribute; + attr->attribute_name_ = attribute_name; + return attr; + } + + void Write(u1 *&p) { + WriteProlog(p, 0); + } +}; + +// See sec.4.7.16-17 of JVM spec v3. Includes RuntimeVisible and +// RuntimeInvisible. +// +// We preserve all annotations. 
+struct AnnotationsAttribute : Attribute { + virtual ~AnnotationsAttribute() { + for (size_t i = 0; i < annotations_.size(); i++) { + delete annotations_[i]; + } + } + + static AnnotationsAttribute* Read(const u1 *&p, Constant *attribute_name) { + AnnotationsAttribute *attr = new AnnotationsAttribute; + attr->attribute_name_ = attribute_name; + u2 num_annotations = get_u2be(p); + for (int ii = 0; ii < num_annotations; ++ii) { + Annotation *annotation = Annotation::Read(p); + attr->annotations_.push_back(annotation); + } + return attr; + } + + virtual void ExtractClassNames() { + for (int i = 0; i < annotations_.size(); i++) { + annotations_[i]->ExtractClassNames(); + } + } + + void Write(u1 *&p) { + WriteProlog(p, -1); + u1 *payload_start = p - 4; + put_u2be(p, annotations_.size()); + for (size_t ii = 0; ii < annotations_.size(); ++ii) { + annotations_[ii]->Write(p); + } + put_u4be(payload_start, p - 4 - payload_start); // backpatch length + } + + std::vector annotations_; +}; + +// See sec.4.7.18-19 of JVM spec. Includes RuntimeVisible and +// RuntimeInvisible. +// +// We preserve all annotations. 
+struct ParameterAnnotationsAttribute : Attribute { + + static ParameterAnnotationsAttribute* Read(const u1 *&p, + Constant *attribute_name) { + ParameterAnnotationsAttribute *attr = new ParameterAnnotationsAttribute; + attr->attribute_name_ = attribute_name; + u1 num_parameters = get_u1(p); + for (int ii = 0; ii < num_parameters; ++ii) { + std::vector annotations; + u2 num_annotations = get_u2be(p); + for (int ii = 0; ii < num_annotations; ++ii) { + Annotation *annotation = Annotation::Read(p); + annotations.push_back(annotation); + } + attr->parameter_annotations_.push_back(annotations); + } + return attr; + } + + virtual void ExtractClassNames() { + for (size_t i = 0; i < parameter_annotations_.size(); i++) { + const std::vector& annotations = parameter_annotations_[i]; + for (size_t j = 0; j < annotations.size(); j++) { + annotations[j]->ExtractClassNames(); + } + } + } + + void Write(u1 *&p) { + WriteProlog(p, -1); + u1 *payload_start = p - 4; + put_u1(p, parameter_annotations_.size()); + for (size_t ii = 0; ii < parameter_annotations_.size(); ++ii) { + std::vector &annotations = parameter_annotations_[ii]; + put_u2be(p, annotations.size()); + for (size_t jj = 0; jj < annotations.size(); ++jj) { + annotations[jj]->Write(p); + } + } + put_u4be(payload_start, p - 4 - payload_start); // backpatch length + } + + std::vector > parameter_annotations_; +}; + +// See sec.4.7.20 of Java 8 JVM spec. Includes RuntimeVisibleTypeAnnotations +// and RuntimeInvisibleTypeAnnotations. 
+struct TypeAnnotationsAttribute : Attribute { + static TypeAnnotationsAttribute* Read(const u1 *&p, Constant *attribute_name, + u4 attribute_length) { + auto attr = new TypeAnnotationsAttribute; + attr->attribute_name_ = attribute_name; + u2 num_annotations = get_u2be(p); + for (int ii = 0; ii < num_annotations; ++ii) { + TypeAnnotation *annotation = TypeAnnotation::Read(p); + attr->type_annotations_.push_back(annotation); + } + return attr; + } + + virtual void ExtractClassNames() { + for (int i = 0; i < type_annotations_.size(); i++) { + type_annotations_[i]->ExtractClassNames(); + } + } + + void Write(u1 *&p) { + WriteProlog(p, -1); + u1 *payload_start = p - 4; + put_u2be(p, type_annotations_.size()); + for (TypeAnnotation *annotation : type_annotations_) { + annotation->Write(p); + } + put_u4be(payload_start, p - 4 - payload_start); // backpatch length + } + + std::vector type_annotations_; +}; + +struct GeneralAttribute : Attribute { + static GeneralAttribute* Read(const u1 *&p, Constant *attribute_name, + u4 attribute_length) { + auto attr = new GeneralAttribute; + attr->attribute_name_ = attribute_name; + attr->attribute_length_ = attribute_length; + attr->attribute_content_ = p; + p += attribute_length; + return attr; + } + + void Write(u1 *&p) { + WriteProlog(p, attribute_length_); + put_n(p, attribute_content_, attribute_length_); + } + + u4 attribute_length_; + const u1 *attribute_content_; +}; + +/********************************************************************** + * * + * ClassFile * + * * + **********************************************************************/ + +struct HasAttrs { + std::vector attributes; + + void WriteAttrs(u1 *&p); + void ReadAttrs(const u1 *&p); + + virtual ~HasAttrs() { + for (size_t i = 0; i < attributes.size(); i++) { + delete attributes[i]; + } + } + + void ExtractClassNames() { + for (int i = 0; i < attributes.size(); i++) { + attributes[i]->ExtractClassNames(); + } + } +}; + +// A field or method. 
+// See sec.4.5 and 4.6 of JVM spec. +struct Member : HasAttrs { + u2 access_flags; + Constant *name; + Constant *descriptor; + + static Member* Read(const u1 *&p) { + Member *m = new Member; + m->access_flags = get_u2be(p); + m->name = constant(get_u2be(p)); + m->descriptor = constant(get_u2be(p)); + m->ReadAttrs(p); + return m; + } + + void Write(u1 *&p) { + put_u2be(p, access_flags); + put_u2be(p, name->slot()); + put_u2be(p, descriptor->slot()); + WriteAttrs(p); + } +}; + +// See sec.4.1 of JVM spec. +struct ClassFile : HasAttrs { + + size_t length; + + // Header: + u4 magic; + u2 major; + u2 minor; + + // Body: + u2 access_flags; + Constant *this_class; + Constant *super_class; + std::vector interfaces; + std::vector fields; + std::vector methods; + + virtual ~ClassFile() { + for (size_t i = 0; i < fields.size(); i++) { + delete fields[i]; + } + + for (size_t i = 0; i < methods.size(); i++) { + delete methods[i]; + } + + // Constants do not need to be deleted; they are owned by the constant pool. + } + + void WriteClass(u1 *&p); + + bool ReadConstantPool(const u1 *&p); + + void StripIfAnonymous(); + + void WriteHeader(u1 *&p) { + put_u4be(p, magic); + put_u2be(p, major); + put_u2be(p, minor); + + put_u2be(p, const_pool_out.size()); + for (u2 ii = 1; ii < const_pool_out.size(); ++ii) { + if (const_pool_out[ii] != NULL) { // NB: NULLs appear after long/double. + const_pool_out[ii]->Write(p); + } + } + } + + void WriteBody(u1 *&p) { + put_u2be(p, access_flags); + put_u2be(p, this_class->slot()); + put_u2be(p, super_class == NULL ? 
0 : super_class->slot()); + put_u2be(p, interfaces.size()); + for (size_t ii = 0; ii < interfaces.size(); ++ii) { + put_u2be(p, interfaces[ii]->slot()); + } + put_u2be(p, fields.size()); + for (size_t ii = 0; ii < fields.size(); ++ii) { + fields[ii]->Write(p); + } + put_u2be(p, methods.size()); + for (size_t ii = 0; ii < methods.size(); ++ii) { + methods[ii]->Write(p); + } + + Attribute* inner_classes = NULL; + + // Make the inner classes attribute the last, so that it can know which + // constants were needed + for (size_t ii = 0; ii < attributes.size(); ii++) { + if (attributes[ii]->attribute_name_->Display() == "InnerClasses") { + inner_classes = attributes[ii]; + attributes.erase(attributes.begin() + ii); + break; + } + } + + if (inner_classes != NULL) { + attributes.push_back(inner_classes); + } + + WriteAttrs(p); + } + +}; + +void HasAttrs::ReadAttrs(const u1 *&p) { + u2 attributes_count = get_u2be(p); + for (int ii = 0; ii < attributes_count; ii++) { + Constant *attribute_name = constant(get_u2be(p)); + u4 attribute_length = get_u4be(p); + + std::string attr_name = attribute_name->Display(); + if (attr_name == "SourceFile" || + attr_name == "LineNumberTable" || + attr_name == "LocalVariableTable" || + attr_name == "LocalVariableTypeTable" || + attr_name == "Code" || + attr_name == "Synthetic" || + attr_name == "BootstrapMethods") { + p += attribute_length; // drop these attributes + } else if (attr_name == "Exceptions") { + attributes.push_back(ExceptionsAttribute::Read(p, attribute_name)); + } else if (attr_name == "Signature") { + attributes.push_back(SignatureAttribute::Read(p, attribute_name)); + } else if (attr_name == "Deprecated") { + attributes.push_back(DeprecatedAttribute::Read(p, attribute_name)); + } else if (attr_name == "EnclosingMethod") { + attributes.push_back(EnclosingMethodAttribute::Read(p, attribute_name)); + } else if (attr_name == "InnerClasses") { + // TODO(bazel-team): omit private inner classes + 
attributes.push_back(InnerClassesAttribute::Read(p, attribute_name)); + } else if (attr_name == "AnnotationDefault") { + attributes.push_back(AnnotationDefaultAttribute::Read(p, attribute_name)); + } else if (attr_name == "ConstantValue") { + attributes.push_back(ConstantValueAttribute::Read(p, attribute_name)); + } else if (attr_name == "RuntimeVisibleAnnotations" || + attr_name == "RuntimeInvisibleAnnotations") { + attributes.push_back(AnnotationsAttribute::Read(p, attribute_name)); + } else if (attr_name == "RuntimeVisibleParameterAnnotations" || + attr_name == "RuntimeInvisibleParameterAnnotations") { + attributes.push_back( + ParameterAnnotationsAttribute::Read(p, attribute_name)); + } else if (attr_name == "Scala" || + attr_name == "ScalaSig" || + attr_name == "ScalaInlineInfo") { + // These are opaque blobs, so can be handled with a general + // attribute handler + attributes.push_back(GeneralAttribute::Read(p, attribute_name, + attribute_length)); + } else if (attr_name == "RuntimeVisibleTypeAnnotations" || + attr_name == "RuntimeInvisibleTypeAnnotations") { + // JSR 308: annotations on types. JDK 7 has no use for these yet, but the + // Checkers Framework relies on them. + attributes.push_back(TypeAnnotationsAttribute::Read(p, attribute_name, + attribute_length)); + } else { + // Skip over unknown attributes with a warning. The JVM spec + // says this is ok, so long as we handle the mandatory attributes. + fprintf(stderr, "ijar: skipping unknown attribute: \"%s\".\n", + attr_name.c_str()); + p += attribute_length; + } + } +} + +void HasAttrs::WriteAttrs(u1 *&p) { + u1* p_size = p; + + put_u2be(p, 0); + int n_written_attrs = 0; + for (size_t ii = 0; ii < attributes.size(); ii++) { + u1* before = p; + attributes[ii]->Write(p); + if (p != before) { + n_written_attrs++; + } + } + + put_u2be(p_size, n_written_attrs); +} + +// See sec.4.4 of JVM spec. 
+bool ClassFile::ReadConstantPool(const u1 *&p) { + + const_pool_in.clear(); + const_pool_in.push_back(NULL); // dummy first item + + u2 cp_count = get_u2be(p); + for (int ii = 1; ii < cp_count; ++ii) { + u1 tag = get_u1(p); + + if (devtools_ijar::verbose) { + fprintf(stderr, "cp[%d/%d] = tag %d\n", ii, cp_count, tag); + } + + switch(tag) { + case CONSTANT_Class: { + u2 name_index = get_u2be(p); + const_pool_in.push_back(new Constant_Class(name_index)); + break; + } + case CONSTANT_FieldRef: + case CONSTANT_Methodref: + case CONSTANT_Interfacemethodref: { + u2 class_index = get_u2be(p); + u2 nti = get_u2be(p); + const_pool_in.push_back(new Constant_FMIref(tag, class_index, nti)); + break; + } + case CONSTANT_String: { + u2 string_index = get_u2be(p); + const_pool_in.push_back(new Constant_String(string_index)); + break; + } + case CONSTANT_NameAndType: { + u2 name_index = get_u2be(p); + u2 descriptor_index = get_u2be(p); + const_pool_in.push_back( + new Constant_NameAndType(name_index, descriptor_index)); + break; + } + case CONSTANT_Utf8: { + u2 length = get_u2be(p); + if (devtools_ijar::verbose) { + fprintf(stderr, "Utf8: \"%s\" (%d)\n", + std::string((const char*) p, length).c_str(), length); + } + + const_pool_in.push_back(new Constant_Utf8(length, p)); + p += length; + break; + } + case CONSTANT_Integer: + case CONSTANT_Float: { + u4 bytes = get_u4be(p); + const_pool_in.push_back(new Constant_IntegerOrFloat(tag, bytes)); + break; + } + case CONSTANT_Long: + case CONSTANT_Double: { + u4 high_bytes = get_u4be(p); + u4 low_bytes = get_u4be(p); + const_pool_in.push_back( + new Constant_LongOrDouble(tag, high_bytes, low_bytes)); + // Longs and doubles occupy two constant pool slots. + // ("In retrospect, making 8-byte constants take two "constant + // pool entries was a poor choice." --JVM Spec.) 
+ const_pool_in.push_back(NULL); + ii++; + break; + } + case CONSTANT_MethodHandle: { + u1 reference_kind = get_u1(p); + u2 reference_index = get_u2be(p); + const_pool_in.push_back( + new Constant_MethodHandle(reference_kind, reference_index)); + break; + } + case CONSTANT_MethodType: { + u2 descriptor_index = get_u2be(p); + const_pool_in.push_back(new Constant_MethodType(descriptor_index)); + break; + } + case CONSTANT_InvokeDynamic: { + u2 bootstrap_method_attr = get_u2be(p); + u2 name_name_type_index = get_u2be(p); + const_pool_in.push_back(new Constant_InvokeDynamic( + bootstrap_method_attr, name_name_type_index)); + break; + } + default: { + fprintf(stderr, "Unknown constant: %02x. Passing class through.\n", + tag); + return false; + } + } + } + + return true; +} + +// Anonymous inner classes are stripped to opaque classes that only extend +// Object. None of their methods or fields are accessible anyway. +void ClassFile::StripIfAnonymous() { + int enclosing_index = -1; + int inner_classes_index = -1; + + for (size_t ii = 0; ii < attributes.size(); ++ii) { + if (attributes[ii]->attribute_name_->Display() == "EnclosingMethod") { + enclosing_index = ii; + } else if (attributes[ii]->attribute_name_->Display() == "InnerClasses") { + inner_classes_index = ii; + } + } + + // Presence of an EnclosingMethod attribute indicates a local or anonymous + // class, which can be stripped. + if (enclosing_index > -1) { + // Clear the signature to only extend java.lang.Object. + super_class = NULL; + interfaces.clear(); + + // Clear away all fields (implementation details). + for (size_t ii = 0; ii < fields.size(); ++ii) { + delete fields[ii]; + } + fields.clear(); + + // Clear away all methods (implementation details). + for (size_t ii = 0; ii < methods.size(); ++ii) { + delete methods[ii]; + } + methods.clear(); + + // Only preserve the InnerClasses attribute to comply with the spec. 
+ Attribute *attr = NULL; + for (size_t ii = 0; ii < attributes.size(); ++ii) { + if (static_cast(ii) != inner_classes_index) { + delete attributes[ii]; + } else { + attr = attributes[ii]; + } + } + attributes.clear(); + if (attr != NULL) { + attributes.push_back(attr); + } + } +} + +static ClassFile *ReadClass(const void *classdata, size_t length) { + const u1 *p = (u1*) classdata; + + ClassFile *clazz = new ClassFile; + + clazz->length = length; + + clazz->magic = get_u4be(p); + if (clazz->magic != 0xCAFEBABE) { + fprintf(stderr, "Bad magic %" PRIx32 "\n", clazz->magic); + abort(); + } + clazz->major = get_u2be(p); + clazz->minor = get_u2be(p); + + if (!clazz->ReadConstantPool(p)) { + delete clazz; + return NULL; + } + + clazz->access_flags = get_u2be(p); + clazz->this_class = constant(get_u2be(p)); + class_name = clazz->this_class; + + u2 super_class_id = get_u2be(p); + clazz->super_class = super_class_id == 0 ? NULL : constant(super_class_id); + + u2 interfaces_count = get_u2be(p); + for (int ii = 0; ii < interfaces_count; ++ii) { + clazz->interfaces.push_back(constant(get_u2be(p))); + } + + u2 fields_count = get_u2be(p); + for (int ii = 0; ii < fields_count; ++ii) { + Member *field = Member::Read(p); + + if (!(field->access_flags & ACC_PRIVATE)) { // drop private fields + clazz->fields.push_back(field); + } + } + + u2 methods_count = get_u2be(p); + for (int ii = 0; ii < methods_count; ++ii) { + Member *method = Member::Read(p); + + // drop class initializers + if (method->name->Display() == "") continue; + + if (!(method->access_flags & ACC_PRIVATE)) { // drop private methods + clazz->methods.push_back(method); + } + } + + clazz->ReadAttrs(p); + clazz->StripIfAnonymous(); + + return clazz; +} + +// In theory, '/' is also reserved, but it's okay if we just parse package +// identifiers as part of the class name. Note that signatures are UTF-8, but +// this works just as well as in plain ASCII. 
+static const char *SIGNATURE_NON_IDENTIFIER_CHARS = ".;[<>:"; + +void Expect(const std::string& desc, size_t* p, char expected) { + if (desc[*p] != expected) { + fprintf(stderr, "Expected '%c' in '%s' at %zd in signature\n", + expected, desc.substr(*p).c_str(), *p); + exit(1); + } + + *p += 1; +} + +// These functions form a crude recursive descent parser for descriptors and +// signatures in class files (see JVM spec 4.3). +// +// This parser is a bit more liberal than the spec, but this should be fine, +// because it accepts all valid class files and croaks only on invalid ones. +void ParseFromClassTypeSignature(const std::string& desc, size_t* p); +void ParseSimpleClassTypeSignature(const std::string& desc, size_t* p); +void ParseClassTypeSignatureSuffix(const std::string& desc, size_t* p); +void ParseIdentifier(const std::string& desc, size_t* p); +void ParseTypeArgumentsOpt(const std::string& desc, size_t* p); +void ParseMethodDescriptor(const std::string& desc, size_t* p); + +void ParseClassTypeSignature(const std::string& desc, size_t* p) { + Expect(desc, p, 'L'); + ParseSimpleClassTypeSignature(desc, p); + ParseClassTypeSignatureSuffix(desc, p); + Expect(desc, p, ';'); +} + +void ParseSimpleClassTypeSignature(const std::string& desc, size_t* p) { + ParseIdentifier(desc, p); + ParseTypeArgumentsOpt(desc, p); +} + +void ParseClassTypeSignatureSuffix(const std::string& desc, size_t* p) { + while (desc[*p] == '.') { + *p += 1; + ParseSimpleClassTypeSignature(desc, p); + } +} + +void ParseIdentifier(const std::string& desc, size_t* p) { + size_t next = desc.find_first_of(SIGNATURE_NON_IDENTIFIER_CHARS, *p); + std::string id = desc.substr(*p, next - *p); + used_class_names.insert(id); + *p = next; +} + +void ParseTypeArgumentsOpt(const std::string& desc, size_t* p) { + if (desc[*p] != '<') { + return; + } + + *p += 1; + while (desc[*p] != '>') { + switch (desc[*p]) { + case '*': + *p += 1; + break; + + case '+': + case '-': + *p += 1; + ExtractClassNames(desc, 
p); + break; + + default: + ExtractClassNames(desc, p); + break; + } + } + + *p += 1; +} + +void ParseMethodDescriptor(const std::string& desc, size_t* p) { + Expect(desc, p, '('); + while (desc[*p] != ')') { + ExtractClassNames(desc, p); + } + + Expect(desc, p, ')'); + ExtractClassNames(desc, p); +} + +void ParseFormalTypeParameters(const std::string& desc, size_t* p) { + Expect(desc, p, '<'); + while (desc[*p] != '>') { + ParseIdentifier(desc, p); + Expect(desc, p, ':'); + if (desc[*p] != ':' && desc[*p] != '>') { + ExtractClassNames(desc, p); + } + + while (desc[*p] == ':') { + Expect(desc, p, ':'); + ExtractClassNames(desc, p); + } + } + + Expect(desc, p, '>'); +} + +void ExtractClassNames(const std::string& desc, size_t* p) { + switch (desc[*p]) { + case '<': + ParseFormalTypeParameters(desc, p); + ExtractClassNames(desc, p); + break; + + case 'L': + ParseClassTypeSignature(desc, p); + break; + + case '[': + *p += 1; + ExtractClassNames(desc, p); + break; + + case 'T': + *p += 1; + ParseIdentifier(desc, p); + Expect(desc, p, ';'); + break; + + case '(': + ParseMethodDescriptor(desc, p); + break; + + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': + case 'V': + *p += 1; + break; + + default: + fprintf(stderr, "Invalid signature %s\n", desc.substr(*p).c_str()); + } +} + +void ClassFile::WriteClass(u1 *&p) { + used_class_names.clear(); + std::vector members; + members.insert(members.end(), fields.begin(), fields.end()); + members.insert(members.end(), methods.begin(), methods.end()); + ExtractClassNames(); + for (int i = 0; i < members.size(); i++) { + Member *member = members[i]; + size_t idx = 0; + devtools_ijar::ExtractClassNames(member->descriptor->Display(), &idx); + member->ExtractClassNames(); + } + + // We have to write the body out before the header in order to reference + // the essential constants and populate the output constant pool: + u1 *body = new u1[length]; + u1 *q = body; + WriteBody(q); // advances 
q + u4 body_length = q - body; + + WriteHeader(p); // advances p + put_n(p, body, body_length); + delete[] body; +} + + +void StripClass(u1 *&classdata_out, const u1 *classdata_in, size_t in_length) { + ClassFile *clazz = ReadClass(classdata_in, in_length); + if (clazz == NULL) { + // Class is invalid. Simply copy it to the output and call it a day. + put_n(classdata_out, classdata_in, in_length); + } else { + + // Constant pool item zero is a dummy entry. Setting it marks the + // beginning of the output phase; calls to Constant::slot() will + // fail if called prior to this. + const_pool_out.push_back(NULL); + clazz->WriteClass(classdata_out); + + delete clazz; + } + + // Now clean up all the mess we left behind. + + for (size_t i = 0; i < const_pool_in.size(); i++) { + delete const_pool_in[i]; + } + + const_pool_in.clear(); + const_pool_out.clear(); +} + +} // namespace devtools_ijar diff --git a/tools/ijar/common.h b/tools/ijar/common.h new file mode 100644 index 000000000..118041b85 --- /dev/null +++ b/tools/ijar/common.h @@ -0,0 +1,102 @@ +// Copyright 2001,2007 Alan Donovan. All rights reserved. +// +// Author: Alan Donovan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// common.h -- common definitions. 
+// + +#ifndef INCLUDED_DEVTOOLS_IJAR_COMMON_H +#define INCLUDED_DEVTOOLS_IJAR_COMMON_H + +#include +#include +#include + +namespace devtools_ijar { + +typedef unsigned long long u8; +typedef uint32_t u4; +typedef uint16_t u2; +typedef uint8_t u1; + +// be = big endian, le = little endian + +inline u1 get_u1(const u1 *&p) { + return *p++; +} + +inline u2 get_u2be(const u1 *&p) { + u4 x = (p[0] << 8) | p[1]; + p += 2; + return x; +} + +inline u2 get_u2le(const u1 *&p) { + u4 x = (p[1] << 8) | p[0]; + p += 2; + return x; +} + +inline u4 get_u4be(const u1 *&p) { + u4 x = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; + p += 4; + return x; +} + +inline u4 get_u4le(const u1 *&p) { + u4 x = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]; + p += 4; + return x; +} + +inline void put_u1(u1 *&p, u1 x) { + *p++ = x; +} + +inline void put_u2be(u1 *&p, u2 x) { + *p++ = x >> 8; + *p++ = x & 0xff; +} + +inline void put_u2le(u1 *&p, u2 x) { + *p++ = x & 0xff; + *p++ = x >> 8;; +} + +inline void put_u4be(u1 *&p, u4 x) { + *p++ = x >> 24; + *p++ = (x >> 16) & 0xff; + *p++ = (x >> 8) & 0xff; + *p++ = x & 0xff; +} + +inline void put_u4le(u1 *&p, u4 x) { + *p++ = x & 0xff; + *p++ = (x >> 8) & 0xff; + *p++ = (x >> 16) & 0xff; + *p++ = x >> 24; +} + +// Copy n bytes from src to p, and advance p. +inline void put_n(u1 *&p, const u1 *src, size_t n) { + memcpy(p, src, n); + p += n; +} + +extern bool verbose; + +} // namespace devtools_ijar + +#endif // INCLUDED_DEVTOOLS_IJAR_COMMON_H diff --git a/tools/ijar/ijar.cc b/tools/ijar/ijar.cc new file mode 100644 index 000000000..1925b48ba --- /dev/null +++ b/tools/ijar/ijar.cc @@ -0,0 +1,182 @@ +// Copyright 2001,2007 Alan Donovan. All rights reserved. +// +// Author: Alan Donovan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ijar.cpp -- .jar -> _interface.jar tool. +// + +#include +#include +#include +#include +#include +#include + +#include "zip.h" + +namespace devtools_ijar { + +bool verbose = false; + +// Reads a JVM class from classdata_in (of the specified length), and +// writes out a simplified class to classdata_out, advancing the +// pointer. +void StripClass(u1 *&classdata_out, const u1 *classdata_in, size_t in_length); + +const char* CLASS_EXTENSION = ".class"; +const size_t CLASS_EXTENSION_LENGTH = strlen(CLASS_EXTENSION); + +// ZipExtractorProcessor that select only .class file and use +// StripClass to generate an interface class, storing as a new file +// in the specified ZipBuilder. +class JarStripperProcessor : public ZipExtractorProcessor { + public: + JarStripperProcessor() {} + virtual ~JarStripperProcessor() {} + + virtual void Process(const char* filename, const u4 attr, + const u1* data, const size_t size); + virtual bool Accept(const char* filename, const u4 attr); + + private: + // Not owned by JarStripperProcessor, see SetZipBuilder(). + ZipBuilder* builder; + + public: + // Set the ZipBuilder to add the ijar class to the output zip file. + // This pointer should not be deleted while this class is still in use and + // it should be set before any call to the Process() method. 
+ void SetZipBuilder(ZipBuilder* builder) { + this->builder = builder; + } +}; + +bool JarStripperProcessor::Accept(const char* filename, const u4) { + ssize_t offset = strlen(filename) - CLASS_EXTENSION_LENGTH; + if (offset >= 0) { + return strcmp(filename + offset, CLASS_EXTENSION) == 0; + } + return false; +} + +void JarStripperProcessor::Process(const char* filename, const u4, + const u1* data, const size_t size) { + if (verbose) { + fprintf(stderr, "INFO: StripClass: %s\n", filename); + } + u1 *q = builder->NewFile(filename, 0); + u1 *classdata_out = q; + StripClass(q, data, size); // actually process it + size_t out_length = q - classdata_out; + builder->FinishFile(out_length); +} + +// Opens "file_in" (a .jar file) for reading, and writes an interface +// .jar to "file_out". +void OpenFilesAndProcessJar(const char *file_out, const char *file_in) { + JarStripperProcessor processor; + std::unique_ptr in(ZipExtractor::Create(file_in, &processor)); + if (in.get() == NULL) { + fprintf(stderr, "Unable to open Zip file %s: %s\n", file_in, + strerror(errno)); + abort(); + } + u8 output_length = in->CalculateOutputLength(); + std::unique_ptr out(ZipBuilder::Create(file_out, output_length)); + if (out.get() == NULL) { + fprintf(stderr, "Unable to open output file %s: %s\n", file_out, + strerror(errno)); + abort(); + } + processor.SetZipBuilder(out.get()); + + // Process all files in the zip + if (in->ProcessAll() < 0) { + fprintf(stderr, "%s\n", in->GetError()); + abort(); + } + + // Add dummy file, since javac doesn't like truly empty jars. 
+ if (out->GetNumberFiles() == 0) { + out->WriteEmptyFile("dummy"); + } + // Finish writing the output file + if (out->Finish() < 0) { + fprintf(stderr, "%s\n", out->GetError()); + abort(); + } + // Get all file size + size_t in_length = in->GetSize(); + size_t out_length = out->GetSize(); + if (verbose) { + fprintf(stderr, "INFO: produced interface jar: %s -> %s (%d%%).\n", + file_in, file_out, + static_cast(100.0 * out_length / in_length)); + } +} + +} // namespace devtools_ijar + +// +// main method +// +static void usage() { + fprintf(stderr, "Usage: ijar [-v] x.jar [x_interface.jar>]\n"); + fprintf(stderr, "Creates an interface jar from the specified jar file.\n"); + exit(1); +} + +int main(int argc, char **argv) { + const char *filename_in = NULL; + const char *filename_out = NULL; + + for (int ii = 1; ii < argc; ++ii) { + if (strcmp(argv[ii], "-v") == 0) { + devtools_ijar::verbose = true; + } else if (filename_in == NULL) { + filename_in = argv[ii]; + } else if (filename_out == NULL) { + filename_out = argv[ii]; + } else { + usage(); + } + } + + if (filename_in == NULL) { + usage(); + } + + // Guess output filename from input: + char filename_out_buf[PATH_MAX]; + if (filename_out == NULL) { + size_t len = strlen(filename_in); + if (len > 4 && strncmp(filename_in + len - 4, ".jar", 4) == 0) { + strcpy(filename_out_buf, filename_in); + strcpy(filename_out_buf + len - 4, "-interface.jar"); + filename_out = filename_out_buf; + } else { + fprintf(stderr, "Can't determine output filename since input filename " + "doesn't end with '.jar'.\n"); + return 1; + } + } + + if (devtools_ijar::verbose) { + fprintf(stderr, "INFO: writing to '%s'.\n", filename_out); + } + + devtools_ijar::OpenFilesAndProcessJar(filename_out, filename_in); + return 0; +} diff --git a/tools/ijar/zip.cc b/tools/ijar/zip.cc new file mode 100644 index 000000000..ca5f39670 --- /dev/null +++ b/tools/ijar/zip.cc @@ -0,0 +1,1031 @@ +// Copyright 2007 Alan Donovan. All rights reserved. 
+// +// Author: Alan Donovan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// zip.cc -- .zip (.jar) file reading/writing routines. +// + +// See README.txt for details. +// +// See http://www.pkware.com/documents/casestudies/APPNOTE.TXT +// for definition of PKZIP file format. + +#define _FILE_OFFSET_BITS 64 // Support zip files larger than 2GB + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zip.h" +#include + +#define LOCAL_FILE_HEADER_SIGNATURE 0x04034b50 +#define CENTRAL_FILE_HEADER_SIGNATURE 0x02014b50 +#define END_OF_CENTRAL_DIR_SIGNATURE 0x06054b50 +#define DATA_DESCRIPTOR_SIGNATURE 0x08074b50 + +// version to extract: 1.0 - default value from APPNOTE.TXT. +// Output JAR files contain no extra ZIP features, so this is enough. +#define ZIP_VERSION_TO_EXTRACT 10 +#define COMPRESSION_METHOD_STORED 0 // no compression +#define COMPRESSION_METHOD_DEFLATED 8 + +#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3) +#define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11) +#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1)) +#define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \ + (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \ + | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \ + | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED) + +namespace devtools_ijar { +// In the absence of ZIP64 support, zip files are limited to 4GB. 
+// http://www.info-zip.org/FAQ.html#limits +static const u8 kMaximumOutputSize = std::numeric_limits::max(); + +static bool ProcessCentralDirEntry(const u1 *&p, + size_t *compressed_size, + size_t *uncompressed_size, + char *filename, + size_t filename_size, + u4 *attr, + u4 *offset); + +// +// A class representing a ZipFile for reading. Its public API is exposed +// using the ZipExtractor abstract class. +// +class InputZipFile : public ZipExtractor { + public: + InputZipFile(ZipExtractorProcessor *processor, int fd, off_t in_length, + off_t in_offset, const u1* zipdata_in, const u1* central_dir); + virtual ~InputZipFile(); + + virtual const char* GetError() { + if (errmsg[0] == 0) { + return NULL; + } + return errmsg; + } + + virtual bool ProcessNext(); + virtual void Reset(); + virtual size_t GetSize() { + return in_length_; + } + + virtual u8 CalculateOutputLength(); + + private: + ZipExtractorProcessor *processor; + + int fd_in; // Input file descripor + + // InputZipFile is responsible for maintaining the following + // pointers. They are allocated by the Create() method before + // the object is actually created using mmap. + const u1 * const zipdata_in_; // start of input file mmap + const u1 * zipdata_in_mapped_; // start of still mapped region + const u1 * const central_dir_; // central directory in input file + + size_t in_length_; // size of the input file + size_t in_offset_; // offset the input file + + const u1 *p; // input cursor + + const u1* central_dir_current_; // central dir input cursor + + // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every + // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is + // not enough, we bail out. We only decompress class files, so they should + // be smaller than 64K anyway, but we give a little leeway. + // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the + // ZIP. 
It is set to 128M here so we can uncompress the Bazel server with + // this library. + static const size_t INITIAL_BUFFER_SIZE = 256 * 1024; // 256K + static const size_t MAX_BUFFER_SIZE = 128 * 1024 * 1024; + static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024; + + // These metadata fields are the fields of the ZIP header of the file being + // processed. + u2 extract_version_; + u2 general_purpose_bit_flag_; + u2 compression_method_; + u4 uncompressed_size_; + u4 compressed_size_; + u2 file_name_length_; + u2 extra_field_length_; + const u1 *file_name_; + const u1 *extra_field_; + + // Administration of memory reserved for decompressed data. We use the same + // buffer for each file to avoid some malloc()/free() calls and free the + // memory only in the dtor. C-style memory management is used so that we + // can call realloc. + u1 *uncompressed_data_; + size_t uncompressed_data_allocated_; + + // Copy of the last filename entry - Null-terminated. + char filename[PATH_MAX]; + // The external file attribute field + u4 attr; + + // last error + char errmsg[4*PATH_MAX]; + + int error(const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vsnprintf(errmsg, 4*PATH_MAX, fmt, ap); + va_end(ap); + return -1; + } + + // Check that at least n bytes remain in the input file, otherwise + // abort with an error message. "state" is the name of the field + // we're about to read, for diagnostics. + int EnsureRemaining(size_t n, const char *state) { + size_t in_offset = p - zipdata_in_; + size_t remaining = in_length_ - in_offset; + if (n > remaining) { + return error("Premature end of file (at offset %zd, state=%s); " + "expected %zd more bytes but found %zd.\n", + in_offset, state, n, remaining); + } + return 0; + } + + // Read one entry from input zip file + int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size); + + // Uncompress a file from the archive using zlib. The pointer returned + // is owned by InputZipFile, so it must not be freed. 
Advances the input + // cursor to the first byte after the compressed data. + u1* UncompressFile(); + + // Skip a file + int SkipFile(const bool compressed); + + // Process a file + int ProcessFile(const bool compressed); +}; + +// +// A class implementing ZipBuilder that represent an open zip file for writing. +// +class OutputZipFile : public ZipBuilder { + public: + OutputZipFile(int fd, u1 * const zipdata_out) : + fd_out(fd), + zipdata_out_(zipdata_out), + q(zipdata_out) { + errmsg[0] = 0; + } + + virtual const char* GetError() { + if (errmsg[0] == 0) { + return NULL; + } + return errmsg; + } + + virtual ~OutputZipFile() { Finish(); } + virtual u1* NewFile(const char* filename, const u4 attr); + virtual int FinishFile(size_t filelength, bool compress = false, + bool compute_crc = false); + virtual int WriteEmptyFile(const char *filename); + virtual size_t GetSize() { + return Offset(q); + } + virtual int GetNumberFiles() { + return entries_.size(); + } + virtual int Finish(); + + private: + struct LocalFileEntry { + // Start of the local header (in the output buffer). + size_t local_header_offset; + + // Sizes of the file entry + size_t uncompressed_length; + size_t compressed_length; + + // Compression method + u2 compression_method; + + // CRC32 + u4 crc32; + + // external attributes field + u4 external_attr; + + // Start/length of the file_name in the local header. + u1 *file_name; + u2 file_name_length; + + // Start/length of the extra_field in the local header. + const u1 *extra_field; + u2 extra_field_length; + }; + + int fd_out; // file descriptor for the output file + + // OutputZipFile is responsible for maintaining the following + // pointers. They are allocated by the Create() method before + // the object is actually created using mmap. + u1 * const zipdata_out_; // start of output file mmap + u1 *q; // output cursor + + u1 *header_ptr; // Current pointer to "compression method" entry. 
+ + // List of entries to write the central directory + std::vector entries_; + + // last error + char errmsg[4*PATH_MAX]; + + int error(const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vsnprintf(errmsg, 4*PATH_MAX, fmt, ap); + va_end(ap); + return -1; + } + + // Write the ZIP central directory structure for each local file + // entry in "entries". + void WriteCentralDirectory(); + + // Returns the offset of the pointer relative to the start of the + // output zip file. + size_t Offset(const u1 *const x) { + return x - zipdata_out_; + } + + // Write ZIP file header in the output. Since the compressed size is not + // known in advance, it must be recorded later. This method returns a pointer + // to "compressed size" in the file header that should be passed to + // WriteFileSizeInLocalFileHeader() later. + u1* WriteLocalFileHeader(const char *filename, const u4 attr); + + // Fill in the "compressed size" and "uncompressed size" fields in a local + // file header previously written by WriteLocalFileHeader(). + size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr, + size_t out_length, + bool compress = false, + const u4 crc = 0); +}; + +// +// Implementation of InputZipFile +// +bool InputZipFile::ProcessNext() { + // Process the next entry in the central directory. Also make sure that the + // content pointer is in sync. + size_t compressed, uncompressed; + u4 offset; + if (!ProcessCentralDirEntry(central_dir_current_, &compressed, &uncompressed, + filename, PATH_MAX, &attr, &offset)) { + return false; + } + + // There might be an offset specified in the central directory that does + // not match the file offset, if so, correct the pointer. 
+ if (offset != 0 && (p != (zipdata_in_ + in_offset_ + offset))) { + p = zipdata_in_ + offset; + } + + if (EnsureRemaining(4, "signature") < 0) { + return false; + } + u4 signature = get_u4le(p); + if (signature == LOCAL_FILE_HEADER_SIGNATURE) { + if (ProcessLocalFileEntry(compressed, uncompressed) < 0) { + return false; + } + } else { + error("local file header signature for file %s not found\n", filename); + return false; + } + + return true; +} + +int InputZipFile::ProcessLocalFileEntry( + size_t compressed_size, size_t uncompressed_size) { + if (EnsureRemaining(26, "extract_version") < 0) { + return -1; + } + extract_version_ = get_u2le(p); + general_purpose_bit_flag_ = get_u2le(p); + + if ((general_purpose_bit_flag_ & ~GENERAL_PURPOSE_BIT_FLAG_SUPPORTED) != 0) { + return error("Unsupported value (0x%04x) in general purpose bit flag.\n", + general_purpose_bit_flag_); + } + + compression_method_ = get_u2le(p); + + if (compression_method_ != COMPRESSION_METHOD_DEFLATED && + compression_method_ != COMPRESSION_METHOD_STORED) { + return error("Unsupported compression method (%d).\n", + compression_method_); + } + + // skip over: last_mod_file_time, last_mod_file_date, crc32 + p += 2 + 2 + 4; + compressed_size_ = get_u4le(p); + uncompressed_size_ = get_u4le(p); + file_name_length_ = get_u2le(p); + extra_field_length_ = get_u2le(p); + + if (EnsureRemaining(file_name_length_, "file_name") < 0) { + return -1; + } + file_name_ = p; + p += file_name_length_; + + if (EnsureRemaining(extra_field_length_, "extra_field") < 0) { + return -1; + } + extra_field_ = p; + p += extra_field_length_; + + bool is_compressed = compression_method_ == COMPRESSION_METHOD_DEFLATED; + + // If the zip is compressed, compressed and uncompressed size members are + // zero in the local file header. 
If not, check that they are the same as the + // lengths from the central directory, otherwise, just believe the central + // directory + if (compressed_size_ == 0) { + compressed_size_ = compressed_size; + } else { + if (compressed_size_ != compressed_size) { + return error("central directory and file header inconsistent\n"); + } + } + + if (uncompressed_size_ == 0) { + uncompressed_size_ = uncompressed_size; + } else { + if (uncompressed_size_ != uncompressed_size) { + return error("central directory and file header inconsistent\n"); + } + } + + if (processor->Accept(filename, attr)) { + if (ProcessFile(is_compressed) < 0) { + return -1; + } + } else { + if (SkipFile(is_compressed) < 0) { + return -1; + } + } + + if (general_purpose_bit_flag_ & GENERAL_PURPOSE_BIT_FLAG_COMPRESSED) { + // Skip the data descriptor. Some implementations do not put the signature + // here, so check if the next 4 bytes are a signature, and if so, skip the + // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip + // the next 8 bytes (because the value just read was the CRC). + u4 signature = get_u4le(p); + if (signature == DATA_DESCRIPTOR_SIGNATURE) { + p += 4 * 3; + } else { + p += 4 * 2; + } + } + + if (p > zipdata_in_mapped_ + MAX_MAPPED_REGION) { + munmap(const_cast(zipdata_in_mapped_), MAX_MAPPED_REGION); + zipdata_in_mapped_ += MAX_MAPPED_REGION; + } + + return 0; +} + +int InputZipFile::SkipFile(const bool compressed) { + if (!compressed) { + // In this case, compressed_size_ == uncompressed_size_ (since the file is + // uncompressed), so we can use either. 
+ if (compressed_size_ != uncompressed_size_) { + return error("compressed size != uncompressed size, although the file " + "is uncompressed.\n"); + } + } + + if (EnsureRemaining(compressed_size_, "file_data") < 0) { + return -1; + } + p += compressed_size_; + return 0; +} + +u1* InputZipFile::UncompressFile() { + size_t in_offset = p - zipdata_in_; + size_t remaining = in_length_ - in_offset; + z_stream stream; + + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; + stream.opaque = Z_NULL; + stream.avail_in = remaining; + stream.next_in = (Bytef *) p; + + int ret = inflateInit2(&stream, -MAX_WBITS); + if (ret != Z_OK) { + error("inflateInit: %d\n", ret); + return NULL; + } + + int uncompressed_until_now = 0; + + while (true) { + stream.avail_out = uncompressed_data_allocated_ - uncompressed_until_now; + stream.next_out = uncompressed_data_ + uncompressed_until_now; + int old_avail_out = stream.avail_out; + + ret = inflate(&stream, Z_SYNC_FLUSH); + int uncompressed_now = old_avail_out - stream.avail_out; + uncompressed_until_now += uncompressed_now; + + switch (ret) { + case Z_STREAM_END: { + // zlib said that there is no more data to decompress. + + u1 *new_p = reinterpret_cast(stream.next_in); + compressed_size_ = new_p - p; + uncompressed_size_ = uncompressed_until_now; + p = new_p; + inflateEnd(&stream); + return uncompressed_data_; + } + + case Z_OK: { + // zlib said that there is no more room in the buffer allocated for + // the decompressed data. Enlarge that buffer and try again. 
+ + if (uncompressed_data_allocated_ == MAX_BUFFER_SIZE) { + error("ijar does not support decompressing files " + "larger than %dMB.\n", + (int) (MAX_BUFFER_SIZE/(1024*1024))); + return NULL; + } + + uncompressed_data_allocated_ *= 2; + if (uncompressed_data_allocated_ > MAX_BUFFER_SIZE) { + uncompressed_data_allocated_ = MAX_BUFFER_SIZE; + } + + uncompressed_data_ = reinterpret_cast( + realloc(uncompressed_data_, uncompressed_data_allocated_)); + break; + } + + case Z_DATA_ERROR: + case Z_BUF_ERROR: + case Z_STREAM_ERROR: + case Z_NEED_DICT: + default: { + error("zlib returned error code %d during inflate.\n", ret); + return NULL; + } + } + } +} + +int InputZipFile::ProcessFile(const bool compressed) { + const u1 *file_data; + if (compressed) { + file_data = UncompressFile(); + if (file_data == NULL) { + return -1; + } + } else { + // In this case, compressed_size_ == uncompressed_size_ (since the file is + // uncompressed), so we can use either. + if (compressed_size_ != uncompressed_size_) { + return error("compressed size != uncompressed size, although the file " + "is uncompressed.\n"); + } + + if (EnsureRemaining(compressed_size_, "file_data") < 0) { + return -1; + } + file_data = p; + p += compressed_size_; + } + processor->Process(filename, attr, file_data, uncompressed_size_); + return 0; +} + + +// Reads and returns some metadata of the next file from the central directory: +// - compressed size +// - uncompressed size +// - whether the entry is a class file (to be included in the output). +// Precondition: p points to the beginning of an entry in the central dir +// Postcondition: p points to the beginning of the next entry in the central dir +// Returns true if the central directory contains another file and false if not. +// Of course, in the latter case, the size output variables are not changed. +// Note that the central directory is always followed by another data structure +// that has a signature, so parsing it this way is safe. 
+// Reads one central directory file header starting at p.
+// Returns false when the entry signature does not match (normally the end of
+// the central directory). Otherwise stores the entry's sizes, external
+// attributes and local header offset in the output parameters, copies the
+// file name into filename (truncated to filename_size - 1 characters and
+// always NUL-terminated) and leaves p just past the entry's name, extra
+// field and comment.
+static bool ProcessCentralDirEntry(
+    const u1 *&p, size_t *compressed_size, size_t *uncompressed_size,
+    char *filename, size_t filename_size, u4 *attr, u4 *offset) {
+  u4 signature = get_u4le(p);
+  if (signature != CENTRAL_FILE_HEADER_SIGNATURE) {
+    return false;
+  }
+
+  p += 16;  // skip to 'compressed size' field
+  *compressed_size = get_u4le(p);
+  *uncompressed_size = get_u4le(p);
+  u2 file_name_length = get_u2le(p);
+  u2 extra_field_length = get_u2le(p);
+  u2 file_comment_length = get_u2le(p);
+  p += 4;  // skip to external file attributes field
+  *attr = get_u4le(p);
+  *offset = get_u4le(p);
+  {
+    size_t len = (file_name_length < filename_size)
+      ? file_name_length
+      : (filename_size - 1);
+    memcpy(reinterpret_cast<void*>(filename), p, len);
+    filename[len] = 0;
+  }
+  p += file_name_length;
+  p += extra_field_length;
+  p += file_comment_length;
+  return true;
+}
+
+// Gives a maximum bound on the size of the interface JAR.  Basically, adds
+// the difference between the compressed and uncompressed sizes to the size
+// of the input file.
+u8 InputZipFile::CalculateOutputLength() {
+  const u1* current = central_dir_;
+
+  u8 compressed_size = 0;
+  u8 uncompressed_size = 0;
+  u8 skipped_compressed_size = 0;
+  u4 attr;
+  u4 offset;
+  char filename[PATH_MAX];
+
+  while (true) {
+    size_t file_compressed, file_uncompressed;
+    if (!ProcessCentralDirEntry(current,
+                                &file_compressed, &file_uncompressed,
+                                filename, PATH_MAX, &attr, &offset)) {
+      break;
+    }
+
+    if (processor->Accept(filename, attr)) {
+      compressed_size += (u8) file_compressed;
+      uncompressed_size += (u8) file_uncompressed;
+    } else {
+      skipped_compressed_size += file_compressed;
+    }
+  }
+
+  // The worst case is when the output is simply the input uncompressed. The
+  // metadata in the zip file will stay the same, so the file will grow by the
+  // difference between the compressed and uncompressed sizes.
+  return (u8) in_length_ - skipped_compressed_size
+      + (uncompressed_size - compressed_size);
+}
+
+// Given the data in the zip file, locates the end of central directory
+// record. On success stores the central directory offset recorded in that
+// record in *offset and a pointer to the start of the central directory in
+// *central_dir, and returns true. On failure prints a diagnostic on stderr,
+// leaves the outputs untouched and returns false.
+bool FindZipCentralDirectory(const u1* bytes, size_t in_length,
+                             u4* offset, const u1** central_dir) {
+  static const int MAX_COMMENT_LENGTH = 0xffff;
+  static const int CENTRAL_DIR_LOCATOR_SIZE = 22;
+  // Maximum distance of start of central dir locator from end of file
+  static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE;
+  const u1* last_pos_to_check = in_length < MAX_DELTA
+      ? bytes
+      : bytes + (in_length - MAX_DELTA);
+  const u1* current = NULL;
+  bool found = false;
+
+  // A file shorter than the locator record cannot contain one; skipping the
+  // scan also avoids forming a pointer before the start of the buffer.
+  if (in_length >= static_cast<size_t>(CENTRAL_DIR_LOCATOR_SIZE)) {
+    for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE;
+         current >= last_pos_to_check;
+         current-- ) {
+      const u1* p = current;
+      if (get_u4le(p) != END_OF_CENTRAL_DIR_SIGNATURE) {
+        continue;
+      }
+
+      p += 16;  // skip to comment length field
+      u2 comment_length = get_u2le(p);
+
+      // Does the comment go exactly till the end of the file?
+      if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE
+          != bytes + in_length) {
+        continue;
+      }
+
+      // Hooray, we found it!
+      found = true;
+      break;
+    }
+  }
+
+  if (!found) {
+    fprintf(stderr, "file is invalid or corrupted (missing end of central "
+                    "directory record)\n");
+    return false;
+  }
+
+  const u1* end_of_central_dir = current;
+  get_u4le(current);  // central directory locator signature, already checked
+  u2 number_of_this_disk = get_u2le(current);
+  u2 disk_with_central_dir = get_u2le(current);
+  u2 central_dir_entries_on_this_disk = get_u2le(current);
+  u2 central_dir_entries = get_u2le(current);
+  u4 central_dir_size = get_u4le(current);
+  u4 central_dir_offset = get_u4le(current);
+  u2 file_comment_length = get_u2le(current);
+  current += file_comment_length;  // set current to the end of the central dir
+
+  if (number_of_this_disk != 0
+      || disk_with_central_dir != 0
+      || central_dir_entries_on_this_disk != central_dir_entries) {
+    fprintf(stderr, "multi-disk JAR files are not supported\n");
+    return false;
+  }
+
+  // The central directory must fit between the start of the data and the
+  // locator record; otherwise its computed start would lie outside the buffer.
+  if (central_dir_size > static_cast<size_t>(end_of_central_dir - bytes)) {
+    fprintf(stderr, "file is invalid or corrupted (central directory size "
+                    "exceeds the file size)\n");
+    return false;
+  }
+
+  // Do not change output values before determining that they are OK.
+  *offset = central_dir_offset;
+  // Central directory start can then be used to determine the actual
+  // starts of the zip file (which can be different in case of a non-zip
+  // header like for auto-extractable binaries).
+  *central_dir = end_of_central_dir - central_dir_size;
+  return true;
+}
+
+// Puts the read cursors back to the positions the constructor gave them.
+void InputZipFile::Reset() {
+  central_dir_current_ = central_dir_;
+  zipdata_in_mapped_ = zipdata_in_;
+  p = zipdata_in_ + in_offset_;
+}
+
+// Calls ProcessNext() until it returns false, then reports -1 if GetError()
+// is non-NULL and 0 otherwise.
+int ZipExtractor::ProcessAll() {
+  while (ProcessNext()) {}
+  if (GetError() != NULL) {
+    return -1;
+  }
+  return 0;
+}
+
+// Closes fd while preserving errno, so that callers can still report the
+// failure that made them give up.
+static void CloseKeepingErrno(int fd) {
+  int saved_errno = errno;
+  close(fd);
+  errno = saved_errno;
+}
+
+// Opens and memory-maps filename and locates its central directory.
+// Returns NULL with errno set on failure; the descriptor and the mapping are
+// released on every failure path.
+ZipExtractor* ZipExtractor::Create(const char* filename,
+                                   ZipExtractorProcessor *processor) {
+  int fd_in = open(filename, O_RDONLY);
+  if (fd_in < 0) {
+    return NULL;
+  }
+
+  off_t length = lseek(fd_in, 0, SEEK_END);
+  if (length < 0) {
+    CloseKeepingErrno(fd_in);
+    return NULL;
+  }
+
+  void *zipdata_in = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd_in, 0);
+  if (zipdata_in == MAP_FAILED) {
+    CloseKeepingErrno(fd_in);
+    return NULL;
+  }
+
+  u4 central_dir_offset;
+  const u1 *central_dir = NULL;
+
+  if (!devtools_ijar::FindZipCentralDirectory(
+          static_cast<const u1*>(zipdata_in), length,
+          &central_dir_offset, &central_dir)) {
+    munmap(zipdata_in, length);
+    close(fd_in);
+    errno = EIO;  // we don't really have a good error number
+    return NULL;
+  }
+  const u1 *zipdata_start = static_cast<const u1*>(zipdata_in);
+  // Difference between where the central directory really is and where its
+  // recorded offset says it is (non-zero when data precedes the zip).
+  off_t offset = - static_cast<off_t>(zipdata_start
+                                      + central_dir_offset
+                                      - central_dir);
+
+  return new InputZipFile(processor, fd_in, length, offset,
+                          zipdata_start, central_dir);
+}
+
+// Records the mapping and cursor state and allocates the initial buffer used
+// for uncompressed data.
+InputZipFile::InputZipFile(ZipExtractorProcessor *processor, int fd,
+                           off_t in_length, off_t in_offset,
+                           const u1* zipdata_in, const u1* central_dir)
+  : processor(processor), fd_in(fd),
+    zipdata_in_(zipdata_in), zipdata_in_mapped_(zipdata_in),
+    central_dir_(central_dir), in_length_(in_length), in_offset_(in_offset),
+    p(zipdata_in + in_offset), central_dir_current_(central_dir) {
+  uncompressed_data_allocated_ = INITIAL_BUFFER_SIZE;
+  uncompressed_data_ =
+    reinterpret_cast<u1*>(malloc(uncompressed_data_allocated_));
+  errmsg[0] = 0;
+}
+
+// Frees the uncompressed-data buffer and closes the input descriptor.
+InputZipFile::~InputZipFile() {
+  free(uncompressed_data_);
+  close(fd_in);
+}
+
+
+//
+// Implementation of OutputZipFile
+//
+
+// Writes the local file header of a zero-length, stored entry named filename
+// and records the entry for the central directory. Always returns 0.
+int OutputZipFile::WriteEmptyFile(const char *filename) {
+  const u1* file_name = (const u1*) filename;
+  size_t file_name_length = strlen(filename);
+
+  LocalFileEntry *entry = new LocalFileEntry;
+  entry->local_header_offset = Offset(q);
+  entry->external_attr = 0;
+  entry->crc32 = 0;
+
+  // Output the ZIP local_file_header:
+  put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
+  put_u2le(q, 10);  // extract_version
+  put_u2le(q, 0);  // general_purpose_bit_flag
+  put_u2le(q, 0);  // compression_method
+  put_u2le(q, 0);  // last_mod_file_time
+  put_u2le(q, 0);  // last_mod_file_date
+  put_u4le(q, entry->crc32);  // crc32
+  put_u4le(q, 0);  // compressed_size
+  put_u4le(q, 0);  // uncompressed_size
+  put_u2le(q, file_name_length);
+  put_u2le(q, 0);  // extra_field_length
+  put_n(q, file_name, file_name_length);
+
+  entry->file_name_length = file_name_length;
+  entry->extra_field_length = 0;
+  entry->compressed_length = 0;
+  entry->uncompressed_length = 0;
+  entry->compression_method = 0;
+  entry->extra_field = (const u1 *)"";
+  entry->file_name = (u1*) strdup((const char *) file_name);
+  entries_.push_back(entry);
+
+  return 0;
+}
+
+// Writes one central directory header per recorded entry, followed by the
+// end of central directory record.
+void OutputZipFile::WriteCentralDirectory() {
+  // central directory:
+  const u1 *central_directory_start = q;
+  for (size_t ii = 0; ii < entries_.size(); ++ii) {
+    LocalFileEntry *entry = entries_[ii];
+    put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE);
+    put_u2le(q, 0);  // version made by
+
+    put_u2le(q, ZIP_VERSION_TO_EXTRACT);  // version to extract
+    put_u2le(q, 0);  // general purpose bit flag
+    put_u2le(q, entry->compression_method);  // compression method:
+    put_u2le(q, 0);  // last_mod_file_time
+    put_u2le(q, 0);  // last_mod_file_date
+    put_u4le(q, entry->crc32);  // crc32
+    put_u4le(q, entry->compressed_length);  // compressed_size
+    put_u4le(q, entry->uncompressed_length);  // uncompressed_size
+    put_u2le(q, entry->file_name_length);
+    put_u2le(q, entry->extra_field_length);
+
+    put_u2le(q, 0);  // file comment length
+    put_u2le(q, 0);  // disk number start
+    put_u2le(q, 0);  // internal file attributes
+    put_u4le(q, entry->external_attr);  // external file attributes
+    // relative offset of local header:
+    put_u4le(q, entry->local_header_offset);
+
+    put_n(q, entry->file_name, entry->file_name_length);
+    put_n(q, entry->extra_field, entry->extra_field_length);
+  }
+  u4 central_directory_size = q - central_directory_start;
+
+  put_u4le(q, END_OF_CENTRAL_DIR_SIGNATURE);
+  put_u2le(q, 0);  // number of this disk
+  put_u2le(q, 0);  // number of the disk with the start of the central directory
+  put_u2le(q, entries_.size());  // # central dir entries on this disk
+  put_u2le(q, entries_.size());  // total # entries in the central directory
+  put_u4le(q, central_directory_size);  // size of the central directory
+  put_u4le(q, Offset(central_directory_start));  // offset of start of central
+                                                 // directory wrt starting disk
+  put_u2le(q, 0);  // .ZIP file comment length
+}
+
+// Writes a local file header with placeholder method, CRC and sizes for a new
+// entry and records the entry. Returns a pointer to the compression method
+// field so that WriteFileSizeInLocalFileHeader can patch the placeholders.
+u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) {
+  off_t file_name_length_ = strlen(filename);
+  LocalFileEntry *entry = new LocalFileEntry;
+  entry->local_header_offset = Offset(q);
+  entry->file_name_length = file_name_length_;
+  entry->file_name = new u1[file_name_length_];
+  entry->external_attr = attr;
+  memcpy(entry->file_name, filename, file_name_length_);
+  entry->extra_field_length = 0;
+  entry->extra_field = (const u1 *)"";
+  // Nothing has assigned crc32 yet; give the placeholder written below a
+  // defined value. FinishFile stores the real one.
+  entry->crc32 = 0;
+
+  // Output the ZIP local_file_header:
+  put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
+  put_u2le(q, ZIP_VERSION_TO_EXTRACT);  // version to extract
+  put_u2le(q, 0);  // general purpose bit flag
+  u1 *header_ptr = q;
+  put_u2le(q, COMPRESSION_METHOD_STORED);  // compression method = placeholder
+  put_u2le(q, 0);  // last_mod_file_time
+  put_u2le(q, 0);  // last_mod_file_date
+  put_u4le(q, entry->crc32);  // crc32 = placeholder
+  put_u4le(q, 0);  // compressed_size = placeholder
+  put_u4le(q, 0);  // uncompressed_size = placeholder
+  put_u2le(q, entry->file_name_length);
+  put_u2le(q, entry->extra_field_length);
+
+  put_n(q, entry->file_name, entry->file_name_length);
+  put_n(q, entry->extra_field, entry->extra_field_length);
+  entries_.push_back(entry);
+
+  return header_ptr;
+}
+
+// Try to compress a file entry in memory using the deflate algorithm.
+// It will compress buf (of size length) unless the compressed size is bigger
+// than the input size. The result will overwrite the content of buf and the
+// final size is returned. If compression cannot be attempted at all, buf is
+// left untouched and length is returned.
+size_t TryDeflate(u1 *buf, size_t length) {
+  u1 *outbuf = reinterpret_cast<u1 *>(malloc(length));
+  if (outbuf == NULL) {
+    // No scratch buffer => return the buffer uncompressed
+    return length;
+  }
+  z_stream stream;
+
+  // Initialize the z_stream struct for reading from buf and writing in outbuf.
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.total_in = length;
+  stream.avail_in = length;
+  stream.total_out = length;
+  stream.avail_out = length;
+  stream.next_in = buf;
+  stream.next_out = outbuf;
+
+  // A negative window size makes deflateInit2 produce a raw deflate stream,
+  // without the zlib wrapper.
+  if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                  -MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
+    // Failure to compress => return the buffer uncompressed
+    free(outbuf);
+    return length;
+  }
+
+  if (deflate(&stream, Z_FINISH) == Z_STREAM_END) {
+    // Compression successful and fits in outbuf, let's copy the result in buf.
+    length = stream.total_out;
+    memcpy(buf, outbuf, length);
+  }
+
+  deflateEnd(&stream);
+  free(outbuf);
+
+  // Return the length of the resulting buffer
+  return length;
+}
+
+// Optionally deflates the out_length bytes at q in place, then patches the
+// compression method, CRC and sizes of the local file header whose
+// compression method field is at header_ptr. Returns the stored size.
+size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr,
+                                                     size_t out_length,
+                                                     bool compress,
+                                                     const u4 crc) {
+  size_t compressed_size = out_length;
+  if (compress) {
+    compressed_size = TryDeflate(q, out_length);
+  }
+  // compression method
+  if (compressed_size < out_length) {
+    put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED);
+  } else {
+    put_u2le(header_ptr, COMPRESSION_METHOD_STORED);
+  }
+  header_ptr += 4;  // skip last_mod_file_time and last_mod_file_date
+  put_u4le(header_ptr, crc);  // crc32
+  put_u4le(header_ptr, compressed_size);  // compressed_size
+  put_u4le(header_ptr, out_length);  // uncompressed_size
+  return compressed_size;
+}
+
+// Writes the central directory, truncates the output file to its final size
+// and closes it. Later calls do nothing because fd_out is then -1.
+int OutputZipFile::Finish() {
+  // -1 is the "already closed" marker; descriptor 0 is a valid open file.
+  if (fd_out >= 0) {
+    WriteCentralDirectory();
+    if (ftruncate(fd_out, GetSize()) < 0) {
+      return error("ftruncate(fd_out, GetSize()): %s", strerror(errno));
+    }
+    if (close(fd_out) < 0) {
+      return error("close(fd_out): %s", strerror(errno));
+    }
+    fd_out = -1;
+  }
+  return 0;
+}
+
+// Starts a new entry: writes its local file header and returns the position
+// at which the caller must write the file content.
+u1* OutputZipFile::NewFile(const char* filename, const u4 attr) {
+  header_ptr = WriteLocalFileHeader(filename, attr);
+  return q;
+}
+
+// Completes the entry started by NewFile: optionally computes the CRC and
+// compresses the filelength bytes at q, patches the local header, updates the
+// recorded entry and advances q past the stored data. Always returns 0.
+int OutputZipFile::FinishFile(size_t filelength, bool compress,
+                              bool compute_crc) {
+  u4 crc = 0;
+  if (compute_crc) {
+    crc = crc32(crc, q, filelength);
+  }
+  size_t compressed_size =
+    WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress, crc);
+  entries_.back()->crc32 = crc;
+  entries_.back()->compressed_length = compressed_size;
+  entries_.back()->uncompressed_length = filelength;
+  if (compressed_size < filelength) {
+    entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED;
+  } else {
+    entries_.back()->compression_method = COMPRESSION_METHOD_STORED;
+  }
+  q += compressed_size;
+  return 0;
+}
+
+// Creates zip_file, sizes it to estimated_size (capped at kMaximumOutputSize)
+// and maps it for writing. Returns NULL with errno set on failure; the
+// descriptor is closed on every failure path.
+ZipBuilder* ZipBuilder::Create(const char* zip_file, u8 estimated_size) {
+  if (estimated_size > kMaximumOutputSize) {
+    fprintf(stderr,
+            "Uncompressed input jar has size %llu, "
+            "which exceeds the maximum supported output size %llu.\n"
+            "Assuming that ijar will be smaller and hoping for the best.\n",
+            estimated_size, kMaximumOutputSize);
+    estimated_size = kMaximumOutputSize;
+  }
+
+  int fd_out = open(zip_file, O_CREAT|O_RDWR|O_TRUNC, 0644);
+  if (fd_out < 0) {
+    return NULL;
+  }
+
+  // Create mmap-able sparse file
+  if (ftruncate(fd_out, estimated_size) < 0) {
+    int saved_errno = errno;
+    close(fd_out);
+    errno = saved_errno;
+    return NULL;
+  }
+
+  // Ensure that any buffer overflow in JarStripper will result in
+  // SIGSEGV or SIGBUS by over-allocating beyond the end of the file.
+  size_t mmap_length = std::min(estimated_size + sysconf(_SC_PAGESIZE),
+                                (u8) std::numeric_limits<size_t>::max());
+
+  void *zipdata_out = mmap(NULL, mmap_length, PROT_WRITE,
+                           MAP_SHARED, fd_out, 0);
+  if (zipdata_out == MAP_FAILED) {
+    int saved_errno = errno;  // fprintf and close may overwrite errno
+    fprintf(stderr, "output_length=%llu\n", estimated_size);
+    close(fd_out);
+    errno = saved_errno;
+    return NULL;
+  }
+
+  return new OutputZipFile(fd_out, (u1*) zipdata_out);
+}
+
+// Returns an upper bound for the size of a ZIP storing the files of the
+// NULL-terminated array "files", or 0 if one of them cannot be stat'ed.
+u8 ZipBuilder::EstimateSize(char **files) {
+  struct stat statst;
+  // Digital signature field size = 6, End of central directory = 22, Total = 28
+  u8 size = 28;
+  // Count the size of all the files in the input to estimate the size of the
+  // output.
+  for (int i = 0; files[i] != NULL; i++) {
+    if (stat(files[i], &statst) != 0) {
+      fprintf(stderr, "File %s does not seem to exist.\n", files[i]);
+      return 0;
+    }
+    size += statst.st_size;
+    // Add sizes of Zip meta data
+    // local file header = 30 bytes
+    // data descriptor = 12 bytes
+    // central directory descriptor = 46 bytes
+    //    Total: 88bytes
+    size += 88;
+    // The filename is stored twice (once in the central directory
+    // and once in the local file header).
+    size += strlen(files[i]) * 2;
+  }
+  return size;
+}
+
+}  // namespace devtools_ijar
diff --git a/tools/ijar/zip.h b/tools/ijar/zip.h
new file mode 100644
index 000000000..dda2c6e18
--- /dev/null
+++ b/tools/ijar/zip.h
@@ -0,0 +1,173 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// zip.h -- .zip (.jar) file reading/writing routines.
+//
+// This file specifies the interface to use the ZIP implementation of ijar.
+//
+
+#ifndef INCLUDED_THIRD_PARTY_IJAR_ZIP_H
+#define INCLUDED_THIRD_PARTY_IJAR_ZIP_H
+
+#include <sys/stat.h>
+
+#include "common.h"
+
+namespace devtools_ijar {
+
+// Tells if this is a directory entry from the mode. This method
+// is safer than zipattr_to_mode(attr) & S_IFDIR because the unix
+// mode might not be set in DOS zip files.
+inline bool zipattr_is_dir(u4 attr) { return (attr & 0x10) != 0; }
+
+// Convert a Unix file mode to a ZIP file attribute
+inline u4 mode_to_zipattr(mode_t m) {
+  return (((u4) m) << 16) + (S_ISDIR(m) ? 0x10 : 0);
+}
+
+// Convert a ZIP file attribute to a Unix file mode
+inline mode_t zipattr_to_mode(u4 attr) {
+  return ((mode_t) ((attr >> 16) & 0xffff));
+}
+
+//
+// Class interface for building ZIP files
+//
+class ZipBuilder {
+ public:
+  virtual ~ZipBuilder() {}
+
+  // Returns the text for the last error, or null on no last error.
+  virtual const char* GetError() = 0;
+
+  // Add a new file to the ZIP, the file will have path "filename"
+  // and external attributes "attr". This function returns a pointer
+  // to a memory buffer to write the data of the file into. This buffer
+  // is owned by ZipBuilder and should not be free'd by the caller. The
+  // file length is then specified when the file is finished being written
+  // using the FinishFile(size_t) function.
+  // On failure, returns NULL and GetError() will return a non-empty message.
+  virtual u1* NewFile(const char* filename, const u4 attr) = 0;
+
+  // Finish writing a file and specify its length. After calling this method
+  // one should not reuse the pointer given by NewFile. The file can be
+  // compressed using the deflate algorithm by setting `compress` to true.
+  // By default, CRC32 are not computed as java tooling doesn't care, but
+  // computing it can be activated by setting `compute_crc` to true.
+  // On failure, returns -1 and GetError() will return a non-empty message.
+  virtual int FinishFile(size_t filelength,
+                         bool compress = false,
+                         bool compute_crc = false) = 0;
+
+  // Write an empty file, it is equivalent to:
+  //   NewFile(filename, 0);
+  //   FinishFile(0);
+  // On failure, returns -1 and GetError() will return a non-empty message.
+  virtual int WriteEmptyFile(const char* filename) = 0;
+
+  // Finish writing the ZIP file. This method can be called only once
+  // (subsequent calls will do nothing) and none of
+  // NewFile/FinishFile/WriteEmptyFile should be called after calling Finish. If
+  // this method was not called when the object is destroyed, it will be called.
+  // It is here as a convenience to get information on the final generated ZIP
+  // file.
+  // On failure, returns -1 and GetError() will return a non-empty message.
+  virtual int Finish() = 0;
+
+  // Get the current size of the ZIP file. This size will not be matching the
+  // final ZIP file until Finish() has been called because Finish() is actually
+  // writing the central directory of the ZIP File.
+  virtual size_t GetSize() = 0;
+
+  // Returns the current number of files stored in the ZIP.
+  virtual int GetNumberFiles() = 0;
+
+  // Create a new ZipBuilder writing the file zip_file and the size of the
+  // output will be at most estimated_size. Use ZipBuilder::EstimateSize() or
+  // ZipExtractor::CalculateOutputLength() to have an estimated_size depending
+  // on a list of files to store.
+  // On failure, returns NULL. Refer to errno for error code.
+  static ZipBuilder* Create(const char* zip_file, u8 estimated_size);
+
+  // Estimate the maximum size of the ZIP files containing files in the "files"
+  // null-terminated array.
+  // Returns 0 on error.
+  static u8 EstimateSize(char **files);
+};
+
+//
+// An abstract class to process data from a ZipExtractor.
+// Derive from this class if you wish to process data from a ZipExtractor.
+//
+class ZipExtractorProcessor {
+ public:
+  virtual ~ZipExtractorProcessor() {}
+
+  // Tells whether to skip or process the file "filename". "attr" is the
+  // external file attributes and can be converted to unix mode using the
+  // zipattr_to_mode() function. This method is supposed to return true
+  // if the file should be processed and false if it should be skipped.
+  virtual bool Accept(const char* filename, const u4 attr) = 0;
+
+  // Process a file accepted by Accept. The file "filename" has external
+  // attributes "attr" and length "size". The file content is accessible
+  // in the buffer pointed by "data".
+  virtual void Process(const char* filename, const u4 attr,
+                       const u1* data, const size_t size) = 0;
+};
+
+//
+// Class interface for reading ZIP files
+//
+class ZipExtractor {
+ public:
+  virtual ~ZipExtractor() {}
+
+  // Returns the text for the last error, or null on no last error.
+  virtual const char* GetError() = 0;
+
+  // Process the next file, returns false if the end of ZIP file has been
+  // reached. The processor provided by the Create method will be called
+  // if a file is encountered. If false is returned, check the return value
+  // of GetError() for potential errors.
+  virtual bool ProcessNext() = 0;
+
+  // Process all the files, returns -1 on error (GetError() will be populated
+  // on error).
+  virtual int ProcessAll();
+
+  // Reset the file pointer to the beginning.
+  virtual void Reset() = 0;
+
+  // Return the size of the ZIP file.
+  virtual size_t GetSize() = 0;
+
+  // Return the size of the resulting zip file by keeping only the files
+  // accepted by the processor and storing them uncompressed. This
+  // method can be used to create a ZipBuilder for storing a subset
+  // of the input files.
+  // On error, 0 is returned and GetError() returns a non-empty message.
+  virtual u8 CalculateOutputLength() = 0;
+
+  // Create a ZipExtractor that extracts the zip file "filename" and processes
+  // it with "processor".
+  // On error, a null pointer is returned and the value of errno should be
+  // checked.
+  static ZipExtractor* Create(const char* filename,
+                              ZipExtractorProcessor *processor);
+};
+
+}  // namespace devtools_ijar
+
+#endif  // INCLUDED_THIRD_PARTY_IJAR_ZIP_H
diff --git a/tools/ijar/zip_main.cc b/tools/ijar/zip_main.cc
new file mode 100644
index 000000000..3f4a50ccb
--- /dev/null
+++ b/tools/ijar/zip_main.cc
@@ -0,0 +1,328 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Author: Alan Donovan
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//
+// Zip / Unzip file using ijar zip implementation.
+//
+// Note that the ijar Zip implementation does not compute CRC-32 by default
+// because Java tooling doesn't care; the create operation below nevertheless
+// requests it for every non-empty file it stores.
+//
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <memory>
+
+#include "zip.h"
+
+namespace devtools_ijar {
+
+#define SYSCALL(expr)  do { \
+                         if ((expr) < 0) { \
+                           perror(#expr); \
+                           abort(); \
+                         } \
+                       } while (0)
+
+//
+// A ZipExtractorProcessor that extract all files in the ZIP file.
+//
+class UnzipProcessor : public ZipExtractorProcessor {
+ public:
+  // Create a processor who will extract the files into output_root
+  // if "extract" is set to true and will print the list of files and
+  // their unix modes if "verbose" is set to true.
+  UnzipProcessor(const char *output_root, bool verbose, bool extract)
+    : output_root_(output_root), verbose_(verbose), extract_(extract) {}
+  virtual ~UnzipProcessor() {}
+
+  virtual void Process(const char* filename, const u4 attr,
+                       const u1* data, const size_t size);
+  virtual bool Accept(const char* filename, const u4 attr) {
+    return true;
+  }
+
+ private:
+  const char *output_root_;
+  const bool verbose_;
+  const bool extract_;
+};
+
+// Concatenate 2 paths, path1 and path2, using / as a directory separator and
+// putting the result in "out". "size" specifies the size of the output buffer
+void concat_path(char* out, const size_t size,
+                 const char *path1, const char *path2) {
+  int len1 = strlen(path1);
+  size_t l = len1;
+  // Look at the last character of path1, not at its NUL terminator.
+  bool ends_with_slash = len1 > 0 && path1[len1 - 1] == '/';
+  strncpy(out, path1, size - 1);
+  out[size-1] = 0;
+  if (l < size - 1 && !ends_with_slash && path2[0] != '/') {
+    out[l] = '/';
+    l++;
+    out[l] = 0;
+  }
+  if (l < size - 1) {
+    strncat(out, path2, size - 1 - l);
+  }
+}
+
+// Do a recursive mkdir of all folders of path except the last path
+// segment (if path ends with a / then the last path segment is empty).
+// All folders are created using "mode" for creation mode.
+void mkdirs(const char *path, mode_t mode) {
+  char path_[PATH_MAX];
+  struct stat statst;
+  strncpy(path_, path, PATH_MAX);
+  path_[PATH_MAX-1] = 0;
+  char *pointer = path_;
+  while ((pointer = strchr(pointer, '/')) != NULL) {
+    if (path_ != pointer) {  // skip leading slash
+      *pointer = 0;
+      if (stat(path_, &statst) != 0) {
+        if (mkdir(path_, mode) < 0) {
+          fprintf(stderr, "Cannot create folder %s: %s\n",
+                  path_, strerror(errno));
+          abort();
+        }
+      }
+      *pointer = '/';
+    }
+    pointer++;
+  }
+}
+
+// Lists the entry when verbose and, when extracting, creates its parent
+// folders and writes its content below output_root_. Aborts on I/O errors.
+void UnzipProcessor::Process(const char* filename, const u4 attr,
+                             const u1* data, const size_t size) {
+  mode_t mode = zipattr_to_mode(attr);
+  mode_t perm = mode & 0777;
+  bool isdir = S_ISDIR(mode);
+  if (attr == 0) {
+    // Fallback when the external attribute is not set.
+    size_t len = strlen(filename);
+    isdir = len > 0 && filename[len - 1] == '/';
+    perm = 0777;
+  }
+  if (verbose_) {
+    printf("%c %o %s\n", isdir ? 'd' : 'f', perm, filename);
+  }
+  if (extract_) {
+    char path[PATH_MAX];
+    int fd;
+    concat_path(path, PATH_MAX, output_root_, filename);
+    mkdirs(path, perm);
+    if (!isdir) {
+      // O_TRUNC: do not keep stale bytes of a longer pre-existing file.
+      fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, perm);
+      if (fd < 0) {
+        fprintf(stderr, "Cannot open file %s for writing: %s\n",
+                path, strerror(errno));
+        abort();
+      }
+      SYSCALL(write(fd, data, size));
+      SYSCALL(close(fd));
+    }
+  }
+}
+
+// Get the basename of path and store it in output. output_size
+// is the size of the output buffer.
+void basename(const char *path, char *output, size_t output_size) {
+  const char *pointer = strrchr(path, '/');
+  if (pointer == NULL) {
+    pointer = path;
+  } else {
+    pointer++;  // Skip the leading slash.
+  }
+  strncpy(output, pointer, output_size);
+  output[output_size-1] = 0;
+}
+
+
+// Execute the extraction (or just listing if just v is provided)
+int extract(char *zipfile, bool verbose, bool extract) {
+  char output_root[PATH_MAX];
+  if (getcwd(output_root, PATH_MAX) == NULL) {
+    fprintf(stderr, "Unable to get current directory: %s.\n",
+            strerror(errno));
+    return -1;
+  }
+
+  UnzipProcessor processor(output_root, verbose, extract);
+  std::unique_ptr<ZipExtractor> extractor(ZipExtractor::Create(zipfile,
+                                                               &processor));
+  if (extractor.get() == NULL) {
+    fprintf(stderr, "Unable to open zip file %s: %s.\n", zipfile,
+            strerror(errno));
+    return -1;
+  }
+
+  if (extractor->ProcessAll() < 0) {
+    fprintf(stderr, "%s.\n", extractor->GetError());
+    return -1;
+  }
+  return 0;
+}
+
+// Execute the create operation
+int create(char *zipfile, char **files, bool flatten, bool verbose,
+           bool compress) {
+  struct stat statst;
+  u8 size = ZipBuilder::EstimateSize(files);
+  if (size == 0) {
+    return -1;
+  }
+  std::unique_ptr<ZipBuilder> builder(ZipBuilder::Create(zipfile, size));
+  if (builder.get() == NULL) {
+    fprintf(stderr, "Unable to create zip file %s: %s.\n",
+            zipfile, strerror(errno));
+    return -1;
+  }
+  for (int i = 0; files[i] != NULL; i++) {
+    if (stat(files[i], &statst) != 0) {
+      fprintf(stderr, "Can't stat file %s: %s.\n", files[i], strerror(errno));
+      return -1;
+    }
+    char path[PATH_MAX];
+    bool isdir = S_ISDIR(statst.st_mode);
+
+    if (flatten && isdir) {
+      continue;
+    }
+
+    // Compute the path, flattening it if requested
+    if (flatten) {
+      basename(files[i], path, PATH_MAX);
+    } else {
+      strncpy(path, files[i], PATH_MAX);
+      path[PATH_MAX-1] = 0;
+      size_t len = strlen(path);
+      if (isdir && len < PATH_MAX - 1) {
+        // Add the trailing slash for folders
+        path[len] = '/';
+        path[len+1] = 0;
+      }
+    }
+
+    if (verbose) {
+      mode_t perm = statst.st_mode & 0777;
+      printf("%c %o %s\n", isdir ? 'd' : 'f', perm, path);
+    }
+
+    u1 *buffer = builder->NewFile(path, mode_to_zipattr(statst.st_mode));
+    if (isdir || statst.st_size == 0) {
+      builder->FinishFile(0);
+    } else {
+      // mmap the input file and memcpy
+      int fd = open(files[i], O_RDONLY);
+      if (fd < 0) {
+        fprintf(stderr, "Can't open file %s for reading: %s.\n",
+                files[i], strerror(errno));
+        return -1;
+      }
+      void *data = mmap(NULL, statst.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+      if (data == MAP_FAILED) {
+        fprintf(stderr, "Can't mmap file %s for reading: %s.\n",
+                files[i], strerror(errno));
+        close(fd);
+        return -1;
+      }
+      // The mapping stays valid after its descriptor is closed.
+      close(fd);
+      memcpy(buffer, data, statst.st_size);
+      munmap(data, statst.st_size);
+      builder->FinishFile(statst.st_size, compress, true);
+    }
+  }
+  if (builder->Finish() < 0) {
+    fprintf(stderr, "%s\n", builder->GetError());
+    return -1;
+  }
+  return 0;
+}
+
+}  // namespace devtools_ijar
+
+//
+// main method
+//
+static void usage(char *progname) {
+  fprintf(stderr, "Usage: %s [vxc[fC]] x.zip [file1...filen]\n", progname);
+  fprintf(stderr, " v verbose - list all file in x.zip\n");
+  fprintf(stderr, " x extract - extract file in x.zip in current directory\n");
+  fprintf(stderr, " c create - add files to x.zip\n");
+  fprintf(stderr, " f flatten - flatten files to use with create operation\n");
+  fprintf(stderr,
+          " C compress - compress files when using the create operation\n");
+  fprintf(stderr, "x and c cannot be used in the same command-line.\n");
+  exit(1);
+}
+
+int main(int argc, char **argv) {
+  bool extract = false;
+  bool verbose = false;
+  bool create = false;
+  bool compress = false;
+  bool flatten = false;
+
+  if (argc < 3) {
+    usage(argv[0]);
+  }
+
+  for (int i = 0; argv[1][i] != 0; i++) {
+    switch (argv[1][i]) {
+      case 'x':
+        extract = true;
+        break;
+      case 'v':
+        verbose = true;
+        break;
+      case 'c':
+        create = true;
+        break;
+      case 'f':
+        flatten = true;
+        break;
+      case 'C':
+        compress = true;
+        break;
+      default:
+        usage(argv[0]);
+    }
+  }
+  if (create) {
+    if (extract) {
+      usage(argv[0]);
+    }
+    // Create a zip
+    return devtools_ijar::create(argv[2], argv + 3, flatten, verbose, compress);
+  } else {
+    if (flatten) {
+      usage(argv[0]);
+    }
+    // Extraction / list mode
+    return devtools_ijar::extract(argv[2], verbose, extract);
+  }
+}