001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.fileupload2.core; 018 019import java.io.IOException; 020import java.io.InputStream; 021import java.io.InputStreamReader; 022import java.io.OutputStream; 023import java.io.Reader; 024import java.io.UncheckedIOException; 025import java.io.UnsupportedEncodingException; 026import java.nio.charset.Charset; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.CopyOption; 029import java.nio.file.Files; 030import java.nio.file.InvalidPathException; 031import java.nio.file.Path; 032import java.nio.file.Paths; 033import java.nio.file.StandardCopyOption; 034import java.util.UUID; 035import java.util.concurrent.atomic.AtomicInteger; 036import java.util.function.Supplier; 037 038import org.apache.commons.fileupload2.core.DeferrableOutputStream.Listener; 039import org.apache.commons.fileupload2.core.DeferrableOutputStream.State; 040import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder; 041import org.apache.commons.io.Charsets; 042import org.apache.commons.io.FileCleaningTracker; 043import org.apache.commons.io.build.AbstractOrigin; 044import org.apache.commons.io.file.PathUtils; 045 046 047/** 048 * The default implementation of the {@link FileItem FileItem} interface. 049 * 050 * <p>After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see 051 * {@code org.apache.commons.fileupload2.core.servlet.ServletFileUpload 052 * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an 053 * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come handy 054 * with large files.</p> 055 * 056 * <p><em>State model</em>: Instances of {@link DiskFileItem} are subject to a carefully designed state model. 057 * Depending on the so-called {@link #getThreshold() threshold}, either of the three models are possible:</p> 058 * <ol> 059 * <li><em>threshold = -1</em> 060 * Uploaded data is never kept in memory. Instead, a temporary file is being created immediately. 061 * 062 * {@link #isInMemory()} will always return false, {@link #getPath()} will always return the path 063 * of an existing file. The temporary file may be empty.</li> 064 * <li><em>threshold = 0</em> 065 * Uploaded data is never kept in memory. (Same as threshold=-1.) However, the temporary file is 066 * only created, if data was uploaded. Or, in other words: The uploaded file will never be 067 * empty. 068 * 069 * {@link #isInMemory()} will return true, if no data was uploaded, otherwise it will be false. 070 * In the former case {@link #getPath()} will return null, but in the latter case it returns 071 * the path of an existing, non-empty file.</li> 072 * <li><em>threshold > 0</em> 073 * Uploaded data will be kept in memory, if the size is below the threshold. If the size 074 * is equal to, or above the threshold, then a temporary file has been created, and all 075 * uploaded data has been transferred to that file. 076 * 077 * {@link #isInMemory()} returns true, if the size of the uploaded data is below the threshold. 078 * If so, {@link #getPath()} returns null. Otherwise, {@link #isInMemory()} returns false, 079 * and {@link #getPath()} returns the path of an existing, temporary file. The size 080 * of the temporary file is equal to, or above the threshold.</li> 081 * </ol> 082 * 083 * <p>Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a 084 * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must 085 * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of 086 * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the 087 * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web 088 * application ends. See the section on "Resource cleanup" in the users guide of Commons FileUpload.</p> 089 */ 090public final class DiskFileItem implements FileItem<DiskFileItem> { 091 092 /** 093 * Builds a new {@link DiskFileItem} instance. 094 * <p> 095 * For example: 096 * </p> 097 * 098 * <pre>{@code 099 * final FileItem fileItem = fileItemFactory.fileItemBuilder() 100 * .setFieldName("FieldName") 101 * .setContentType("ContentType") 102 * .setFormField(true) 103 * .setFileName("FileName") 104 * .setFileItemHeaders(...) 105 * .get(); 106 * } 107 * </pre> 108 */ 109 public static class Builder extends AbstractFileItemBuilder<DiskFileItem, Builder> { 110 111 /** 112 * The threshold. We do maintain this separate from the {@link #getBufferSize()}, 113 * because the parent class might change the value in {@link #setBufferSize(int)}. 114 */ 115 private int threshold; 116 117 /** 118 * Constructs a new instance. 119 */ 120 public Builder() { 121 setBufferSize(DiskFileItemFactory.DEFAULT_THRESHOLD); 122 setPath(PathUtils.getTempDirectory()); 123 setCharset(DEFAULT_CHARSET); 124 setCharsetDefault(DEFAULT_CHARSET); 125 } 126 127 /** 128 * Constructs a new instance. 129 * <p> 130 * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an 131 * {@link UnsupportedOperationException}. 132 * </p> 133 * 134 * @return a new instance. 135 * @throws UnsupportedOperationException if the origin cannot provide a Path. 136 * @see AbstractOrigin#getReader(Charset) 137 */ 138 @Override 139 public DiskFileItem get() { 140 final var diskFileItem = new DiskFileItem(this); 141 final var tracker = getFileCleaningTracker(); 142 if (tracker != null) { 143 diskFileItem.setFileCleaningTracker(tracker); 144 } 145 return diskFileItem; 146 } 147 148 /** 149 * Equivalent to {@link #getThreshold()}. 150 * @return The threshold, which is being used. 151 * @see #getThreshold() 152 * @deprecated Since 2.0.0, use {@link #getThreshold()} instead. 153 */ 154 public int getBufferSize() { 155 return getThreshold(); 156 } 157 158 /** 159 * Returns the threshold. 160 * @return The threshold. 161 */ 162 public int getThreshold() { 163 return threshold; 164 } 165 166 /** 167 * Equivalent to {@link #setThreshold(int)}. 168 * @param bufferSize The threshold, which is being used. 169 * @see #setThreshold(int) 170 * @return This builder. 171 * @deprecated Since 2.0.0, use {@link #setThreshold(int)} instead. 172 */ 173 @Override 174 public Builder setBufferSize(final int bufferSize) { 175 return setThreshold(bufferSize); 176 } 177 178 /** 179 * Sets the threshold. The uploaded data is typically kept in memory, until 180 * a certain number of bytes (the threshold) is reached. At this point, the 181 * incoming data is transferred to a temporary file, and the in-memory data 182 * is removed. 183 * @param threshold The threshold, which is being used. 184 * @return This builder. 185 */ 186 public Builder setThreshold(final int threshold) { 187 this.threshold = threshold; 188 return this; 189 } 190 } 191 192 /** 193 * Default content charset to be used when no explicit charset parameter is provided by the sender. Media subtypes of the "text" type are defined to have a 194 * default charset value of "ISO-8859-1" when received via HTTP. 195 */ 196 public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1; 197 198 /** 199 * UID used in unique file name generation. 200 */ 201 private static final String UID = UUID.randomUUID().toString().replace('-', '_'); 202 203 /** 204 * Counter used in unique identifier generation. 205 */ 206 private static final AtomicInteger COUNTER = new AtomicInteger(); 207 208 /** 209 * Constructs a new {@link Builder}. 210 * 211 * @return a new {@link Builder}. 212 */ 213 public static Builder builder() { 214 return new Builder(); 215 } 216 217 /** 218 * Tests if the file name is valid. For example, if it contains a NUL characters, it's invalid. If the file name is valid, it will be returned without any 219 * modifications. Otherwise, throw an {@link InvalidPathException}. 220 * 221 * @param fileName The file name to check 222 * @return Unmodified file name, if valid. 223 * @throws InvalidPathException The file name is invalid. 224 */ 225 public static String checkFileName(final String fileName) { 226 if (fileName != null) { 227 // Specific NUL check to build a better exception message. 228 final var indexOf0 = fileName.indexOf(0); 229 if (indexOf0 != -1) { 230 final var sb = new StringBuilder(); 231 for (var i = 0; i < fileName.length(); i++) { 232 final var c = fileName.charAt(i); 233 if (c == 0) { 234 sb.append("\\0"); 235 } else { 236 sb.append(c); 237 } 238 } 239 throw new InvalidPathException(fileName, sb.toString(), indexOf0); 240 } 241 // Throws InvalidPathException on invalid file names 242 Paths.get(fileName); 243 } 244 return fileName; 245 } 246 247 /** 248 * Gets an identifier that is unique within the class loader used to load this class, but does not have random-like appearance. 249 * 250 * @return A String with the non-random looking instance identifier. 251 */ 252 private static String getUniqueId() { 253 final var limit = 100_000_000; 254 final var current = COUNTER.getAndIncrement(); 255 var id = Integer.toString(current); 256 257 // If you manage to get more than 100 million of ids, you'll 258 // start getting ids longer than 8 characters. 259 if (current < limit) { 260 id = ("00000000" + id).substring(id.length()); 261 } 262 return id; 263 } 264 265 /** 266 * The name of the form field as provided by the browser. 267 */ 268 private String fieldName; 269 270 /** 271 * The content type passed by the browser, or {@code null} if not defined. 272 */ 273 private final String contentType; 274 275 /** 276 * Whether or not this item is a simple form field. 277 */ 278 private volatile boolean isFormField; 279 280 /** 281 * The original file name in the user's file system. 282 */ 283 private final String fileName; 284 285 /** 286 * The threshold above which uploads will be stored on disk. 287 */ 288 private final int threshold; 289 290 /** 291 * The directory in which uploaded files will be stored, if stored on disk, never null. 292 */ 293 private final Path repository; 294 295 /** 296 * Output stream for this item. 297 */ 298 private DeferrableOutputStream dos; 299 300 /** 301 * The file items headers. 302 */ 303 private FileItemHeaders fileItemHeaders; 304 305 /** 306 * Default content Charset to be used when no explicit Charset parameter is provided by the sender. 307 */ 308 private Charset charsetDefault = DEFAULT_CHARSET; 309 310 /** 311 * The {@link FileCleaningTracker}, which is being used to remove 312 * temporary files. 313 */ 314 private FileCleaningTracker fileCleaningTracker; 315 316 /** 317 * Constructs a new {@code DiskFileItem} instance. 318 * 319 * @param builder The DiskFileItem builder. 320 */ 321 private DiskFileItem(final Builder builder) { 322 this.fieldName = builder.getFieldName(); 323 this.contentType = builder.getContentType(); 324 this.charsetDefault = builder.getCharset(); 325 this.isFormField = builder.isFormField(); 326 this.fileName = builder.getFileName(); 327 this.fileItemHeaders = builder.getFileItemHeaders(); 328 this.threshold = builder.getThreshold(); 329 this.repository = builder.getPath() != null ? builder.getPath() : PathUtils.getTempDirectory(); 330 } 331 332 /** 333 * Deletes the underlying storage for a file item, including deleting any associated temporary disk file. This method can be used to ensure that this is 334 * done at an earlier time, thus preserving system resources. 335 * 336 * @throws IOException if an error occurs. 337 */ 338 @Override 339 public DiskFileItem delete() throws IOException { 340 if (dos != null) { 341 final Path path = dos.getPath(); 342 if (path != null) { 343 Files.deleteIfExists(path); 344 } 345 } 346 return this; 347 } 348 349 /** 350 * Gets the contents of the file as an array of bytes. If the contents of the file were not yet cached in memory, they will be loaded from the disk storage 351 * and cached. 352 * 353 * @return The contents of the file as an array of bytes or {@code null} if the data cannot be read. 354 * @throws IOException if an I/O error occurs. 355 * @throws OutOfMemoryError See {@link Files#readAllBytes(Path)}: If an array of the required size cannot be allocated, for example the file is larger 356 * than {@code 2GB}. If so, you should use {@link #getInputStream()}. 357 * @see #getInputStream() 358 * @deprecated Since 2.0.0, use {@link #getInputStream()}, or {@link #getReader()}, instead. 359 */ 360 @Override 361 public byte[] get() throws IOException { 362 if (dos != null) { 363 final byte[] bytes = dos.getBytes(); 364 if (bytes != null) { 365 return bytes; 366 } 367 final Path path = dos.getPath(); 368 if (path != null && dos.getState() == State.closed) { 369 return Files.readAllBytes(path); 370 } 371 } 372 return null; 373 } 374 375 /** 376 * Gets the content charset passed by the agent or {@code null} if not defined. 377 * 378 * @return The content charset passed by the agent or {@code null} if not defined. 379 */ 380 public Charset getCharset() { 381 final var parser = new ParameterParser(); 382 parser.setLowerCaseNames(true); 383 // Parameter parser can handle null input 384 final var params = parser.parse(getContentType(), ';'); 385 return Charsets.toCharset(params.get("charset"), charsetDefault); 386 } 387 388 /** 389 * Gets the default charset for use when no explicit charset parameter is provided by the sender. 390 * 391 * @return the default charset 392 */ 393 public Charset getCharsetDefault() { 394 return charsetDefault; 395 } 396 397 /** 398 * Gets the content type passed by the agent or {@code null} if not defined. 399 * 400 * @return The content type passed by the agent or {@code null} if not defined. 401 */ 402 @Override 403 public String getContentType() { 404 return contentType; 405 } 406 407 /** 408 * Gets the name of the field in the multipart form corresponding to this file item. 409 * 410 * @return The name of the form field. 411 * @see #setFieldName(String) 412 */ 413 @Override 414 public String getFieldName() { 415 return fieldName; 416 } 417 418 /** 419 * Returns the {@link FileCleaningTracker}, which is being used to remove 420 * temporary files. 421 * @return The {@link FileCleaningTracker}, which is being used to remove 422 * temporary files. 423 */ 424 public FileCleaningTracker getFileCleaningTracker() { 425 return fileCleaningTracker; 426 } 427 428 /** 429 * Gets the file item headers. 430 * 431 * @return The file items headers. 432 */ 433 @Override 434 public FileItemHeaders getHeaders() { 435 return fileItemHeaders; 436 } 437 438 /** 439 * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file. 440 * 441 * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file. 442 * @throws IOException if an error occurs. 443 */ 444 @Override 445 public InputStream getInputStream() throws IOException { 446 if (dos != null && dos.getState() == State.closed) { 447 return dos.getInputStream(); 448 } 449 throw new IllegalStateException("The file item has not been fully read."); 450 } 451 452 /** 453 * Gets the original file name in the client's file system. 454 * 455 * @return The original file name in the client's file system. 456 * @throws InvalidPathException The file name contains a NUL character, which might be an indicator of a security attack. If you intend to use the file name 457 * anyways, catch the exception and use {@link InvalidPathException#getInput()}. 458 */ 459 @Override 460 public String getName() { 461 return checkFileName(fileName); 462 } 463 464 /** 465 * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file. 466 * 467 * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file. 468 */ 469 @Override 470 public OutputStream getOutputStream() { 471 if (dos == null) { 472 final Supplier<Path> pathSupplier = 473 () -> this.repository.resolve(String.format("upload_%s_%s.tmp", UID, getUniqueId())); 474 try { 475 final Listener persistenceListener = new Listener() { 476 @Override 477 public void persisted(final Path pPath) { 478 Listener.super.persisted(pPath); 479 final FileCleaningTracker fct = getFileCleaningTracker(); 480 if (fct != null) { 481 fct.track(getPath(), this); 482 } 483 } 484 }; 485 dos = new DeferrableOutputStream(threshold, pathSupplier, persistenceListener); 486 } catch (final IOException ioe) { 487 throw new UncheckedIOException(ioe); 488 } 489 } 490 return dos; 491 } 492 493 /** 494 * Gets the {@link Path} for the {@code FileItem}'s data's temporary location on the disk. Note that for {@code FileItem}s that have their data stored in 495 * memory, this method will return {@code null}. When handling large files, you can use {@link Files#move(Path,Path,CopyOption...)} to move the file to a 496 * new location without copying the data, if the source and destination locations reside within the same logical volume. 497 * 498 * @return The data file, or {@code null} if the data is stored in memory. 499 */ 500 public Path getPath() { 501 return dos == null ? null : dos.getPath(); 502 } 503 504 /** 505 * Returns the contents of the file as a {@link Reader}, using the specified 506 * {@link #getCharset()}. If the contents are not yet available, returns null. 507 * This is the case, for example, if the underlying output stream has not yet 508 * been closed. 509 * @return The contents of the file as a {@link Reader} 510 * @throws UnsupportedEncodingException The character set, which is 511 * specified in the files "content-type" header, is invalid. 512 * @throws IOException An I/O error occurred, while the 513 * underlying {@link #getInputStream() input stream} was created. 514 */ 515 public Reader getReader() throws IOException, UnsupportedEncodingException { 516 final InputStream is = getInputStream(); 517 final var parser = new ParameterParser(); 518 parser.setLowerCaseNames(true); 519 // Parameter parser can handle null input 520 final var params = parser.parse(getContentType(), ';'); 521 final Charset cs = Charsets.toCharset(params.get("charset"), charsetDefault); 522 return new InputStreamReader(is, cs); 523 } 524 525 /** 526 * Gets the size of the file. 527 * 528 * @return The size of the file, in bytes. 529 */ 530 @Override 531 public long getSize() { 532 return dos == null ? 0L : dos.getSize(); 533 } 534 535 /** 536 * Gets the contents of the file as a String, using the default character encoding. This method uses {@link #get()} to retrieve the contents of the file. 537 * 538 * @return The contents of the file, as a string, if available, or null. 539 * @throws IOException if an I/O error occurs 540 * @throws OutOfMemoryError See {@link Files#readAllBytes(Path)}: If a string of the required size cannot be allocated, 541 * for example the file is larger than {@code 2GB}. If so, you should use {@link #getReader()}. 542 * @throws UnsupportedEncodingException The character set, which is 543 * specified in the files "content-type" header, is invalid. 544 * @deprecated Since 2.0.0, use {@link #getReader()} instead. 545 */ 546 @Override 547 public String getString() throws IOException, UnsupportedEncodingException, OutOfMemoryError { 548 final byte[] bytes = get(); 549 return bytes == null ? null : new String(bytes, getCharset()); 550 } 551 552 /** 553 * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to retrieve the contents of the file. 554 * 555 * @param charset The charset to use. 556 * @return The contents of the file, as a string. 557 * @throws IOException if an I/O error occurs 558 */ 559 @Override 560 public String getString(final Charset charset) throws IOException { 561 return new String(get(), Charsets.toCharset(charset, charsetDefault)); 562 } 563 564 /** 565 * Returns the file items threshold. 566 * @return The threshold. 567 */ 568 public int getThreshold() { 569 return threshold; 570 } 571 572 /** 573 * Tests whether or not a {@code FileItem} instance represents a simple form field. 574 * 575 * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file. 576 * @see #setFormField(boolean) 577 */ 578 @Override 579 public boolean isFormField() { 580 return isFormField; 581 } 582 583 /** 584 * Provides a hint as to whether or not the file contents will be read from memory. 585 * 586 * @return {@code true} if the file contents will be read from memory; {@code false} otherwise. 587 */ 588 @Override 589 public boolean isInMemory() { 590 return dos == null || dos.isInMemory(); 591 } 592 593 /** 594 * Sets the default charset for use when no explicit charset parameter is provided by the sender. 595 * 596 * @param charset the default charset 597 * @return {@code this} instance. 598 */ 599 public DiskFileItem setCharsetDefault(final Charset charset) { 600 charsetDefault = charset; 601 return this; 602 } 603 604 /** 605 * Sets the field name used to reference this file item. 606 * 607 * @param fieldName The name of the form field. 608 * @see #getFieldName() 609 */ 610 @Override 611 public DiskFileItem setFieldName(final String fieldName) { 612 this.fieldName = fieldName; 613 return this; 614 } 615 616 /** 617 * Sets the {@link FileCleaningTracker}, which is being used to remove 618 * temporary files. 619 * @param fileCleaningTracker The {@link FileCleaningTracker}, which is being used to 620 * remove temporary files. 621 */ 622 public void setFileCleaningTracker(final FileCleaningTracker fileCleaningTracker) { 623 this.fileCleaningTracker = fileCleaningTracker; 624 } 625 626 /** 627 * Specifies whether or not a {@code FileItem} instance represents a simple form field. 628 * 629 * @param state {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file. 630 * @see #isFormField() 631 */ 632 @Override 633 public DiskFileItem setFormField(final boolean state) { 634 isFormField = state; 635 return this; 636 } 637 638 /** 639 * Sets the file item headers. 640 * 641 * @param headers The file items headers. 642 */ 643 @Override 644 public DiskFileItem setHeaders(final FileItemHeaders headers) { 645 this.fileItemHeaders = headers; 646 return this; 647 } 648 649 /** 650 * Returns a string representation of this object. 651 * 652 * @return a string representation of this object. 653 */ 654 @Override 655 public String toString() { 656 return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s", getName(), getPath(), getSize(), isFormField(), 657 getFieldName()); 658 } 659 660 /** 661 * Writes an uploaded item to disk. 662 * <p> 663 * The client code is not concerned with whether or not the item is stored in memory, or on disk in a temporary location. They just want to write the 664 * uploaded item to a file. 665 * </p> 666 * <p> 667 * This implementation first attempts to rename the uploaded item to the specified destination file, if the item was originally written to disk. Otherwise, 668 * the data will be copied to the specified file. 669 * </p> 670 * <p> 671 * This method is only guaranteed to work <em>once</em>, the first time it is invoked for a particular item. This is because, in the event that the method 672 * renames a temporary file, that file will no longer be available to copy or rename again at a later time. 673 * </p> 674 * 675 * @param file The {@code File} into which the uploaded item should be stored. 676 * @throws IOException if an error occurs. 677 */ 678 @Override 679 public DiskFileItem write(final Path file) throws IOException { 680 if (isInMemory()) { 681 try (var fout = Files.newOutputStream(file)) { 682 fout.write(get()); 683 } catch (final IOException e) { 684 throw new IOException("Unexpected output data", e); 685 } 686 } else { 687 final var outputFile = getPath(); 688 if (outputFile == null) { 689 /* 690 * For whatever reason we cannot write the file to disk. 691 */ 692 throw new FileUploadException("Cannot write uploaded file to disk."); 693 } 694 // 695 // The uploaded file is being stored on disk in a temporary location so move it to the desired file. 696 // 697 Files.move(outputFile, file, StandardCopyOption.REPLACE_EXISTING); 698 } 699 return this; 700 } 701}