2019-03-10 01:33:56 +08:00
|
|
|
//===- InMemoryModuleCache.cpp - Cache for loaded memory buffers ----------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "clang/Serialization/InMemoryModuleCache.h"
|
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
|
|
|
|
|
|
|
using namespace clang;
|
|
|
|
|
2020-03-11 01:59:26 +08:00
|
|
|
/// Classify the cache entry recorded for \p Filename.
///
/// \returns Unknown when no entry exists, Final when the entry is marked
/// final, Tentative when a non-final entry holds a buffer, and ToBuild when a
/// non-final entry has no buffer.
InMemoryModuleCache::State
InMemoryModuleCache::getPCMState(llvm::StringRef Filename) const {
  const auto Entry = PCMs.find(Filename);
  if (Entry == PCMs.end())
    return Unknown;

  const auto &Slot = Entry->second;
  if (Slot.IsFinal)
    return Final;

  // A non-final entry is distinguished only by whether it still has a buffer.
  if (Slot.Buffer)
    return Tentative;
  return ToBuild;
}
|
|
|
|
|
2019-03-10 01:33:56 +08:00
|
|
|
llvm::MemoryBuffer &
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediateley invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
InMemoryModuleCache::addPCM(llvm::StringRef Filename,
|
|
|
|
std::unique_ptr<llvm::MemoryBuffer> Buffer) {
|
|
|
|
auto Insertion = PCMs.insert(std::make_pair(Filename, std::move(Buffer)));
|
|
|
|
assert(Insertion.second && "Already has a PCM");
|
2019-03-10 01:33:56 +08:00
|
|
|
return *Insertion.first->second.Buffer;
|
|
|
|
}
|
|
|
|
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediately invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
llvm::MemoryBuffer &
|
2020-03-11 01:59:26 +08:00
|
|
|
InMemoryModuleCache::addBuiltPCM(llvm::StringRef Filename,
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediateley invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
std::unique_ptr<llvm::MemoryBuffer> Buffer) {
|
|
|
|
auto &PCM = PCMs[Filename];
|
|
|
|
assert(!PCM.IsFinal && "Trying to override finalized PCM?");
|
2020-03-11 01:59:26 +08:00
|
|
|
assert(!PCM.Buffer && "Trying to override tentative PCM?");
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediateley invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
PCM.Buffer = std::move(Buffer);
|
|
|
|
PCM.IsFinal = true;
|
|
|
|
return *PCM.Buffer;
|
|
|
|
}
|
|
|
|
|
2019-03-10 01:33:56 +08:00
|
|
|
llvm::MemoryBuffer *
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediateley invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
InMemoryModuleCache::lookupPCM(llvm::StringRef Filename) const {
|
2019-03-10 01:33:56 +08:00
|
|
|
auto I = PCMs.find(Filename);
|
|
|
|
if (I == PCMs.end())
|
|
|
|
return nullptr;
|
|
|
|
return I->second.Buffer.get();
|
|
|
|
}
|
|
|
|
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediately invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
/// \returns true exactly when getPCMState() reports Final for \p Filename.
bool InMemoryModuleCache::isPCMFinal(llvm::StringRef Filename) const {
  const State Current = getPCMState(Filename);
  return Current == Final;
}
|
|
|
|
|
|
|
|
/// \returns true exactly when getPCMState() reports ToBuild for \p Filename.
bool InMemoryModuleCache::shouldBuildPCM(llvm::StringRef Filename) const {
  const State Current = getPCMState(Filename);
  return Current == ToBuild;
}
|
|
|
|
|
2020-03-11 01:59:26 +08:00
|
|
|
bool InMemoryModuleCache::tryToDropPCM(llvm::StringRef Filename) {
|
2019-03-10 01:33:56 +08:00
|
|
|
auto I = PCMs.find(Filename);
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediateley invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
assert(I != PCMs.end() && "PCM to remove is unknown...");
|
|
|
|
|
|
|
|
auto &PCM = I->second;
|
2020-03-11 01:59:26 +08:00
|
|
|
assert(PCM.Buffer && "PCM to remove is scheduled to be built...");
|
|
|
|
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediateley invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
if (PCM.IsFinal)
|
2019-03-10 01:33:56 +08:00
|
|
|
return true;
|
|
|
|
|
2020-03-11 01:59:26 +08:00
|
|
|
PCM.Buffer.reset();
|
2019-03-10 01:33:56 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Modules: Invalidate out-of-date PCMs as they're discovered
Leverage the InMemoryModuleCache to invalidate a module the first time
it fails to import (and to lock a module as soon as it's built or
imported successfully). For implicit module builds, this optimizes
importing deep graphs where the leaf module is out-of-date; see example
near the end of the commit message.
Previously the cache finalized ("locked in") all modules imported so far
when starting a new module build. This was sufficient to prevent
loading two versions of the same module, but was somewhat arbitrary and
hard to reason about.
Now the cache explicitly tracks module state, where each module must be
one of:
- Unknown: module not in the cache (yet).
- Tentative: module in the cache, but not yet fully imported.
- ToBuild: module found on disk could not be imported; need to build.
- Final: module in the cache has been successfully built or imported.
Preventing repeated failed imports avoids variation in builds based on
shifting filesystem state. Now it's guaranteed that a module is loaded
from disk exactly once. It now seems safe to remove
FileManager::invalidateCache, but I'm leaving that for a later commit.
The new, precise logic uncovered a pre-existing problem in the cache:
the map key is the module filename, and different contexts use different
filenames for the same PCM file. (In particular, the test
Modules/relative-import-path.c does not build without this commit.
r223577 started using a relative path to describe a module's base
directory when importing it within another module. As a result, the
module cache sees an absolute path when (a) building the module or
importing it at the top-level, and a relative path when (b) importing
the module underneath another one.)
The "obvious" fix is to resolve paths using FileManager::getVirtualFile
and change the map key for the cache to a FileEntry, but some contexts
(particularly related to ASTUnit) have a shorter lifetime for their
FileManager than the InMemoryModuleCache. This is worth pursuing
further in a later commit; perhaps by tying together the FileManager and
InMemoryModuleCache lifetime, or moving the in-memory PCM storage into a
VFS layer.
For now, use the PCM's base directory as-written for constructing the
filename to check the ModuleCache.
Example
=======
To understand the build optimization, first consider the build of a
module graph TU -> A -> B -> C -> D with an empty cache:
TU builds A'
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
If we build TU again, where A, B, C, and D are in the cache and D is
out-of-date, we would previously get this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A'
A' imports B
imports C
imports D (out-of-date)
builds B'
B' imports C
imports D (out-of-date)
builds C'
C' imports D (out-of-date)
builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
After this commit, we'll immediately invalidate A, B, C, and D when we
first observe that D is out-of-date, giving this build:
TU imports A
imports B
imports C
imports D (out-of-date)
TU builds A' // The same graph as an empty cache.
A' builds B'
B' builds C'
C' builds D'
imports D'
B' imports C'
imports D'
A' imports B'
imports C'
imports D'
TU imports A'
imports B'
imports C'
imports D'
The new build matches what we'd naively expect, pretty closely matching
the original build with the empty cache.
rdar://problem/48545366
llvm-svn: 355778
2019-03-10 01:44:01 +08:00
|
|
|
// Flag the cache entry stored under Filename as final.
//
// Preconditions (checked by assertion only): an entry for Filename exists in
// PCMs, and that entry still holds a buffer.
void InMemoryModuleCache::finalizePCM(llvm::StringRef Filename) {
  auto Entry = PCMs.find(Filename);

  // The caller must only finalize a PCM that is present in the cache...
  assert(Entry != PCMs.end() && "PCM to finalize is unknown...");
  // ...and whose buffer has not been reset.
  assert(Entry->second.Buffer && "Trying to finalize a dropped PCM...");

  Entry->second.IsFinal = true;
}
|