diff --git a/src/librustc/dep_graph/dep_node.rs b/src/librustc/dep_graph/dep_node.rs index 29b94986a5f3a..eb7e2871bfcd8 100644 --- a/src/librustc/dep_graph/dep_node.rs +++ b/src/librustc/dep_graph/dep_node.rs @@ -76,10 +76,6 @@ macro_rules! erase { ($x:tt) => {{}}; } -macro_rules! replace { - ($x:tt with $($y:tt)*) => ($($y)*) -} - macro_rules! is_anon_attr { (anon) => { true @@ -99,19 +95,18 @@ macro_rules! is_eval_always_attr { } macro_rules! contains_anon_attr { - ($($attr:ident),*) => ({$(is_anon_attr!($attr) | )* false}); + ($($attr:ident $(($($attr_args:tt)*))* ),*) => ({$(is_anon_attr!($attr) | )* false}); } macro_rules! contains_eval_always_attr { - ($($attr:ident),*) => ({$(is_eval_always_attr!($attr) | )* false}); + ($($attr:ident $(($($attr_args:tt)*))* ),*) => ({$(is_eval_always_attr!($attr) | )* false}); } macro_rules! define_dep_nodes { (<$tcx:tt> $( - [$($attr:ident),* ] + [$($attrs:tt)*] $variant:ident $(( $tuple_arg_ty:ty $(,)? ))* - $({ $($struct_arg_name:ident : $struct_arg_ty:ty),* })* ,)* ) => ( #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, @@ -126,7 +121,7 @@ macro_rules! define_dep_nodes { match *self { $( DepKind :: $variant => { - if contains_anon_attr!($($attr),*) { + if contains_anon_attr!($($attrs)*) { return false; } @@ -136,13 +131,6 @@ macro_rules! define_dep_nodes { ::CAN_RECONSTRUCT_QUERY_KEY; })* - // struct args - $({ - - return <( $($struct_arg_ty,)* ) as DepNodeParams> - ::CAN_RECONSTRUCT_QUERY_KEY; - })* - true } )* @@ -152,7 +140,7 @@ macro_rules! define_dep_nodes { pub fn is_anon(&self) -> bool { match *self { $( - DepKind :: $variant => { contains_anon_attr!($($attr),*) } + DepKind :: $variant => { contains_anon_attr!($($attrs)*) } )* } } @@ -160,7 +148,7 @@ macro_rules! define_dep_nodes { pub fn is_eval_always(&self) -> bool { match *self { $( - DepKind :: $variant => { contains_eval_always_attr!($($attr), *) } + DepKind :: $variant => { contains_eval_always_attr!($($attrs)*) } )* } } @@ -176,12 +164,6 @@ macro_rules! define_dep_nodes { return true; })* - // struct args - $({ - $(erase!($struct_arg_name);)* - return true; - })* - false } )* @@ -189,11 +171,43 @@ macro_rules! define_dep_nodes { } } - pub enum DepConstructor<$tcx> { + pub struct DepConstructor; + + impl DepConstructor { $( - $variant $(( $tuple_arg_ty ))* - $({ $($struct_arg_name : $struct_arg_ty),* })* - ),* + #[inline(always)] + #[allow(unreachable_code, non_snake_case)] + pub fn $variant<'tcx>(_tcx: TyCtxt<'tcx>, $(arg: $tuple_arg_ty)*) -> DepNode { + // tuple args + $({ + erase!($tuple_arg_ty); + let hash = DepNodeParams::to_fingerprint(&arg, _tcx); + let dep_node = DepNode { + kind: DepKind::$variant, + hash + }; + + #[cfg(debug_assertions)] + { + if !dep_node.kind.can_reconstruct_query_key() && + (_tcx.sess.opts.debugging_opts.incremental_info || + _tcx.sess.opts.debugging_opts.query_dep_graph) + { + _tcx.dep_graph.register_dep_node_debug_str(dep_node, || { + arg.to_debug_str(_tcx) + }); + } + } + + return dep_node; + })* + + DepNode { + kind: DepKind::$variant, + hash: Fingerprint::ZERO, + } + } + )* } #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, @@ -204,75 +218,6 @@ macro_rules! 
define_dep_nodes { } impl DepNode { - #[allow(unreachable_code, non_snake_case)] - pub fn new<'tcx>(tcx: TyCtxt<'tcx>, - dep: DepConstructor<'tcx>) - -> DepNode - { - match dep { - $( - DepConstructor :: $variant $(( replace!(($tuple_arg_ty) with arg) ))* - $({ $($struct_arg_name),* })* - => - { - // tuple args - $({ - erase!($tuple_arg_ty); - let hash = DepNodeParams::to_fingerprint(&arg, tcx); - let dep_node = DepNode { - kind: DepKind::$variant, - hash - }; - - #[cfg(debug_assertions)] - { - if !dep_node.kind.can_reconstruct_query_key() && - (tcx.sess.opts.debugging_opts.incremental_info || - tcx.sess.opts.debugging_opts.query_dep_graph) - { - tcx.dep_graph.register_dep_node_debug_str(dep_node, || { - arg.to_debug_str(tcx) - }); - } - } - - return dep_node; - })* - - // struct args - $({ - let tupled_args = ( $($struct_arg_name,)* ); - let hash = DepNodeParams::to_fingerprint(&tupled_args, - tcx); - let dep_node = DepNode { - kind: DepKind::$variant, - hash - }; - - #[cfg(debug_assertions)] - { - if !dep_node.kind.can_reconstruct_query_key() && - (tcx.sess.opts.debugging_opts.incremental_info || - tcx.sess.opts.debugging_opts.query_dep_graph) - { - tcx.dep_graph.register_dep_node_debug_str(dep_node, || { - tupled_args.to_debug_str(tcx) - }); - } - } - - return dep_node; - })* - - DepNode { - kind: DepKind::$variant, - hash: Fingerprint::ZERO, - } - } - )* - } - } - /// Construct a DepNode from the given DepKind and DefPathHash. This /// method will assert that the given DepKind actually requires a /// single DefId/DefPathHash parameter. diff --git a/src/librustc/dep_graph/graph.rs b/src/librustc/dep_graph/graph.rs index 258723bb39d83..531a45b120c24 100644 --- a/src/librustc/dep_graph/graph.rs +++ b/src/librustc/dep_graph/graph.rs @@ -1122,6 +1122,7 @@ impl CurrentDepGraph { } impl DepGraphData { + #[inline(never)] fn read_index(&self, source: DepNodeIndex) { ty::tls::with_context_opt(|icx| { let icx = if let Some(icx) = icx { icx } else { return }; diff --git a/src/librustc/mir/mono.rs b/src/librustc/mir/mono.rs index 6da7c09c7df9e..9a3ddfb0e82c9 100644 --- a/src/librustc/mir/mono.rs +++ b/src/librustc/mir/mono.rs @@ -362,7 +362,7 @@ impl<'tcx> CodegenUnit<'tcx> { } pub fn codegen_dep_node(&self, tcx: TyCtxt<'tcx>) -> DepNode { - DepNode::new(tcx, DepConstructor::CompileCodegenUnit(self.name())) + DepConstructor::CompileCodegenUnit(tcx, self.name()) } } diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs index 5a415fa954f0d..e59738d888608 100644 --- a/src/librustc/ty/context.rs +++ b/src/librustc/ty/context.rs @@ -2,7 +2,7 @@ use crate::arena::Arena; use crate::dep_graph::DepGraph; -use crate::dep_graph::{self, DepConstructor, DepNode}; +use crate::dep_graph::{self, DepConstructor}; use crate::hir::exports::Export; use crate::hir::map as hir_map; use crate::hir::map::DefPathHash; @@ -1347,7 +1347,7 @@ impl<'tcx> TyCtxt<'tcx> { // We cannot use the query versions of crates() and crate_hash(), since // those would need the DepNodes that we are allocating here. for cnum in self.cstore.crates_untracked() { - let dep_node = DepNode::new(self, DepConstructor::CrateMetadata(cnum)); + let dep_node = DepConstructor::CrateMetadata(self, cnum); let crate_hash = self.cstore.crate_hash_untracked(cnum); self.dep_graph.with_task( dep_node, @@ -1688,6 +1688,7 @@ pub mod tls { /// Gets the pointer to the current `ImplicitCtxt`. 
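Note on the dep_node.rs hunks above: `DepConstructor` stops being an enum that `DepNode::new` matched on and becomes a unit struct with one generated `#[inline(always)]` constructor function per `DepKind` variant, which is why the call sites change from `DepNode::new(tcx, DepConstructor::CrateMetadata(cnum))` to `DepConstructor::CrateMetadata(self, cnum)`. A minimal standalone sketch of the shape of that change, with stand-in types and made-up hashing (none of this is the real rustc API):

#[derive(Clone, Copy, Debug)]
struct Fingerprint(u64);

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DepKind { CrateMetadata, CompileCodegenUnit }

#[derive(Clone, Copy, Debug)]
struct DepNode { kind: DepKind, hash: Fingerprint }

// Old shape: one enum of all constructors, consumed by a single `new`
// that matches on every variant.
enum DepConstructorEnum { CrateMetadata(u32), CompileCodegenUnit(&'static str) }

fn dep_node_new(dep: DepConstructorEnum) -> DepNode {
    match dep {
        DepConstructorEnum::CrateMetadata(cnum) =>
            DepNode { kind: DepKind::CrateMetadata, hash: Fingerprint(cnum as u64) },
        DepConstructorEnum::CompileCodegenUnit(name) =>
            DepNode { kind: DepKind::CompileCodegenUnit, hash: Fingerprint(name.len() as u64) },
    }
}

// New shape: a unit struct whose associated functions build the node
// directly, one per kind, so no intermediate enum value is constructed.
struct DepConstructor;

impl DepConstructor {
    #[inline(always)]
    fn crate_metadata(cnum: u32) -> DepNode {
        DepNode { kind: DepKind::CrateMetadata, hash: Fingerprint(cnum as u64) }
    }
    #[inline(always)]
    fn compile_codegen_unit(name: &str) -> DepNode {
        DepNode { kind: DepKind::CompileCodegenUnit, hash: Fingerprint(name.len() as u64) }
    }
}

fn main() {
    let old = dep_node_new(DepConstructorEnum::CrateMetadata(7));
    let new = DepConstructor::crate_metadata(7);
    assert_eq!(old.kind, new.kind);

    let old_cgu = dep_node_new(DepConstructorEnum::CompileCodegenUnit("cgu.0"));
    let new_cgu = DepConstructor::compile_codegen_unit("cgu.0");
    assert_eq!(old_cgu.kind, new_cgu.kind);
    println!("{:?} {:?}", new, new_cgu);
}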
#[cfg(not(parallel_compiler))] + #[inline] fn get_tlv() -> usize { TLV.with(|tlv| tlv.get()) } diff --git a/src/librustc/ty/query/caches.rs b/src/librustc/ty/query/caches.rs new file mode 100644 index 0000000000000..efc2804bd4d59 --- /dev/null +++ b/src/librustc/ty/query/caches.rs @@ -0,0 +1,112 @@ +use crate::dep_graph::DepNodeIndex; +use crate::ty::query::config::QueryAccessors; +use crate::ty::query::plumbing::{QueryLookup, QueryState, QueryStateShard}; +use crate::ty::TyCtxt; + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sharded::Sharded; +use std::default::Default; +use std::hash::Hash; + +pub(crate) trait CacheSelector { + type Cache: QueryCache; +} + +pub(crate) trait QueryCache: Default { + type Sharded: Default; + + /// Checks if the query is already computed and in the cache. + /// It returns the shard index and a lock guard to the shard, + /// which will be used if the query is not in the cache and we need + /// to compute it. + fn lookup<'tcx, R, GetCache, OnHit, OnMiss, Q>( + &self, + state: &'tcx QueryState<'tcx, Q>, + get_cache: GetCache, + key: K, + // `on_hit` can be called while holding a lock to the query state shard. + on_hit: OnHit, + on_miss: OnMiss, + ) -> R + where + Q: QueryAccessors<'tcx>, + GetCache: for<'a> Fn(&'a mut QueryStateShard<'tcx, Q>) -> &'a mut Self::Sharded, + OnHit: FnOnce(&V, DepNodeIndex) -> R, + OnMiss: FnOnce(K, QueryLookup<'tcx, Q>) -> R; + + fn complete( + &self, + tcx: TyCtxt<'tcx>, + lock_sharded_storage: &mut Self::Sharded, + key: K, + value: V, + index: DepNodeIndex, + ); + + fn iter( + &self, + shards: &Sharded, + get_shard: impl Fn(&mut L) -> &mut Self::Sharded, + f: impl for<'a> FnOnce(Box + 'a>) -> R, + ) -> R; +} + +pub struct DefaultCacheSelector; + +impl CacheSelector for DefaultCacheSelector { + type Cache = DefaultCache; +} + +#[derive(Default)] +pub struct DefaultCache; + +impl QueryCache for DefaultCache { + type Sharded = FxHashMap; + + #[inline(always)] + fn lookup<'tcx, R, GetCache, OnHit, OnMiss, Q>( + &self, + state: &'tcx QueryState<'tcx, Q>, + get_cache: GetCache, + key: K, + on_hit: OnHit, + on_miss: OnMiss, + ) -> R + where + Q: QueryAccessors<'tcx>, + GetCache: for<'a> Fn(&'a mut QueryStateShard<'tcx, Q>) -> &'a mut Self::Sharded, + OnHit: FnOnce(&V, DepNodeIndex) -> R, + OnMiss: FnOnce(K, QueryLookup<'tcx, Q>) -> R, + { + let mut lookup = state.get_lookup(&key); + let lock = &mut *lookup.lock; + + let result = get_cache(lock).raw_entry().from_key_hashed_nocheck(lookup.key_hash, &key); + + if let Some((_, value)) = result { on_hit(&value.0, value.1) } else { on_miss(key, lookup) } + } + + #[inline] + fn complete( + &self, + _: TyCtxt<'tcx>, + lock_sharded_storage: &mut Self::Sharded, + key: K, + value: V, + index: DepNodeIndex, + ) { + lock_sharded_storage.insert(key, (value, index)); + } + + fn iter( + &self, + shards: &Sharded, + get_shard: impl Fn(&mut L) -> &mut Self::Sharded, + f: impl for<'a> FnOnce(Box + 'a>) -> R, + ) -> R { + let mut shards = shards.lock_shards(); + let mut shards: Vec<_> = shards.iter_mut().map(|shard| get_shard(shard)).collect(); + let results = shards.iter_mut().flat_map(|shard| shard.iter()).map(|(k, v)| (k, &v.0, v.1)); + f(Box::new(results)) + } +} diff --git a/src/librustc/ty/query/config.rs b/src/librustc/ty/query/config.rs index dbb6a1080e6d4..e0e1ca374d9ae 100644 --- a/src/librustc/ty/query/config.rs +++ b/src/librustc/ty/query/config.rs @@ -1,15 +1,15 @@ use crate::dep_graph::SerializedDepNodeIndex; use crate::dep_graph::{DepKind, DepNode}; +use 
crate::ty::query::caches::QueryCache; use crate::ty::query::plumbing::CycleError; use crate::ty::query::queries; -use crate::ty::query::{Query, QueryCache}; +use crate::ty::query::{Query, QueryState}; use crate::ty::TyCtxt; use rustc_data_structures::profiling::ProfileCategory; use rustc_hir::def_id::{CrateNum, DefId}; use crate::ich::StableHashingContext; use rustc_data_structures::fingerprint::Fingerprint; -use rustc_data_structures::sharded::Sharded; use std::borrow::Cow; use std::fmt::Debug; use std::hash::Hash; @@ -30,10 +30,12 @@ pub(crate) trait QueryAccessors<'tcx>: QueryConfig<'tcx> { const ANON: bool; const EVAL_ALWAYS: bool; + type Cache: QueryCache; + fn query(key: Self::Key) -> Query<'tcx>; // Don't use this method to access query results, instead use the methods on TyCtxt - fn query_cache<'a>(tcx: TyCtxt<'tcx>) -> &'a Sharded>; + fn query_state<'a>(tcx: TyCtxt<'tcx>) -> &'a QueryState<'tcx, Self>; fn to_dep_node(tcx: TyCtxt<'tcx>, key: &Self::Key) -> DepNode; @@ -61,7 +63,10 @@ pub(crate) trait QueryDescription<'tcx>: QueryAccessors<'tcx> { } } -impl<'tcx, M: QueryAccessors<'tcx, Key = DefId>> QueryDescription<'tcx> for M { +impl<'tcx, M: QueryAccessors<'tcx, Key = DefId>> QueryDescription<'tcx> for M +where + >::Cache: QueryCache>::Value>, +{ default fn describe(tcx: TyCtxt<'_>, def_id: DefId) -> Cow<'static, str> { if !tcx.sess.verbose() { format!("processing `{}`", tcx.def_path_str(def_id)).into() diff --git a/src/librustc/ty/query/keys.rs b/src/librustc/ty/query/keys.rs index c1c88e96f94b5..09fb307a1ceb4 100644 --- a/src/librustc/ty/query/keys.rs +++ b/src/librustc/ty/query/keys.rs @@ -4,6 +4,7 @@ use crate::infer::canonical::Canonical; use crate::mir; use crate::traits; use crate::ty::fast_reject::SimplifiedType; +use crate::ty::query::caches::DefaultCacheSelector; use crate::ty::subst::SubstsRef; use crate::ty::{self, Ty, TyCtxt}; use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LOCAL_CRATE}; @@ -12,7 +13,9 @@ use rustc_span::{Span, DUMMY_SP}; /// The `Key` trait controls what types can legally be used as the key /// for a query. -pub(super) trait Key { +pub trait Key { + type CacheSelector; + /// Given an instance of this key, what crate is it referring to? /// This is used to find the provider. 
fn query_crate(&self) -> CrateNum; @@ -23,6 +26,8 @@ pub(super) trait Key { } impl<'tcx> Key for ty::InstanceDef<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -33,6 +38,8 @@ impl<'tcx> Key for ty::InstanceDef<'tcx> { } impl<'tcx> Key for ty::Instance<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -43,6 +50,8 @@ impl<'tcx> Key for ty::Instance<'tcx> { } impl<'tcx> Key for mir::interpret::GlobalId<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.instance.query_crate() } @@ -53,6 +62,8 @@ impl<'tcx> Key for mir::interpret::GlobalId<'tcx> { } impl<'tcx> Key for mir::interpret::LitToConstInput<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -63,6 +74,8 @@ impl<'tcx> Key for mir::interpret::LitToConstInput<'tcx> { } impl Key for CrateNum { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { *self } @@ -72,6 +85,8 @@ impl Key for CrateNum { } impl Key for DefIndex { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -81,6 +96,8 @@ impl Key for DefIndex { } impl Key for DefId { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.krate } @@ -90,6 +107,8 @@ impl Key for DefId { } impl Key for (DefId, DefId) { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.0.krate } @@ -99,6 +118,8 @@ impl Key for (DefId, DefId) { } impl Key for (CrateNum, DefId) { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.0 } @@ -108,6 +129,8 @@ impl Key for (CrateNum, DefId) { } impl Key for (DefId, SimplifiedType) { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.0.krate } @@ -117,6 +140,8 @@ impl Key for (DefId, SimplifiedType) { } impl<'tcx> Key for SubstsRef<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -126,6 +151,8 @@ impl<'tcx> Key for SubstsRef<'tcx> { } impl<'tcx> Key for (DefId, SubstsRef<'tcx>) { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.0.krate } @@ -135,6 +162,8 @@ impl<'tcx> Key for (DefId, SubstsRef<'tcx>) { } impl<'tcx> Key for (ty::ParamEnv<'tcx>, ty::PolyTraitRef<'tcx>) { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.1.def_id().krate } @@ -144,6 +173,8 @@ impl<'tcx> Key for (ty::ParamEnv<'tcx>, ty::PolyTraitRef<'tcx>) { } impl<'tcx> Key for (&'tcx ty::Const<'tcx>, mir::Field) { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -153,6 +184,8 @@ impl<'tcx> Key for (&'tcx ty::Const<'tcx>, mir::Field) { } impl<'tcx> Key for ty::PolyTraitRef<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.def_id().krate } @@ -162,6 +195,8 @@ impl<'tcx> Key for ty::PolyTraitRef<'tcx> { } impl<'tcx> Key for &'tcx ty::Const<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -171,6 +206,8 @@ impl<'tcx> Key for &'tcx ty::Const<'tcx> { } impl<'tcx> Key for Ty<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -180,6 +217,8 @@ impl<'tcx> Key for Ty<'tcx> { } impl<'tcx> Key for ty::ParamEnv<'tcx> { + type CacheSelector = 
DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -189,6 +228,8 @@ impl<'tcx> Key for ty::ParamEnv<'tcx> { } impl<'tcx, T: Key> Key for ty::ParamEnvAnd<'tcx, T> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { self.value.query_crate() } @@ -198,6 +239,8 @@ impl<'tcx, T: Key> Key for ty::ParamEnvAnd<'tcx, T> { } impl<'tcx> Key for traits::Environment<'tcx> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -207,6 +250,8 @@ impl<'tcx> Key for traits::Environment<'tcx> { } impl Key for Symbol { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -218,6 +263,8 @@ impl Key for Symbol { /// Canonical query goals correspond to abstract trait operations that /// are not tied to any crate in particular. impl<'tcx, T> Key for Canonical<'tcx, T> { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } @@ -228,6 +275,8 @@ impl<'tcx, T> Key for Canonical<'tcx, T> { } impl Key for (Symbol, u32, u32) { + type CacheSelector = DefaultCacheSelector; + fn query_crate(&self) -> CrateNum { LOCAL_CRATE } diff --git a/src/librustc/ty/query/mod.rs b/src/librustc/ty/query/mod.rs index 21698cd737485..381a7b1f03ff7 100644 --- a/src/librustc/ty/query/mod.rs +++ b/src/librustc/ty/query/mod.rs @@ -1,4 +1,4 @@ -use crate::dep_graph::{self, DepNode}; +use crate::dep_graph::{self, DepConstructor, DepNode}; use crate::hir::exports::Export; use crate::infer::canonical::{self, Canonical}; use crate::lint::LintLevelMap; @@ -52,7 +52,6 @@ use rustc_target::spec::PanicStrategy; use rustc_attr as attr; use rustc_span::symbol::Symbol; use rustc_span::{Span, DUMMY_SP}; -use std::any::type_name; use std::borrow::Cow; use std::convert::TryFrom; use std::ops::Deref; @@ -64,6 +63,9 @@ mod plumbing; use self::plumbing::*; pub use self::plumbing::{force_from_dep_node, CycleError}; +mod stats; +pub use self::stats::print_stats; + mod job; #[cfg(parallel_compiler)] pub use self::job::handle_deadlock; @@ -76,6 +78,9 @@ use self::keys::Key; mod values; use self::values::Value; +mod caches; +use self::caches::CacheSelector; + mod config; use self::config::QueryAccessors; pub use self::config::QueryConfig; diff --git a/src/librustc/ty/query/on_disk_cache.rs b/src/librustc/ty/query/on_disk_cache.rs index 45d95e97a9cf0..b92081ff7c05f 100644 --- a/src/librustc/ty/query/on_disk_cache.rs +++ b/src/librustc/ty/query/on_disk_cache.rs @@ -1035,20 +1035,22 @@ where .prof .extra_verbose_generic_activity("encode_query_results_for", ::std::any::type_name::()); - let shards = Q::query_cache(tcx).lock_shards(); - assert!(shards.iter().all(|shard| shard.active.is_empty())); - for (key, entry) in shards.iter().flat_map(|shard| shard.results.iter()) { - if Q::cache_on_disk(tcx, key.clone(), Some(&entry.value)) { - let dep_node = SerializedDepNodeIndex::new(entry.index.index()); - - // Record position of the cache entry. - query_result_index.push((dep_node, AbsoluteBytePos::new(encoder.position()))); - - // Encode the type check tables with the `SerializedDepNodeIndex` - // as tag. - encoder.encode_tagged(dep_node, &entry.value)?; - } - } + let state = Q::query_state(tcx); + assert!(state.all_inactive()); + + state.iter_results(|results| { + for (key, value, dep_node) in results { + if Q::cache_on_disk(tcx, key.clone(), Some(&value)) { + let dep_node = SerializedDepNodeIndex::new(dep_node.index()); + + // Record position of the cache entry. 
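The repeated `type CacheSelector = DefaultCacheSelector;` additions above give every query key a hook for choosing its in-memory cache implementation; together with the `CacheSelector`/`QueryCache` traits from the new caches.rs, the generated `type Cache` can then be derived from the key type. A compact sketch of that indirection using plain std types as stand-ins (not the rustc definitions):

use std::collections::HashMap;
use std::hash::Hash;

// Selector: maps a (key, value) pair to a concrete cache type.
trait CacheSelector<K, V> {
    type Cache: QueryCache<K, V>;
}

// The cache interface itself, reduced to insert/get for the sketch.
trait QueryCache<K, V>: Default {
    fn insert(&mut self, key: K, value: V);
    fn get(&self, key: &K) -> Option<&V>;
}

struct DefaultCacheSelector;

struct DefaultCache<K, V>(HashMap<K, V>);

impl<K, V> Default for DefaultCache<K, V> {
    fn default() -> Self { DefaultCache(HashMap::new()) }
}

impl<K: Eq + Hash, V> CacheSelector<K, V> for DefaultCacheSelector {
    type Cache = DefaultCache<K, V>;
}

impl<K: Eq + Hash, V> QueryCache<K, V> for DefaultCache<K, V> {
    fn insert(&mut self, key: K, value: V) { self.0.insert(key, value); }
    fn get(&self, key: &K) -> Option<&V> { self.0.get(key) }
}

// Each key type advertises which selector (and therefore which cache) it wants.
trait Key {
    type CacheSelector;
}

impl Key for u32 {
    type CacheSelector = DefaultCacheSelector;
}

fn main() {
    // What the selector-based default effectively picks for a u32-keyed
    // query returning String:
    let mut cache: <<u32 as Key>::CacheSelector as CacheSelector<u32, String>>::Cache =
        Default::default();
    cache.insert(1, "one".to_string());
    assert_eq!(cache.get(&1).map(String::as_str), Some("one"));
}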
+ query_result_index.push((dep_node, AbsoluteBytePos::new(encoder.position()))); - Ok(()) + // Encode the type check tables with the `SerializedDepNodeIndex` + // as tag. + encoder.encode_tagged(dep_node, &value)?; + } + } + Ok(()) + }) } diff --git a/src/librustc/ty/query/plumbing.rs b/src/librustc/ty/query/plumbing.rs index 8b787915de605..a61256b9fcbbc 100644 --- a/src/librustc/ty/query/plumbing.rs +++ b/src/librustc/ty/query/plumbing.rs @@ -3,7 +3,8 @@ //! manage the caches, and so forth. use crate::dep_graph::{DepKind, DepNode, DepNodeIndex, SerializedDepNodeIndex}; -use crate::ty::query::config::{QueryConfig, QueryDescription}; +use crate::ty::query::caches::QueryCache; +use crate::ty::query::config::{QueryAccessors, QueryDescription}; use crate::ty::query::job::{QueryInfo, QueryJob, QueryJobId, QueryShardJobId}; use crate::ty::query::Query; use crate::ty::tls; @@ -12,10 +13,8 @@ use crate::ty::{self, TyCtxt}; #[cfg(not(parallel_compiler))] use rustc_data_structures::cold_path; use rustc_data_structures::fx::{FxHashMap, FxHasher}; -#[cfg(parallel_compiler)] -use rustc_data_structures::profiling::TimingGuard; use rustc_data_structures::sharded::Sharded; -use rustc_data_structures::sync::Lock; +use rustc_data_structures::sync::{Lock, LockGuard}; use rustc_data_structures::thin_vec::ThinVec; use rustc_errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler, Level}; use rustc_span::source_map::DUMMY_SP; @@ -25,26 +24,50 @@ use std::hash::{Hash, Hasher}; use std::mem; use std::num::NonZeroU32; use std::ptr; +#[cfg(debug_assertions)] +use std::sync::atomic::{AtomicUsize, Ordering}; -pub struct QueryCache<'tcx, D: QueryConfig<'tcx> + ?Sized> { - pub(super) results: FxHashMap>, +pub(crate) struct QueryStateShard<'tcx, D: QueryAccessors<'tcx> + ?Sized> { + pub(super) cache: <>::Cache as QueryCache>::Sharded, pub(super) active: FxHashMap>, /// Used to generate unique ids for active jobs. pub(super) jobs: u32, +} - #[cfg(debug_assertions)] - pub(super) cache_hits: usize, +impl<'tcx, Q: QueryAccessors<'tcx>> QueryStateShard<'tcx, Q> { + fn get_cache( + &mut self, + ) -> &mut <>::Cache as QueryCache>::Sharded { + &mut self.cache + } } -pub(super) struct QueryValue { - pub(super) value: T, - pub(super) index: DepNodeIndex, +impl<'tcx, Q: QueryAccessors<'tcx>> Default for QueryStateShard<'tcx, Q> { + fn default() -> QueryStateShard<'tcx, Q> { + QueryStateShard { cache: Default::default(), active: Default::default(), jobs: 0 } + } +} + +pub(crate) struct QueryState<'tcx, D: QueryAccessors<'tcx> + ?Sized> { + pub(super) cache: D::Cache, + pub(super) shards: Sharded>, + #[cfg(debug_assertions)] + pub(super) cache_hits: AtomicUsize, } -impl QueryValue { - pub(super) fn new(value: T, dep_node_index: DepNodeIndex) -> QueryValue { - QueryValue { value, index: dep_node_index } +impl<'tcx, Q: QueryAccessors<'tcx>> QueryState<'tcx, Q> { + pub(super) fn get_lookup(&'tcx self, key: &K) -> QueryLookup<'tcx, Q> { + // We compute the key's hash once and then use it for both the + // shard lookup and the hashmap lookup. This relies on the fact + // that both of them use `FxHasher`. 
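The on-disk-cache encoding above switches from locking all shards and walking `shard.results` directly to the closure-based `QueryState::iter_results`, so the shard locks are held only for the duration of the callback. A simplified sketch of that pattern, with `Mutex` and `HashMap` standing in for rustc's `Sharded` storage and `u32` for `DepNodeIndex`:

use std::collections::HashMap;
use std::sync::Mutex;

struct ShardedResults<K, V> {
    shards: Vec<Mutex<HashMap<K, (V, u32)>>>,
}

impl<K: Eq + std::hash::Hash, V> ShardedResults<K, V> {
    // All shard guards live only for the duration of `f`, so a caller cannot
    // accidentally keep the locks across unrelated work.
    fn iter_results<R>(
        &self,
        f: impl for<'a> FnOnce(Box<dyn Iterator<Item = (&'a K, &'a V, u32)> + 'a>) -> R,
    ) -> R {
        let guards: Vec<_> = self.shards.iter().map(|s| s.lock().unwrap()).collect();
        let iter = guards.iter().flat_map(|g| g.iter()).map(|(k, v)| (k, &v.0, v.1));
        f(Box::new(iter))
    }
}

fn main() {
    let results = ShardedResults {
        shards: vec![
            Mutex::new(HashMap::from([("a".to_string(), (1u64, 10u32))])),
            Mutex::new(HashMap::from([("b".to_string(), (2u64, 11u32))])),
        ],
    };
    // Usage mirroring encode_query_results: copy out what we need inside the
    // callback, then the locks are released.
    let snapshot: Vec<_> =
        results.iter_results(|iter| iter.map(|(k, v, idx)| (k.clone(), *v, idx)).collect());
    println!("{:?}", snapshot);
}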
+ let mut hasher = FxHasher::default(); + key.hash(&mut hasher); + let key_hash = hasher.finish(); + + let shard = self.shards.get_shard_index_by_hash(key_hash); + let lock = self.shards.get_shard_by_index(shard).lock(); + QueryLookup { key_hash, shard, lock } } } @@ -58,142 +81,134 @@ pub(super) enum QueryResult<'tcx> { Poisoned, } -impl<'tcx, M: QueryConfig<'tcx>> Default for QueryCache<'tcx, M> { - fn default() -> QueryCache<'tcx, M> { - QueryCache { - results: FxHashMap::default(), - active: FxHashMap::default(), - jobs: 0, +impl<'tcx, M: QueryAccessors<'tcx>> QueryState<'tcx, M> { + pub fn iter_results( + &self, + f: impl for<'a> FnOnce( + Box + 'a>, + ) -> R, + ) -> R { + self.cache.iter(&self.shards, |shard| &mut shard.cache, f) + } + pub fn all_inactive(&self) -> bool { + let shards = self.shards.lock_shards(); + shards.iter().all(|shard| shard.active.is_empty()) + } +} + +impl<'tcx, M: QueryAccessors<'tcx>> Default for QueryState<'tcx, M> { + fn default() -> QueryState<'tcx, M> { + QueryState { + cache: M::Cache::default(), + shards: Default::default(), #[cfg(debug_assertions)] - cache_hits: 0, + cache_hits: AtomicUsize::new(0), } } } +/// Values used when checking a query cache which can be reused on a cache-miss to execute the query. +pub(crate) struct QueryLookup<'tcx, Q: QueryAccessors<'tcx>> { + pub(super) key_hash: u64, + pub(super) shard: usize, + pub(super) lock: LockGuard<'tcx, QueryStateShard<'tcx, Q>>, +} + /// A type representing the responsibility to execute the job in the `job` field. /// This will poison the relevant query if dropped. -pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> { - cache: &'a Sharded>, +pub(super) struct JobOwner<'tcx, Q: QueryDescription<'tcx>> { + tcx: TyCtxt<'tcx>, key: Q::Key, id: QueryJobId, } -impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> { +impl<'tcx, Q: QueryDescription<'tcx>> JobOwner<'tcx, Q> { /// Either gets a `JobOwner` corresponding the query, allowing us to /// start executing the query, or returns with the result of the query. - /// If the query is executing elsewhere, this will wait for it. + /// This function assumes that `try_get_cached` is already called and returned `lookup`. + /// If the query is executing elsewhere, this will wait for it and return the result. /// If the query panicked, this will silently panic. /// /// This function is inlined because that results in a noticeable speed-up /// for some compile-time benchmarks. #[inline(always)] - pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> { - // Handling the `query_blocked_prof_timer` is a bit weird because of the - // control flow in this function: Blocking is implemented by - // awaiting a running job and, once that is done, entering the loop below - // again from the top. In that second iteration we will hit the - // cache which provides us with the information we need for - // finishing the "query-blocked" event. - // - // We thus allocate `query_blocked_prof_timer` outside the loop, - // initialize it during the first iteration and finish it during the - // second iteration. - #[cfg(parallel_compiler)] - let mut query_blocked_prof_timer: Option> = None; - - let cache = Q::query_cache(tcx); - loop { - // We compute the key's hash once and then use it for both the - // shard lookup and the hashmap lookup. This relies on the fact - // that both of them use `FxHasher`. 
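`get_lookup` above hashes the key once with `FxHasher` and reuses that hash both to pick the shard and for the `raw_entry().from_key_hashed_nocheck(..)` lookup inside it. A rough stable-Rust sketch of the idea, where std's `DefaultHasher` and a plain `get` stand in (since `raw_entry` is not stable) and the shard-index formula is only similar in spirit to rustc's `Sharded`:

use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::Mutex;

const SHARD_BITS: u32 = 5;
const SHARDS: usize = 1 << SHARD_BITS;

struct ShardedMap<K, V> {
    shards: Vec<Mutex<HashMap<K, V>>>,
}

fn shard_index_by_hash(hash: u64) -> usize {
    // Take bits from the top of the hash so the shard choice stays mostly
    // independent of the low bits the hashmap itself consumes.
    (hash >> (64 - SHARD_BITS)) as usize
}

impl<K: Eq + Hash, V: Clone> ShardedMap<K, V> {
    fn new() -> Self {
        ShardedMap { shards: (0..SHARDS).map(|_| Mutex::new(HashMap::new())).collect() }
    }

    fn lookup(&self, key: &K) -> Option<V> {
        // Hash once...
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        key.hash(&mut hasher);
        let key_hash = hasher.finish();

        // ...then pick the shard from that hash and search inside it.
        let shard = &self.shards[shard_index_by_hash(key_hash)];
        shard.lock().unwrap().get(key).cloned()
    }

    fn insert(&self, key: K, value: V) {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        key.hash(&mut hasher);
        let shard = &self.shards[shard_index_by_hash(hasher.finish())];
        shard.lock().unwrap().insert(key, value);
    }
}

fn main() {
    let map = ShardedMap::new();
    map.insert("typeck", 42u32);
    assert_eq!(map.lookup(&"typeck"), Some(42));
}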
- let mut state = FxHasher::default(); - key.hash(&mut state); - let key_hash = state.finish(); - - let shard = cache.get_shard_index_by_hash(key_hash); - let mut lock_guard = cache.get_shard_by_index(shard).lock(); - let lock = &mut *lock_guard; - - if let Some((_, value)) = - lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key) - { - if unlikely!(tcx.prof.enabled()) { - tcx.prof.query_cache_hit(value.index.into()); + pub(super) fn try_start( + tcx: TyCtxt<'tcx>, + span: Span, + key: &Q::Key, + mut lookup: QueryLookup<'tcx, Q>, + ) -> TryGetJob<'tcx, Q> { + let lock = &mut *lookup.lock; + + let (latch, mut _query_blocked_prof_timer) = match lock.active.entry((*key).clone()) { + Entry::Occupied(mut entry) => { + match entry.get_mut() { + QueryResult::Started(job) => { + // For parallel queries, we'll block and wait until the query running + // in another thread has completed. Record how long we wait in the + // self-profiler. + let _query_blocked_prof_timer = if cfg!(parallel_compiler) { + Some(tcx.prof.query_blocked()) + } else { + None + }; - #[cfg(parallel_compiler)] - { - if let Some(prof_timer) = query_blocked_prof_timer.take() { - prof_timer.finish_with_query_invocation_id(value.index.into()); - } - } - } + // Create the id of the job we're waiting for + let id = QueryJobId::new(job.id, lookup.shard, Q::dep_kind()); - let result = (value.value.clone(), value.index); - #[cfg(debug_assertions)] - { - lock.cache_hits += 1; + (job.latch(id), _query_blocked_prof_timer) + } + QueryResult::Poisoned => FatalError.raise(), } - return TryGetJob::JobCompleted(result); } + Entry::Vacant(entry) => { + // No job entry for this query. Return a new one to be started later. - let latch = match lock.active.entry((*key).clone()) { - Entry::Occupied(mut entry) => { - match entry.get_mut() { - QueryResult::Started(job) => { - // For parallel queries, we'll block and wait until the query running - // in another thread has completed. Record how long we wait in the - // self-profiler. - #[cfg(parallel_compiler)] - { - query_blocked_prof_timer = Some(tcx.prof.query_blocked()); - } - - // Create the id of the job we're waiting for - let id = QueryJobId::new(job.id, shard, Q::dep_kind()); - - job.latch(id) - } - QueryResult::Poisoned => FatalError.raise(), - } - } - Entry::Vacant(entry) => { - // No job entry for this query. Return a new one to be started later. + // Generate an id unique within this shard. + let id = lock.jobs.checked_add(1).unwrap(); + lock.jobs = id; + let id = QueryShardJobId(NonZeroU32::new(id).unwrap()); + + let global_id = QueryJobId::new(id, lookup.shard, Q::dep_kind()); - // Generate an id unique within this shard. - let id = lock.jobs.checked_add(1).unwrap(); - lock.jobs = id; - let id = QueryShardJobId(NonZeroU32::new(id).unwrap()); + let job = tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query)); - let global_id = QueryJobId::new(id, shard, Q::dep_kind()); + entry.insert(QueryResult::Started(job)); - let job = - tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query)); + let owner = JobOwner { tcx, id: global_id, key: (*key).clone() }; + return TryGetJob::NotYetStarted(owner); + } + }; + mem::drop(lookup.lock); - entry.insert(QueryResult::Started(job)); + // If we are single-threaded we know that we have cycle error, + // so we just return the error. 
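`try_start` above assumes the caller already missed the cache via `try_get_cached`, so it only decides between joining an in-flight job and starting a new one, keyed off the `active` map's `Occupied`/`Vacant` entry. A stripped-down sketch of that decision, with the latches, shards, and profiling removed and `u32` standing in for the per-shard job id:

use std::collections::hash_map::Entry;
use std::collections::HashMap;

enum JobState {
    Started(u32), // id of the job currently computing the query
    Poisoned,     // that job panicked; waiters should panic too
}

enum StartOutcome {
    RunIt(u32),   // we own the job and must execute the query
    WaitFor(u32), // someone else is running it; block on their job
}

fn try_start(active: &mut HashMap<String, JobState>, jobs: &mut u32, key: &str) -> StartOutcome {
    match active.entry(key.to_string()) {
        Entry::Occupied(entry) => match entry.get() {
            // Another thread already started this query: wait for its job.
            JobState::Started(id) => StartOutcome::WaitFor(*id),
            JobState::Poisoned => panic!("query poisoned"),
        },
        Entry::Vacant(entry) => {
            // Mint an id unique within this shard and record the new job.
            *jobs = jobs.checked_add(1).unwrap();
            let id = *jobs;
            entry.insert(JobState::Started(id));
            StartOutcome::RunIt(id)
        }
    }
}

fn main() {
    let mut active = HashMap::new();
    let mut jobs = 0;
    assert!(matches!(try_start(&mut active, &mut jobs, "typeck"), StartOutcome::RunIt(1)));
    assert!(matches!(try_start(&mut active, &mut jobs, "typeck"), StartOutcome::WaitFor(1)));
}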
+ #[cfg(not(parallel_compiler))] + return TryGetJob::Cycle(cold_path(|| { + Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span)) + })); - let owner = JobOwner { cache, id: global_id, key: (*key).clone() }; - return TryGetJob::NotYetStarted(owner); - } - }; - mem::drop(lock_guard); + // With parallel queries we might just have to wait on some other + // thread. + #[cfg(parallel_compiler)] + { + let result = latch.wait_on(tcx, span); - // If we are single-threaded we know that we have cycle error, - // so we just return the error. - #[cfg(not(parallel_compiler))] - return TryGetJob::Cycle(cold_path(|| { - Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span)) - })); + if let Err(cycle) = result { + return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle)); + } - // With parallel queries we might just have to wait on some other - // thread. - #[cfg(parallel_compiler)] - { - let result = latch.wait_on(tcx, span); + let cached = tcx.try_get_cached::( + (*key).clone(), + |value, index| (value.clone(), index), + |_, _| panic!("value must be in cache after waiting"), + ); - if let Err(cycle) = result { - return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle)); - } + if let Some(prof_timer) = _query_blocked_prof_timer.take() { + prof_timer.finish_with_query_invocation_id(cached.1.into()); } + + return TryGetJob::JobCompleted(cached); } } @@ -203,19 +218,20 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> { pub(super) fn complete(self, result: &Q::Value, dep_node_index: DepNodeIndex) { // We can move out of `self` here because we `mem::forget` it below let key = unsafe { ptr::read(&self.key) }; - let cache = self.cache; + let tcx = self.tcx; // Forget ourself so our destructor won't poison the query mem::forget(self); - let value = QueryValue::new(result.clone(), dep_node_index); let job = { - let mut lock = cache.get_shard_by_value(&key).lock(); + let state = Q::query_state(tcx); + let result = result.clone(); + let mut lock = state.shards.get_shard_by_value(&key).lock(); let job = match lock.active.remove(&key).unwrap() { QueryResult::Started(job) => job, QueryResult::Poisoned => panic!(), }; - lock.results.insert(key, value); + state.cache.complete(tcx, &mut lock.cache, key, result, dep_node_index); job }; @@ -233,12 +249,13 @@ where (result, diagnostics.into_inner()) } -impl<'a, 'tcx, Q: QueryDescription<'tcx>> Drop for JobOwner<'a, 'tcx, Q> { +impl<'tcx, Q: QueryDescription<'tcx>> Drop for JobOwner<'tcx, Q> { #[inline(never)] #[cold] fn drop(&mut self) { // Poison the query so jobs waiting on it panic. - let shard = self.cache.get_shard_by_value(&self.key); + let state = Q::query_state(self.tcx); + let shard = state.shards.get_shard_by_value(&self.key); let job = { let mut shard = shard.lock(); let job = match shard.active.remove(&self.key).unwrap() { @@ -261,14 +278,15 @@ pub struct CycleError<'tcx> { pub(super) cycle: Vec>, } -/// The result of `try_get_lock`. -pub(super) enum TryGetJob<'a, 'tcx, D: QueryDescription<'tcx>> { +/// The result of `try_start`. +pub(super) enum TryGetJob<'tcx, D: QueryDescription<'tcx>> { /// The query is not yet started. Contains a guard to the cache eventually used to start it. - NotYetStarted(JobOwner<'a, 'tcx, D>), + NotYetStarted(JobOwner<'tcx, D>), /// The query was already completed. /// Returns the result of the query and its dep-node index /// if it succeeded or a cycle error if it failed. + #[cfg(parallel_compiler)] JobCompleted((D::Value, DepNodeIndex)), /// Trying to execute the query resulted in a cycle. 
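`TryGetJob::JobCompleted` is now `#[cfg(parallel_compiler)]`-only: in the single-threaded compiler the cache is probed before `try_start`, so reaching this point can only mean a cycle, while a parallel waiter re-probes the cache after the latch opens. The hunk that follows adds that cache probe as `try_get_cached`; its rough shape, with closures mirroring the `on_hit`/`on_miss` parameters (illustrative only, the real version also threads `DepNodeIndex`, shards, and profiling through):

use std::collections::HashMap;

fn try_get_cached<K: Eq + std::hash::Hash, V, R>(
    cache: &HashMap<K, V>,
    key: K,
    on_hit: impl FnOnce(&V) -> R,
    on_miss: impl FnOnce(K) -> R,
) -> R {
    match cache.get(&key) {
        Some(value) => on_hit(value),
        None => on_miss(key),
    }
}

fn main() {
    let mut cache = HashMap::new();
    cache.insert("crate_hash", 0xabcdu64);

    // Hit path: read straight from the cache.
    let hit = try_get_cached(&cache, "crate_hash", |v| *v, |_| unreachable!());
    assert_eq!(hit, 0xabcd);

    // Miss path: this is where try_execute_query would start (or wait on)
    // a query job; here we just produce a value directly.
    let miss = try_get_cached(&cache, "type_of", |v| *v, |_key| 7u64);
    assert_eq!(miss, 7);
}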
@@ -396,13 +414,72 @@ impl<'tcx> TyCtxt<'tcx> { eprintln!("end of query stack"); } + /// Checks if the query is already computed and in the cache. + /// It returns the shard index and a lock guard to the shard, + /// which will be used if the query is not in the cache and we need + /// to compute it. + #[inline(always)] + fn try_get_cached( + self, + key: Q::Key, + // `on_hit` can be called while holding a lock to the query cache + on_hit: OnHit, + on_miss: OnMiss, + ) -> R + where + Q: QueryDescription<'tcx> + 'tcx, + OnHit: FnOnce(&Q::Value, DepNodeIndex) -> R, + OnMiss: FnOnce(Q::Key, QueryLookup<'tcx, Q>) -> R, + { + let state = Q::query_state(self); + + state.cache.lookup( + state, + QueryStateShard::::get_cache, + key, + |value, index| { + if unlikely!(self.prof.enabled()) { + self.prof.query_cache_hit(index.into()); + } + #[cfg(debug_assertions)] + { + state.cache_hits.fetch_add(1, Ordering::Relaxed); + } + on_hit(value, index) + }, + on_miss, + ) + } + #[inline(never)] - pub(super) fn get_query>(self, span: Span, key: Q::Key) -> Q::Value { + pub(super) fn get_query + 'tcx>( + self, + span: Span, + key: Q::Key, + ) -> Q::Value { debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME, key, span); - let job = match JobOwner::try_get(self, span, &key) { + self.try_get_cached::( + key, + |value, index| { + self.dep_graph.read_index(index); + value.clone() + }, + |key, lookup| self.try_execute_query::(span, key, lookup), + ) + } + + #[inline(always)] + pub(super) fn try_execute_query>( + self, + span: Span, + key: Q::Key, + lookup: QueryLookup<'tcx, Q>, + ) -> Q::Value { + let job = match JobOwner::try_start(self, span, &key, lookup) { TryGetJob::NotYetStarted(job) => job, TryGetJob::Cycle(result) => return result, + #[cfg(parallel_compiler)] TryGetJob::JobCompleted((v, index)) => { self.dep_graph.read_index(index); return v; @@ -560,7 +637,7 @@ impl<'tcx> TyCtxt<'tcx> { fn force_query_with_job>( self, key: Q::Key, - job: JobOwner<'_, 'tcx, Q>, + job: JobOwner<'tcx, Q>, dep_node: DepNode, ) -> (Q::Value, DepNodeIndex) { // If the following assertion triggers, it can have two reasons: @@ -615,7 +692,7 @@ impl<'tcx> TyCtxt<'tcx> { /// side-effects -- e.g., in order to report errors for erroneous programs. /// /// Note: The optimization is only available during incr. comp. - pub(super) fn ensure_query>(self, key: Q::Key) -> () { + pub(super) fn ensure_query + 'tcx>(self, key: Q::Key) -> () { if Q::EVAL_ALWAYS { let _ = self.get_query::(DUMMY_SP, key); return; @@ -643,14 +720,30 @@ impl<'tcx> TyCtxt<'tcx> { } #[allow(dead_code)] - fn force_query>(self, key: Q::Key, span: Span, dep_node: DepNode) { + fn force_query + 'tcx>( + self, + key: Q::Key, + span: Span, + dep_node: DepNode, + ) { // We may be concurrently trying both execute and force a query. // Ensure that only one of them runs the query. - let job = match JobOwner::try_get(self, span, &key) { - TryGetJob::NotYetStarted(job) => job, - TryGetJob::Cycle(_) | TryGetJob::JobCompleted(_) => return, - }; - self.force_query_with_job::(key, job, dep_node); + + self.try_get_cached::( + key, + |_, _| { + // Cache hit, do nothing + }, + |key, lookup| { + let job = match JobOwner::try_start(self, span, &key, lookup) { + TryGetJob::NotYetStarted(job) => job, + TryGetJob::Cycle(_) => return, + #[cfg(parallel_compiler)] + TryGetJob::JobCompleted(_) => return, + }; + self.force_query_with_job::(key, job, dep_node); + }, + ); } } @@ -659,17 +752,17 @@ macro_rules! 
handle_cycle_error { $tcx.report_cycle($error).emit(); Value::from_cycle_error($tcx) }}; - ([fatal_cycle$(, $modifiers:ident)*][$tcx:expr, $error:expr]) => {{ + ([fatal_cycle $($rest:tt)*][$tcx:expr, $error:expr]) => {{ $tcx.report_cycle($error).emit(); $tcx.sess.abort_if_errors(); unreachable!() }}; - ([cycle_delay_bug$(, $modifiers:ident)*][$tcx:expr, $error:expr]) => {{ + ([cycle_delay_bug $($rest:tt)*][$tcx:expr, $error:expr]) => {{ $tcx.report_cycle($error).delay_as_bug(); Value::from_cycle_error($tcx) }}; - ([$other:ident$(, $modifiers:ident)*][$($args:tt)*]) => { - handle_cycle_error!([$($modifiers),*][$($args)*]) + ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*][$($args:tt)*]) => { + handle_cycle_error!([$($($modifiers)*)*][$($args)*]) }; } @@ -677,11 +770,11 @@ macro_rules! is_anon { ([]) => {{ false }}; - ([anon$(, $modifiers:ident)*]) => {{ + ([anon $($rest:tt)*]) => {{ true }}; - ([$other:ident$(, $modifiers:ident)*]) => { - is_anon!([$($modifiers),*]) + ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*]) => { + is_anon!([$($($modifiers)*)*]) }; } @@ -689,11 +782,23 @@ macro_rules! is_eval_always { ([]) => {{ false }}; - ([eval_always$(, $modifiers:ident)*]) => {{ + ([eval_always $($rest:tt)*]) => {{ true }}; - ([$other:ident$(, $modifiers:ident)*]) => { - is_eval_always!([$($modifiers),*]) + ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*]) => { + is_eval_always!([$($($modifiers)*)*]) + }; +} + +macro_rules! query_storage { + ([][$K:ty, $V:ty]) => { + <<$K as Key>::CacheSelector as CacheSelector<$K, $V>>::Cache + }; + ([storage($ty:ty) $($rest:tt)*][$K:ty, $V:ty]) => { + $ty + }; + ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*][$($args:tt)*]) => { + query_storage!([$($($modifiers)*)*][$($args)*]) }; } @@ -701,11 +806,11 @@ macro_rules! hash_result { ([][$hcx:expr, $result:expr]) => {{ dep_graph::hash_result($hcx, &$result) }}; - ([no_hash$(, $modifiers:ident)*][$hcx:expr, $result:expr]) => {{ + ([no_hash $($rest:tt)*][$hcx:expr, $result:expr]) => {{ None }}; - ([$other:ident$(, $modifiers:ident)*][$($args:tt)*]) => { - hash_result!([$($modifiers),*][$($args)*]) + ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*][$($args:tt)*]) => { + hash_result!([$($($modifiers)*)*][$($args)*]) }; } @@ -725,7 +830,6 @@ macro_rules! define_queries_inner { [$($modifiers:tt)*] fn $name:ident: $node:ident($K:ty) -> $V:ty,)*) => { use std::mem; - use rustc_data_structures::sharded::Sharded; use crate::{ rustc_data_structures::stable_hasher::HashStable, rustc_data_structures::stable_hasher::StableHasher, @@ -760,11 +864,11 @@ macro_rules! define_queries_inner { $( // We use try_lock_shards here since we are called from the // deadlock handler, and this shouldn't be locked. - let shards = self.$name.try_lock_shards()?; + let shards = self.$name.shards.try_lock_shards()?; let shards = shards.iter().enumerate(); jobs.extend(shards.flat_map(|(shard_id, shard)| { shard.active.iter().filter_map(move |(k, v)| { - if let QueryResult::Started(ref job) = *v { + if let QueryResult::Started(ref job) = *v { let id = QueryJobId { job: job.id, shard: u16::try_from(shard_id).unwrap(), @@ -776,111 +880,15 @@ macro_rules! 
define_queries_inner { query: queries::$name::query(k.clone()) }; Some((id, QueryJobInfo { info, job: job.clone() })) - } else { - None - } + } else { + None + } }) })); )* Some(jobs) } - - pub fn print_stats(&self) { - let mut queries = Vec::new(); - - #[derive(Clone)] - struct QueryStats { - name: &'static str, - cache_hits: usize, - key_size: usize, - key_type: &'static str, - value_size: usize, - value_type: &'static str, - entry_count: usize, - } - - fn stats<'tcx, Q: QueryConfig<'tcx>>( - name: &'static str, - map: &Sharded>, - ) -> QueryStats { - let map = map.lock_shards(); - QueryStats { - name, - #[cfg(debug_assertions)] - cache_hits: map.iter().map(|shard| shard.cache_hits).sum(), - #[cfg(not(debug_assertions))] - cache_hits: 0, - key_size: mem::size_of::(), - key_type: type_name::(), - value_size: mem::size_of::(), - value_type: type_name::(), - entry_count: map.iter().map(|shard| shard.results.len()).sum(), - } - } - - $( - queries.push(stats::>( - stringify!($name), - &self.$name, - )); - )* - - if cfg!(debug_assertions) { - let hits: usize = queries.iter().map(|s| s.cache_hits).sum(); - let results: usize = queries.iter().map(|s| s.entry_count).sum(); - println!("\nQuery cache hit rate: {}", hits as f64 / (hits + results) as f64); - } - - let mut query_key_sizes = queries.clone(); - query_key_sizes.sort_by_key(|q| q.key_size); - println!("\nLarge query keys:"); - for q in query_key_sizes.iter().rev() - .filter(|q| q.key_size > 8) { - println!( - " {} - {} x {} - {}", - q.name, - q.key_size, - q.entry_count, - q.key_type - ); - } - - let mut query_value_sizes = queries.clone(); - query_value_sizes.sort_by_key(|q| q.value_size); - println!("\nLarge query values:"); - for q in query_value_sizes.iter().rev() - .filter(|q| q.value_size > 8) { - println!( - " {} - {} x {} - {}", - q.name, - q.value_size, - q.entry_count, - q.value_type - ); - } - - if cfg!(debug_assertions) { - let mut query_cache_hits = queries.clone(); - query_cache_hits.sort_by_key(|q| q.cache_hits); - println!("\nQuery cache hits:"); - for q in query_cache_hits.iter().rev() { - println!( - " {} - {} ({}%)", - q.name, - q.cache_hits, - q.cache_hits as f64 / (q.cache_hits + q.entry_count) as f64 - ); - } - } - - let mut query_value_count = queries.clone(); - query_value_count.sort_by_key(|q| q.entry_count); - println!("\nQuery value count:"); - for q in query_value_count.iter().rev() { - println!(" {} - {}", q.name, q.entry_count); - } - } } #[allow(nonstandard_style)] @@ -956,7 +964,6 @@ macro_rules! define_queries_inner { $(impl<$tcx> QueryConfig<$tcx> for queries::$name<$tcx> { type Key = $K; type Value = $V; - const NAME: &'static str = stringify!($name); const CATEGORY: ProfileCategory = $category; } @@ -965,22 +972,22 @@ macro_rules! define_queries_inner { const ANON: bool = is_anon!([$($modifiers)*]); const EVAL_ALWAYS: bool = is_eval_always!([$($modifiers)*]); + type Cache = query_storage!([$($modifiers)*][$K, $V]); + #[inline(always)] fn query(key: Self::Key) -> Query<'tcx> { Query::$name(key) } #[inline(always)] - fn query_cache<'a>(tcx: TyCtxt<$tcx>) -> &'a Sharded> { + fn query_state<'a>(tcx: TyCtxt<$tcx>) -> &'a QueryState<$tcx, Self> { &tcx.queries.$name } #[allow(unused)] #[inline(always)] fn to_dep_node(tcx: TyCtxt<$tcx>, key: &Self::Key) -> DepNode { - use crate::dep_graph::DepConstructor::*; - - DepNode::new(tcx, $node(*key)) + DepConstructor::$node(tcx, *key) } #[inline(always)] @@ -1132,7 +1139,7 @@ macro_rules! 
define_queries_struct { providers: IndexVec>, fallback_extern_providers: Box>, - $($(#[$attr])* $name: Sharded>>,)* + $($(#[$attr])* $name: QueryState<$tcx, queries::$name<$tcx>>,)* } }; } diff --git a/src/librustc/ty/query/profiling_support.rs b/src/librustc/ty/query/profiling_support.rs index 79b32ba83aea0..99ada34d59ebe 100644 --- a/src/librustc/ty/query/profiling_support.rs +++ b/src/librustc/ty/query/profiling_support.rs @@ -1,11 +1,10 @@ use crate::hir::map::definitions::DefPathData; use crate::ty::context::TyCtxt; -use crate::ty::query::config::QueryConfig; -use crate::ty::query::plumbing::QueryCache; +use crate::ty::query::config::QueryAccessors; +use crate::ty::query::plumbing::QueryState; use measureme::{StringComponent, StringId}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::profiling::SelfProfiler; -use rustc_data_structures::sharded::Sharded; use rustc_hir::def_id::{CrateNum, DefId, DefIndex, CRATE_DEF_INDEX, LOCAL_CRATE}; use std::fmt::Debug; use std::io::Write; @@ -161,10 +160,10 @@ where pub(super) fn alloc_self_profile_query_strings_for_query_cache<'tcx, Q>( tcx: TyCtxt<'tcx>, query_name: &'static str, - query_cache: &Sharded>, + query_state: &QueryState<'tcx, Q>, string_cache: &mut QueryKeyStringCache, ) where - Q: QueryConfig<'tcx>, + Q: QueryAccessors<'tcx>, { tcx.prof.with_profiler(|profiler| { let event_id_builder = profiler.event_id_builder(); @@ -181,20 +180,8 @@ pub(super) fn alloc_self_profile_query_strings_for_query_cache<'tcx, Q>( // need to invoke queries itself, we cannot keep the query caches // locked while doing so. Instead we copy out the // `(query_key, dep_node_index)` pairs and release the lock again. - let query_keys_and_indices = { - let shards = query_cache.lock_shards(); - let len = shards.iter().map(|shard| shard.results.len()).sum(); - - let mut query_keys_and_indices = Vec::with_capacity(len); - - for shard in &shards { - query_keys_and_indices.extend( - shard.results.iter().map(|(q_key, q_val)| (q_key.clone(), q_val.index)), - ); - } - - query_keys_and_indices - }; + let query_keys_and_indices: Vec<_> = query_state + .iter_results(|results| results.map(|(k, _, i)| (k.clone(), i)).collect()); // Now actually allocate the strings. 
If allocating the strings // generates new entries in the query cache, we'll miss them but @@ -218,18 +205,14 @@ pub(super) fn alloc_self_profile_query_strings_for_query_cache<'tcx, Q>( let query_name = profiler.get_or_alloc_cached_string(query_name); let event_id = event_id_builder.from_label(query_name).to_string_id(); - let shards = query_cache.lock_shards(); + query_state.iter_results(|results| { + let query_invocation_ids: Vec<_> = results.map(|v| v.2.into()).collect(); - for shard in shards.iter() { - let query_invocation_ids = shard - .results - .values() - .map(|v| v.index) - .map(|dep_node_index| dep_node_index.into()); - - profiler - .bulk_map_query_invocation_id_to_single_string(query_invocation_ids, event_id); - } + profiler.bulk_map_query_invocation_id_to_single_string( + query_invocation_ids.into_iter(), + event_id, + ); + }); } }); } diff --git a/src/librustc/ty/query/stats.rs b/src/librustc/ty/query/stats.rs new file mode 100644 index 0000000000000..d257320d4eaf6 --- /dev/null +++ b/src/librustc/ty/query/stats.rs @@ -0,0 +1,139 @@ +use crate::ty::query::config::QueryAccessors; +use crate::ty::query::plumbing::QueryState; +use crate::ty::query::queries; +use crate::ty::TyCtxt; +use rustc_hir::def_id::{DefId, LOCAL_CRATE}; + +use std::any::type_name; +use std::mem; +#[cfg(debug_assertions)] +use std::sync::atomic::Ordering; + +trait KeyStats { + fn key_stats(&self, stats: &mut QueryStats); +} + +impl KeyStats for T { + default fn key_stats(&self, _: &mut QueryStats) {} +} + +impl KeyStats for DefId { + fn key_stats(&self, stats: &mut QueryStats) { + if self.krate == LOCAL_CRATE { + stats.local_def_id_keys = Some(stats.local_def_id_keys.unwrap_or(0) + 1); + } + } +} + +#[derive(Clone)] +struct QueryStats { + name: &'static str, + cache_hits: usize, + key_size: usize, + key_type: &'static str, + value_size: usize, + value_type: &'static str, + entry_count: usize, + local_def_id_keys: Option, +} + +fn stats<'tcx, Q: QueryAccessors<'tcx>>( + name: &'static str, + map: &QueryState<'tcx, Q>, +) -> QueryStats { + let mut stats = QueryStats { + name, + #[cfg(debug_assertions)] + cache_hits: map.cache_hits.load(Ordering::Relaxed), + #[cfg(not(debug_assertions))] + cache_hits: 0, + key_size: mem::size_of::(), + key_type: type_name::(), + value_size: mem::size_of::(), + value_type: type_name::(), + entry_count: map.iter_results(|results| results.count()), + local_def_id_keys: None, + }; + map.iter_results(|results| { + for (key, _, _) in results { + key.key_stats(&mut stats) + } + }); + stats +} + +pub fn print_stats(tcx: TyCtxt<'_>) { + let queries = query_stats(tcx); + + if cfg!(debug_assertions) { + let hits: usize = queries.iter().map(|s| s.cache_hits).sum(); + let results: usize = queries.iter().map(|s| s.entry_count).sum(); + println!("\nQuery cache hit rate: {}", hits as f64 / (hits + results) as f64); + } + + let mut query_key_sizes = queries.clone(); + query_key_sizes.sort_by_key(|q| q.key_size); + println!("\nLarge query keys:"); + for q in query_key_sizes.iter().rev().filter(|q| q.key_size > 8) { + println!(" {} - {} x {} - {}", q.name, q.key_size, q.entry_count, q.key_type); + } + + let mut query_value_sizes = queries.clone(); + query_value_sizes.sort_by_key(|q| q.value_size); + println!("\nLarge query values:"); + for q in query_value_sizes.iter().rev().filter(|q| q.value_size > 8) { + println!(" {} - {} x {} - {}", q.name, q.value_size, q.entry_count, q.value_type); + } + + if cfg!(debug_assertions) { + let mut query_cache_hits = queries.clone(); + 
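The new stats module above uses specialization to count how many query keys are local `DefId`s: a blanket `default fn` does nothing, and an impl for `DefId` overrides it. A standalone sketch of that trick (nightly-only, since it needs the unstable `specialization` feature; `DefIdSketch` and `LOCAL_CRATE_SKETCH` are stand-ins, not the rustc items):

#![feature(specialization)]
#![allow(incomplete_features)]

struct Stats { local_def_id_keys: Option<usize> }

#[derive(Clone, Copy)]
struct DefIdSketch { krate: u32 }
const LOCAL_CRATE_SKETCH: u32 = 0;

trait KeyStats {
    fn key_stats(&self, stats: &mut Stats);
}

// Blanket default: most key types contribute nothing to this statistic.
impl<T> KeyStats for T {
    default fn key_stats(&self, _: &mut Stats) {}
}

// Specialized impl: DefId-like keys pointing into the local crate are counted.
impl KeyStats for DefIdSketch {
    fn key_stats(&self, stats: &mut Stats) {
        if self.krate == LOCAL_CRATE_SKETCH {
            stats.local_def_id_keys = Some(stats.local_def_id_keys.unwrap_or(0) + 1);
        }
    }
}

fn main() {
    let mut stats = Stats { local_def_id_keys: None };
    // A u32 key hits the default no-op impl...
    7u32.key_stats(&mut stats);
    // ...while DefId-like keys are counted.
    DefIdSketch { krate: LOCAL_CRATE_SKETCH }.key_stats(&mut stats);
    assert_eq!(stats.local_def_id_keys, Some(1));
}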
query_cache_hits.sort_by_key(|q| q.cache_hits); + println!("\nQuery cache hits:"); + for q in query_cache_hits.iter().rev() { + println!( + " {} - {} ({}%)", + q.name, + q.cache_hits, + q.cache_hits as f64 / (q.cache_hits + q.entry_count) as f64 + ); + } + } + + let mut query_value_count = queries.clone(); + query_value_count.sort_by_key(|q| q.entry_count); + println!("\nQuery value count:"); + for q in query_value_count.iter().rev() { + println!(" {} - {}", q.name, q.entry_count); + } + + let mut def_id_density: Vec<_> = + queries.iter().filter(|q| q.local_def_id_keys.is_some()).collect(); + def_id_density.sort_by_key(|q| q.local_def_id_keys.unwrap()); + println!("\nLocal DefId density:"); + let total = tcx.hir().definitions().def_index_count() as f64; + for q in def_id_density.iter().rev() { + let local = q.local_def_id_keys.unwrap(); + println!(" {} - {} = ({}%)", q.name, local, (local as f64 * 100.0) / total); + } +} + +macro_rules! print_stats { + (<$tcx:tt> $($category:tt { + $($(#[$attr:meta])* [$($modifiers:tt)*] fn $name:ident: $node:ident($K:ty) -> $V:ty,)* + },)*) => { + fn query_stats(tcx: TyCtxt<'_>) -> Vec { + let mut queries = Vec::new(); + + $($( + queries.push(stats::>( + stringify!($name), + &tcx.queries.$name, + )); + )*)* + + queries + } + } +} + +rustc_query_append! { [print_stats!][<'tcx>] } diff --git a/src/librustc_data_structures/profiling.rs b/src/librustc_data_structures/profiling.rs index 1d0ac4f4907d1..f2c80510f2269 100644 --- a/src/librustc_data_structures/profiling.rs +++ b/src/librustc_data_structures/profiling.rs @@ -81,6 +81,7 @@ //! //! [mm]: https://github.com/rust-lang/measureme/ +use crate::cold_path; use crate::fx::FxHashMap; use std::borrow::Borrow; @@ -531,9 +532,11 @@ impl<'a> TimingGuard<'a> { #[inline] pub fn finish_with_query_invocation_id(self, query_invocation_id: QueryInvocationId) { if let Some(guard) = self.0 { - let event_id = StringId::new_virtual(query_invocation_id.0); - let event_id = EventId::from_virtual(event_id); - guard.finish_with_override_event_id(event_id); + cold_path(|| { + let event_id = StringId::new_virtual(query_invocation_id.0); + let event_id = EventId::from_virtual(event_id); + guard.finish_with_override_event_id(event_id); + }); } } diff --git a/src/librustc_data_structures/stable_hasher.rs b/src/librustc_data_structures/stable_hasher.rs index 0a26bf3bdc93f..a98e77cebd88a 100644 --- a/src/librustc_data_structures/stable_hasher.rs +++ b/src/librustc_data_structures/stable_hasher.rs @@ -27,6 +27,7 @@ pub trait StableHasherResult: Sized { } impl StableHasher { + #[inline] pub fn new() -> Self { StableHasher { state: SipHasher128::new_with_keys(0, 0) } } diff --git a/src/librustc_interface/passes.rs b/src/librustc_interface/passes.rs index 0b4a337051f01..96a2ac08f2c49 100644 --- a/src/librustc_interface/passes.rs +++ b/src/librustc_interface/passes.rs @@ -696,8 +696,8 @@ impl<'tcx> QueryContext<'tcx> { ty::tls::enter_global(self.0, |tcx| f(tcx)) } - pub fn print_stats(&self) { - self.0.queries.print_stats() + pub fn print_stats(&mut self) { + self.enter(|tcx| ty::query::print_stats(tcx)) } } diff --git a/src/librustc_interface/queries.rs b/src/librustc_interface/queries.rs index 720d162ac819e..0c77ab57500a0 100644 --- a/src/librustc_interface/queries.rs +++ b/src/librustc_interface/queries.rs @@ -340,7 +340,7 @@ impl Compiler { if self.session().opts.debugging_opts.query_stats { if let Ok(gcx) = queries.global_ctxt() { - gcx.peek().print_stats(); + gcx.peek_mut().print_stats(); } } diff --git 
a/src/librustc_macros/src/query.rs b/src/librustc_macros/src/query.rs index 294cdb7643fc9..6362f3c2c49f0 100644 --- a/src/librustc_macros/src/query.rs +++ b/src/librustc_macros/src/query.rs @@ -33,6 +33,9 @@ enum QueryModifier { /// The description of the query. Desc(Option, Punctuated), + /// Use this type for the in-memory cache. + Storage(Type), + /// Cache the query to disk if the `Expr` returns true. Cache(Option<(IdentOrWild, IdentOrWild)>, Block), @@ -106,6 +109,9 @@ impl Parse for QueryModifier { let id = args.parse()?; let block = input.parse()?; Ok(QueryModifier::LoadCached(tcx, id, block)) + } else if modifier == "storage" { + let ty = input.parse()?; + Ok(QueryModifier::Storage(ty)) } else if modifier == "fatal_cycle" { Ok(QueryModifier::FatalCycle) } else if modifier == "cycle_delay_bug" { @@ -198,6 +204,9 @@ struct QueryModifiers { /// The description of the query. desc: Option<(Option, Punctuated)>, + /// Use this type for the in-memory cache. + storage: Option, + /// Cache the query to disk if the `Block` returns true. cache: Option<(Option<(IdentOrWild, IdentOrWild)>, Block)>, @@ -226,6 +235,7 @@ struct QueryModifiers { /// Process query modifiers into a struct, erroring on duplicates fn process_modifiers(query: &mut Query) -> QueryModifiers { let mut load_cached = None; + let mut storage = None; let mut cache = None; let mut desc = None; let mut fatal_cycle = false; @@ -242,6 +252,12 @@ fn process_modifiers(query: &mut Query) -> QueryModifiers { } load_cached = Some((tcx, id, block)); } + QueryModifier::Storage(ty) => { + if storage.is_some() { + panic!("duplicate modifier `storage` for query `{}`", query.name); + } + storage = Some(ty); + } QueryModifier::Cache(args, expr) => { if cache.is_some() { panic!("duplicate modifier `cache` for query `{}`", query.name); @@ -294,6 +310,7 @@ fn process_modifiers(query: &mut Query) -> QueryModifiers { } QueryModifiers { load_cached, + storage, cache, desc, fatal_cycle, @@ -451,6 +468,10 @@ pub fn rustc_queries(input: TokenStream) -> TokenStream { if modifiers.fatal_cycle { attributes.push(quote! { fatal_cycle }); }; + // Pass on the storage modifier + if let Some(ref ty) = modifiers.storage { + attributes.push(quote! { storage(#ty) }); + }; // Pass on the cycle_delay_bug modifier if modifiers.cycle_delay_bug { attributes.push(quote! { cycle_delay_bug }); diff --git a/src/librustc_session/session.rs b/src/librustc_session/session.rs index 6f003043aa95c..2fb7977dce9ee 100644 --- a/src/librustc_session/session.rs +++ b/src/librustc_session/session.rs @@ -392,6 +392,7 @@ impl Session { ); } + #[inline] pub fn source_map(&self) -> &source_map::SourceMap { self.parse_sess.source_map() }
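The `storage` modifier parsed here in rustc_macros is forwarded as a `storage(...)` attribute and consumed by the `query_storage!` macro added in plumbing.rs: if present, the named type is used as that query's in-memory cache, otherwise the cache is derived from the key's `CacheSelector` (no query in this diff opts in yet). A toy version of that dispatch, with `HashMap` standing in for the selector-derived default:

use std::collections::HashMap;

// Two arms, like query_storage!: an explicit `storage(T)` wins, otherwise a
// default is derived from the key/value types. (The real macro derives the
// default through the key's CacheSelector rather than naming HashMap.)
macro_rules! query_storage_sketch {
    ([][$K:ty, $V:ty]) => { HashMap<$K, $V> };
    ([storage($ty:ty) $($rest:tt)*][$K:ty, $V:ty]) => { $ty };
}

type DefaultPicked = query_storage_sketch!([][u32, String]);
type OverridePicked = query_storage_sketch!([storage(Vec<(u32, String)>)][u32, String]);

fn main() {
    // A query without the modifier gets the default cache...
    let mut a: DefaultPicked = HashMap::new();
    a.insert(1, "one".into());

    // ...while `storage(Vec<(u32, String)>)` would pick the named type verbatim.
    let mut b: OverridePicked = Vec::new();
    b.push((2, "two".into()));

    println!("{} {}", a.len(), b.len());
}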