Merge branch 'mcpm' of git://git.linaro.org/people/nico/linux into devel-stable

commit a126f7c41d

CREDITS | 6

@@ -953,11 +953,11 @@ S: Blacksburg, Virginia 24061
S: USA

N: Randy Dunlap
E: rdunlap@xenotime.net
W: http://www.xenotime.net/linux/linux.html
W: http://www.linux-usb.org
E: rdunlap@infradead.org
W: http://www.infradead.org/~rdunlap/
D: Linux-USB subsystem, USB core/UHCI/printer/storage drivers
D: x86 SMP, ACPI, bootflag hacking
D: documentation, builds
S: (ask for current address)
S: USA

@@ -60,8 +60,7 @@ own source tree. For example:
"dontdiff" is a list of files which are generated by the kernel during
the build process, and should be ignored in any diff(1)-generated
patch.  The "dontdiff" file is included in the kernel tree in
2.6.12 and later.  For earlier kernel versions, you can get it
from <http://www.xenotime.net/linux/doc/dontdiff>.
2.6.12 and later.

Make sure your patch does not include any extra files which do not
belong in a patch submission.  Make sure to review your patch -after-

@@ -0,0 +1,498 @@
Cluster-wide Power-up/power-down race avoidance algorithm
=========================================================

This file documents the algorithm which is used to coordinate CPU and
cluster setup and teardown operations and to manage hardware coherency
controls safely.

The section "Rationale" explains what the algorithm is for and why it is
needed.  "Basic model" explains general concepts using a simplified view
of the system.  The other sections explain the actual details of the
algorithm in use.


Rationale
---------

In a system containing multiple CPUs, it is desirable to have the
ability to turn off individual CPUs when the system is idle, reducing
power consumption and thermal dissipation.

In a system containing multiple clusters of CPUs, it is also desirable
to have the ability to turn off entire clusters.

Turning entire clusters off and on is a risky business, because it
involves performing potentially destructive operations affecting a group
of independently running CPUs, while the OS continues to run.  This
means that we need some coordination in order to ensure that critical
cluster-level operations are only performed when it is truly safe to do
so.

Simple locking may not be sufficient to solve this problem, because
mechanisms like Linux spinlocks may rely on coherency mechanisms which
are not immediately enabled when a cluster powers up.  Since enabling or
disabling those mechanisms may itself be a non-atomic operation (such as
writing some hardware registers and invalidating large caches), other
methods of coordination are required in order to guarantee safe
power-down and power-up at the cluster level.

The mechanism presented in this document describes a coherent memory
based protocol for performing the needed coordination.  It aims to be as
lightweight as possible, while providing the required safety properties.


Basic model
-----------

Each cluster and CPU is assigned a state, as follows:

	DOWN
	COMING_UP
	UP
	GOING_DOWN

	    +---------> UP ----------+
	    |                        v
	COMING_UP                GOING_DOWN
	    ^                        |
	    +--------- DOWN <--------+

DOWN:	The CPU or cluster is not coherent, and is either powered off or
	suspended, or is ready to be powered off or suspended.

COMING_UP: The CPU or cluster has committed to moving to the UP state.
	It may be part way through the process of initialisation and
	enabling coherency.

UP:	The CPU or cluster is active and coherent at the hardware
	level.  A CPU in this state is not necessarily being used
	actively by the kernel.

GOING_DOWN: The CPU or cluster has committed to moving to the DOWN
	state.  It may be part way through the process of teardown and
	coherency exit.


Each CPU has one of these states assigned to it at any point in time.
The CPU states are described in the "CPU state" section, below.

Each cluster is also assigned a state, but it is necessary to split the
state value into two parts (the "cluster" state and "inbound" state) and
to introduce additional states in order to avoid races between different
CPUs in the cluster simultaneously modifying the state.  The cluster-
level states are described in the "Cluster state" section.

To help distinguish the CPU states from cluster states in this
discussion, the state names are given a CPU_ prefix for the CPU states,
and a CLUSTER_ or INBOUND_ prefix for the cluster states.
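
For orientation, these named states map naturally onto small integers
published in coherent memory.  A minimal sketch follows; the enum names
mirror the prefixes above, but the numeric values are illustrative
assumptions, not the encodings used by the actual ARM implementation:

	/* Illustrative only: each state fits in one byte of shared
	 * memory, so a single store can publish a transition.  The
	 * numeric values here are assumptions, not the kernel's. */
	enum cpu_state {
		CPU_DOWN	= 0,
		CPU_COMING_UP	= 1,
		CPU_UP		= 2,
		CPU_GOING_DOWN	= 3,
	};

	enum cluster_state {
		CLUSTER_DOWN		= 0,
		CLUSTER_UP		= 1,
		CLUSTER_GOING_DOWN	= 2,
	};

	enum inbound_state {
		INBOUND_NOT_COMING_UP	= 0,
		INBOUND_COMING_UP	= 1,
	};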


CPU state
---------

In this algorithm, each individual core in a multi-core processor is
referred to as a "CPU".  CPUs are assumed to be single-threaded:
therefore, a CPU can only be doing one thing at a single point in time.

This means that CPUs fit the basic model closely.

The algorithm defines the following states for each CPU in the system:

	CPU_DOWN
	CPU_COMING_UP
	CPU_UP
	CPU_GOING_DOWN

	 cluster setup and
	CPU setup complete          policy decision
	      +-----------> CPU_UP ------------+
	      |                                v
	CPU_COMING_UP                   CPU_GOING_DOWN
	      ^                                |
	      +----------- CPU_DOWN <----------+
	 policy decision           CPU teardown complete
	or hardware event


The definitions of the four states correspond closely to the states of
the basic model.

Transitions between states occur as follows.

A trigger event (spontaneous) means that the CPU can transition to the
next state as a result of making local progress only, with no
requirement for any external event to happen.


CPU_DOWN:

	A CPU reaches the CPU_DOWN state when it is ready for
	power-down.  On reaching this state, the CPU will typically
	power itself down or suspend itself, via a WFI instruction or a
	firmware call.

	Next state:	CPU_COMING_UP
	Conditions:	none
	Trigger events:

		a) an explicit hardware power-up operation, resulting
		   from a policy decision on another CPU;

		b) a hardware event, such as an interrupt.


CPU_COMING_UP:

	A CPU cannot start participating in hardware coherency until the
	cluster is set up and coherent.  If the cluster is not ready,
	then the CPU will wait in the CPU_COMING_UP state until the
	cluster has been set up.

	Next state:	CPU_UP
	Conditions:	The CPU's parent cluster must be in CLUSTER_UP.
	Trigger events:	Transition of the parent cluster to CLUSTER_UP.

	Refer to the "Cluster state" section for a description of the
	CLUSTER_UP state.


CPU_UP:

	When a CPU reaches the CPU_UP state, it is safe for the CPU to
	start participating in local coherency.

	This is done by jumping to the kernel's CPU resume code.

	Note that the definition of this state is slightly different
	from the basic model definition: CPU_UP does not mean that the
	CPU is coherent yet, but it does mean that it is safe to resume
	the kernel.  The kernel handles the rest of the resume
	procedure, so the remaining steps are not visible as part of the
	race avoidance algorithm.

	The CPU remains in this state until an explicit policy decision
	is made to shut down or suspend the CPU.

	Next state:	CPU_GOING_DOWN
	Conditions:	none
	Trigger events:	explicit policy decision


CPU_GOING_DOWN:

	While in this state, the CPU exits coherency, including any
	operations required to achieve this (such as cleaning data
	caches).

	Next state:	CPU_DOWN
	Conditions:	local CPU teardown complete
	Trigger events:	(spontaneous)


Cluster state
-------------

A cluster is a group of connected CPUs with some common resources.
Because a cluster contains multiple CPUs, it can be doing multiple
things at the same time.  This has some implications.  In particular, a
CPU can start up while another CPU is tearing the cluster down.

In this discussion, the "outbound side" is the view of the cluster state
as seen by a CPU tearing the cluster down.  The "inbound side" is the
view of the cluster state as seen by a CPU setting the cluster up.

In order to enable safe coordination in such situations, it is important
that a CPU which is setting up the cluster can advertise its state
independently of the CPU which is tearing down the cluster.  For this
reason, the cluster state is split into two parts:

	"cluster" state: The global state of the cluster; or the state
	on the outbound side:

		CLUSTER_DOWN
		CLUSTER_UP
		CLUSTER_GOING_DOWN

	"inbound" state: The state of the cluster on the inbound side.

		INBOUND_NOT_COMING_UP
		INBOUND_COMING_UP


The different pairings of these states result in six possible
states for the cluster as a whole:

	                      CLUSTER_UP
	        +==========>  INBOUND_NOT_COMING_UP  -------------+
	        #                                                 |
	        #                                                 |
	    CLUSTER_UP      <----+                                |
	    INBOUND_COMING_UP    |                                v
	        ^                |   CLUSTER_GOING_DOWN       CLUSTER_GOING_DOWN
	        #                +-- INBOUND_COMING_UP  <===  INBOUND_NOT_COMING_UP
	        #                |                                |
	    CLUSTER_DOWN         |                                |
	    INBOUND_COMING_UP  <-+                                |
	        ^                                                 |
	        #                                                 |
	        +===========  CLUSTER_DOWN  <---------------------+
	                      INBOUND_NOT_COMING_UP

	Transitions -----> can only be made by the outbound CPU, and
	only involve changes to the "cluster" state.

	Transitions ===##> can only be made by the inbound CPU, and only
	involve changes to the "inbound" state, except where there is no
	further transition possible on the outbound side (i.e., the
	outbound CPU has put the cluster into the CLUSTER_DOWN state).

	The race avoidance algorithm does not provide a way to determine
	which exact CPUs within the cluster play these roles.  This must
	be decided in advance by some other means.  Refer to the section
	"Last man and first man selection" for more explanation.


	CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the
	cluster can actually be powered down.

	The parallelism of the inbound and outbound CPUs is observed by
	the existence of two different paths from CLUSTER_GOING_DOWN/
	INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic
	model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to
	COMING_UP in the basic model).  The second path avoids cluster
	teardown completely.

	CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic
	model.  The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP
	is trivial and merely resets the state machine ready for the
	next cycle.

	Details of the allowable transitions follow.

	The next state in each case is notated

		<cluster state>/<inbound state> (<transitioner>)

	where the <transitioner> is the side on which the transition
	can occur; either the inbound or the outbound side.

CLUSTER_DOWN/INBOUND_NOT_COMING_UP:

	Next state:	CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
	Conditions:	none
	Trigger events:

		a) an explicit hardware power-up operation, resulting
		   from a policy decision on another CPU;

		b) a hardware event, such as an interrupt.


CLUSTER_DOWN/INBOUND_COMING_UP:

	In this state, an inbound CPU sets up the cluster, including
	enabling of hardware coherency at the cluster level and any
	other operations (such as cache invalidation) which are required
	in order to achieve this.

	The purpose of this state is to do sufficient cluster-level
	setup to enable other CPUs in the cluster to enter coherency
	safely.

	Next state:	CLUSTER_UP/INBOUND_COMING_UP (inbound)
	Conditions:	cluster-level setup and hardware coherency complete
	Trigger events:	(spontaneous)

CLUSTER_UP/INBOUND_COMING_UP:

	Cluster-level setup is complete and hardware coherency is
	enabled for the cluster.  Other CPUs in the cluster can safely
	enter coherency.

	This is a transient state, leading immediately to
	CLUSTER_UP/INBOUND_NOT_COMING_UP.  All other CPUs on the cluster
	should treat these two states as equivalent.

	Next state:	CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
	Conditions:	none
	Trigger events:	(spontaneous)

CLUSTER_UP/INBOUND_NOT_COMING_UP:

	Cluster-level setup is complete and hardware coherency is
	enabled for the cluster.  Other CPUs in the cluster can safely
	enter coherency.

	The cluster will remain in this state until a policy decision is
	made to power the cluster down.

	Next state:	CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
	Conditions:	none
	Trigger events:	policy decision to power down the cluster


CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:

	An outbound CPU is tearing the cluster down.  The selected CPU
	must wait in this state until all CPUs in the cluster are in the
	CPU_DOWN state.

	When all CPUs are in the CPU_DOWN state, the cluster can be torn
	down, for example by cleaning data caches and exiting
	cluster-level coherency.

	To avoid wasteful unnecessary teardown operations, the outbound
	CPU should check the inbound cluster state for asynchronous
	transitions to INBOUND_COMING_UP.  Alternatively, individual
	CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.


	Next states:

	CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
		Conditions:	cluster torn down and ready to power off
		Trigger events:	(spontaneous)

	CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
		Conditions:	none
		Trigger events:

			a) an explicit hardware power-up operation,
			   resulting from a policy decision on another
			   CPU;

			b) a hardware event, such as an interrupt.

CLUSTER_GOING_DOWN/INBOUND_COMING_UP:

	The cluster is (or was) being torn down, but another CPU has
	come online in the meantime and is trying to set up the cluster
	again.

	If the outbound CPU observes this state, it has two choices:

		a) back out of teardown, restoring the cluster to the
		   CLUSTER_UP state;

		b) finish tearing the cluster down and put the cluster
		   in the CLUSTER_DOWN state; the inbound CPU will
		   set up the cluster again from there.

	Choice (a) permits the removal of some latency by avoiding
	unnecessary teardown and setup operations in situations where
	the cluster is not really going to be powered down.


	Next states:

	CLUSTER_UP/INBOUND_COMING_UP (outbound)
		Conditions:	cluster-level setup and hardware
				coherency complete
		Trigger events:	(spontaneous)

	CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
		Conditions:	cluster torn down and ready to power off
		Trigger events:	(spontaneous)


Last man and First man selection
--------------------------------

The CPU which performs cluster tear-down operations on the outbound side
is commonly referred to as the "last man".

The CPU which performs cluster setup on the inbound side is commonly
referred to as the "first man".

The race avoidance algorithm documented above does not provide a
mechanism to choose which CPUs should play these roles.


Last man:

When shutting down the cluster, all the CPUs involved are initially
executing Linux and hence coherent.  Therefore, ordinary spinlocks can
be used to select a last man safely, before the CPUs become
non-coherent.
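
A minimal sketch of such a selection follows, assuming a hypothetical
per-cluster structure holding a spinlock and a count of CPUs still up
(the structure and field names are illustrative, not the kernel's):

	/* Illustrative only: names and layout are assumptions. */
	bool select_last_man(struct cluster *c)
	{
		bool last_man;

		spin_lock(&c->lock);
		last_man = (--c->cpus_up == 0);	/* last one to leave */
		spin_unlock(&c->lock);

		return last_man;
	}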

First man:

Because CPUs may power up asynchronously in response to external wake-up
events, a dynamic mechanism is needed to make sure that only one CPU
attempts to play the first man role and do the cluster-level
initialisation: any other CPUs must wait for this to complete before
proceeding.

Cluster-level initialisation may involve actions such as configuring
coherency controls in the bus fabric.

The current implementation in mcpm_head.S uses a separate mutual exclusion
mechanism to do this arbitration.  This mechanism is documented in
detail in vlocks.txt.


Features and Limitations
------------------------

Implementation:

	The current ARM-based implementation is split between
	arch/arm/common/mcpm_head.S (low-level inbound CPU operations) and
	arch/arm/common/mcpm_entry.c (everything else):

	__mcpm_cpu_going_down() signals the transition of a CPU to the
	CPU_GOING_DOWN state.

	__mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
	state.

	A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
	low-level power-up code in mcpm_head.S.  This could
	involve CPU-specific setup code, but in the current
	implementation it does not.

	__mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
	handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
	and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
	the case of an aborted cluster power-down).

	These functions are more complex than the __mcpm_cpu_*()
	functions due to the extra inter-CPU coordination which
	is needed for safe transitions at the cluster level.

	A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
	the low-level power-up code in mcpm_head.S.  This
	typically involves platform-specific setup code,
	provided by the platform-specific power_up_setup
	function registered via mcpm_sync_init.
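
	Putting these functions together, an outbound power-down path
	might be structured roughly as in the following sketch.  This is
	an illustrative outline only: the platform_*() helpers are
	hypothetical, and error handling and the exact calling
	conventions are simplified here.

	/* Illustrative outline of an outbound power-down path. */
	static void platform_power_down(unsigned int cpu,
					unsigned int cluster, bool last_man)
	{
		__mcpm_cpu_going_down(cpu, cluster);

		if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
			platform_cluster_teardown();	/* hypothetical: clean
							   caches, exit cluster
							   coherency */
			__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
		} else {
			platform_cpu_teardown();	/* hypothetical:
							   CPU-level exit only */
		}

		__mcpm_cpu_down(cpu, cluster);
		/* Now in CPU_DOWN: safe to WFI or power off. */
	}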

Deep topologies:

	As currently described and implemented, the algorithm does not
	support CPU topologies involving more than two levels (i.e.,
	clusters of clusters are not supported).  The algorithm could be
	extended by replicating the cluster-level states for the
	additional topological levels, and modifying the transition
	rules for the intermediate (non-outermost) cluster levels.


Colophon
--------

Originally created and documented by Dave Martin for Linaro Limited, in
collaboration with Nicolas Pitre and Achin Gupta.

Copyright (C) 2012-2013 Linaro Limited
Distributed under the terms of Version 2 of the GNU General Public
License, as defined in linux/COPYING.

@@ -0,0 +1,211 @@
vlocks for Bare-Metal Mutual Exclusion
======================================

Voting Locks, or "vlocks" provide a simple low-level mutual exclusion
mechanism, with reasonable but minimal requirements on the memory
system.

These are intended to be used to coordinate critical activity among CPUs
which are otherwise non-coherent, in situations where the hardware
provides no other mechanism to support this and ordinary spinlocks
cannot be used.


vlocks make use of the atomicity provided by the memory system for
writes to a single memory location.  To arbitrate, every CPU "votes for
itself", by storing a unique number to a common memory location.  The
final value seen in that memory location when all the votes have been
cast identifies the winner.

In order to make sure that the election produces an unambiguous result
in finite time, a CPU will only enter the election in the first place if
no winner has been chosen and the election does not appear to have
started yet.


Algorithm
---------

The easiest way to explain the vlocks algorithm is with some pseudo-code:

	int currently_voting[NR_CPUS] = { 0, };
	int last_vote = -1; /* no votes yet */

	bool vlock_trylock(int this_cpu)
	{
		/* signal our desire to vote */
		currently_voting[this_cpu] = 1;
		if (last_vote != -1) {
			/* someone already volunteered himself */
			currently_voting[this_cpu] = 0;
			return false; /* not ourself */
		}

		/* let's suggest ourself */
		last_vote = this_cpu;
		currently_voting[this_cpu] = 0;

		/* then wait until everyone else is done voting */
		for_each_cpu(i) {
			while (currently_voting[i] != 0)
				/* wait */;
		}

		/* result */
		if (last_vote == this_cpu)
			return true; /* we won */
		return false;
	}

	void vlock_unlock(void)
	{
		last_vote = -1;
	}
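
For orientation, a caller might use these primitives as in the sketch
below; the helper names are hypothetical placeholders for whatever
one-time work the winner must perform:

	/* Illustrative usage: elect one CPU to do some one-time work. */
	if (vlock_trylock(this_cpu)) {
		do_one_time_setup();	/* hypothetical: we won */
		vlock_unlock();
	} else {
		wait_for_setup_done();	/* hypothetical: spin until the
					   winner finishes */
	}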

The currently_voting[] array provides a way for the CPUs to determine
whether an election is in progress, and plays a role analogous to the
"entering" array in Lamport's bakery algorithm [1].

However, once the election has started, the underlying memory system
atomicity is used to pick the winner.  This avoids the need for a static
priority rule to act as a tie-breaker, or any counters which could
overflow.

As long as the last_vote variable is globally visible to all CPUs, it
will contain only one value that won't change once every CPU has cleared
its currently_voting flag.


Features and limitations
------------------------

 * vlocks are not intended to be fair.  In the contended case, it is the
   _last_ CPU which attempts to get the lock which will be most likely
   to win.

   vlocks are therefore best suited to situations where it is necessary
   to pick a unique winner, but it does not matter which CPU actually
   wins.

 * Like other similar mechanisms, vlocks will not scale well to a large
   number of CPUs.

   vlocks can be cascaded in a voting hierarchy to permit better scaling
   if necessary, as in the following hypothetical example for 4096 CPUs:

	/* first level: local election */
	my_town = towns[(this_cpu >> 4) & 0xf];
	I_won = vlock_trylock(my_town, this_cpu & 0xf);
	if (I_won) {
		/* we won the town election, let's go for the state */
		my_state = states[(this_cpu >> 8) & 0xf];
		I_won = vlock_trylock(my_state, this_cpu & 0xf);
		if (I_won) {
			/* and so on */
			I_won = vlock_trylock(the_whole_country, this_cpu & 0xf);
			if (I_won) {
				/* ... */
			}
			vlock_unlock(the_whole_country);
		}
		vlock_unlock(my_state);
	}
	vlock_unlock(my_town);

ARM implementation
------------------

The current ARM implementation [2] contains some optimisations beyond
the basic algorithm:

 * By packing the members of the currently_voting array close together,
   we can read the whole array in one transaction (providing the number
   of CPUs potentially contending the lock is small enough).  This
   reduces the number of round-trips required to external memory.

   In the ARM implementation, this means that we can use a single load
   and comparison:

	LDR	Rt, [Rn]
	CMP	Rt, #0

   ...in place of code equivalent to:

	LDRB	Rt, [Rn]
	CMP	Rt, #0
	LDRBEQ	Rt, [Rn, #1]
	CMPEQ	Rt, #0
	LDRBEQ	Rt, [Rn, #2]
	CMPEQ	Rt, #0
	LDRBEQ	Rt, [Rn, #3]
	CMPEQ	Rt, #0

   This cuts down on the fast-path latency, as well as potentially
   reducing bus contention in contended cases.

   The optimisation relies on the fact that the ARM memory system
   guarantees coherency between overlapping memory accesses of
   different sizes, similarly to many other architectures.  Note that
   we do not care which element of currently_voting appears in which
   bits of Rt, so there is no need to worry about endianness in this
   optimisation.

   If there are too many CPUs to read the currently_voting array in
   one transaction then multiple transactions are still required.  The
   implementation uses a simple loop of word-sized loads for this
   case.  The number of transactions is still fewer than would be
   required if bytes were loaded individually.

   In principle, we could aggregate further by using LDRD or LDM, but
   to keep the code simple this was not attempted in the initial
   implementation.

 * vlocks are currently only used to coordinate between CPUs which are
   unable to enable their caches yet.  This means that the
   implementation removes many of the barriers which would be required
   when executing the algorithm in cached memory.

   Packing of the currently_voting array does not work with cached
   memory unless all CPUs contending the lock are cache-coherent, due
   to cache writebacks from one CPU clobbering values written by other
   CPUs.  (Though if all the CPUs are cache-coherent, you should
   probably be using proper spinlocks instead anyway.)

 * The "no votes yet" value used for the last_vote variable is 0 (not
   -1 as in the pseudocode).  This allows statically-allocated vlocks
   to be implicitly initialised to an unlocked state simply by putting
   them in .bss.

   An offset is added to each CPU's ID for the purpose of setting this
   variable, so that no CPU uses the value 0 for its ID.


Colophon
--------

Originally created and documented by Dave Martin for Linaro Limited, for
use in ARM-based big.LITTLE platforms, with review and input gratefully
received from Nicolas Pitre and Achin Gupta.  Thanks to Nicolas for
grabbing most of this text out of the relevant mail thread and writing
up the pseudocode.

Copyright (C) 2012-2013 Linaro Limited
Distributed under the terms of Version 2 of the GNU General Public
License, as defined in linux/COPYING.


References
----------

[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
    Problem", Communications of the ACM 17, 8 (August 1974), 453-455.

    http://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm

[2] linux/arch/arm/common/vlock.S, www.kernel.org.

@@ -30,6 +30,7 @@ The target is named "raid" and it accepts the following parameters:
  raid10	Various RAID10 inspired algorithms chosen by additional params
		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
		- RAID1E: Integrated Adjacent Stripe Mirroring
		- RAID1E: Integrated Offset Stripe Mirroring
		- and other similar RAID10 variants

  Reference: Chapter 4 of

@@ -64,15 +65,15 @@ The target is named "raid" and it accepts the following parameters:
	synchronisation state for each region.

	[raid10_copies <# copies>]
	[raid10_format near]
	[raid10_format <near|far|offset>]
		These two options are used to alter the default layout of
		a RAID10 configuration.  The number of copies can be
		specified, but the default is 2.  There are other variations
		to how the copies are laid down - the default and only current
		option is "near".  Near copies are what most people think of
		with respect to mirroring.  If these options are left
		unspecified, or 'raid10_copies 2' and/or 'raid10_format near'
		are given, then the layouts for 2, 3 and 4 devices are:
		specified, but the default is 2.  There are also three
		variations to how the copies are laid down - the default
		is "near".  Near copies are what most people think of with
		respect to mirroring.  If these options are left unspecified,
		or 'raid10_copies 2' and/or 'raid10_format near' are given,
		then the layouts for 2, 3 and 4 devices are:
		2 drives    3 drives      4 drives
		--------    ----------    --------------
		A1  A1      A1  A1  A2    A1  A1  A2  A2

@@ -85,6 +86,33 @@ The target is named "raid" and it accepts the following parameters:
		3-device layout is what might be called a 'RAID1E - Integrated
		Adjacent Stripe Mirroring'.

		If 'raid10_copies 2' and 'raid10_format far', then the layouts
		for 2, 3 and 4 devices are:
		2 drives    3 drives        4 drives
		--------    -----------     ------------------
		A1  A2      A1  A2  A3      A1  A2  A3  A4
		A3  A4      A4  A5  A6      A5  A6  A7  A8
		A5  A6      A7  A8  A9      A9  A10 A11 A12
		..  ..      ..  ..  ..      ..  ..  ..  ..
		A2  A1      A3  A1  A2      A2  A1  A4  A3
		A4  A3      A6  A4  A5      A6  A5  A8  A7
		A6  A5      A9  A7  A8      A10 A9  A12 A11
		..  ..      ..  ..  ..      ..  ..  ..  ..

		If 'raid10_copies 2' and 'raid10_format offset', then the
		layouts for 2, 3 and 4 devices are:
		2 drives    3 drives        4 drives
		--------    -----------     ------------------
		A1  A2      A1  A2  A3      A1  A2  A3  A4
		A2  A1      A3  A1  A2      A2  A1  A4  A3
		A3  A4      A4  A5  A6      A5  A6  A7  A8
		A4  A3      A6  A4  A5      A6  A5  A8  A7
		A5  A6      A7  A8  A9      A9  A10 A11 A12
		A6  A5      A9  A7  A8      A10 A9  A12 A11
		..  ..      ..  ..  ..      ..  ..  ..  ..
		Here we see layouts closely akin to 'RAID1E - Integrated
		Offset Stripe Mirroring'.

<#raid_devs>: The number of devices composing the array.
	Each device consists of two entries.  The first is the device
	containing the metadata (if any); the second is the one containing the

@@ -142,3 +170,5 @@ Version History
1.3.0	Added support for RAID 10
1.3.1	Allow device replacement/rebuild for RAID 10
1.3.2	Fix/improve redundancy checking for RAID10
1.4.0	Non-functional change.  Removes arg from mapping function.
1.4.1	Add RAID10 "far" and "offset" algorithm support.

@@ -13,9 +13,6 @@ Required parent device properties:
	4 = active high level-sensitive
	8 = active low level-sensitive

Optional parent device properties:
- reg : contains the PRCMU mailbox address for the AB8500 i2c port

The AB8500 consists of a large and varied group of sub-devices:

Device			IRQ Names		Supply Names	Description

@@ -86,9 +83,8 @@ Non-standard child device properties:
- stericsson,amic2-bias-vamic1 : Analog Mic wishes to use a non-standard Vamic
- stericsson,earpeice-cmv      : Earpiece voltage (only: 950 | 1100 | 1270 | 1580)

ab8500@5 {
ab8500 {
	compatible = "stericsson,ab8500";
	reg = <5>; /* mailbox 5 is i2c */
	interrupts = <0 40 0x4>;
	interrupt-controller;
	#interrupt-cells = <2>;

@@ -11,6 +11,9 @@ Required properties:
	- "nvidia,tegra20-uart"
	- "nxp,lpc3220-uart"
	- "ibm,qpace-nwp-serial"
	- "altr,16550-FIFO32"
	- "altr,16550-FIFO64"
	- "altr,16550-FIFO128"
	- "serial" if the port type is unknown.
- reg : offset and length of the register set for the device.
- interrupts : should contain uart interrupt.

@@ -15,7 +15,7 @@ Supported chips:
    Addresses scanned: -
    Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1276.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -4,9 +4,14 @@ Kernel driver adt7410
Supported chips:
  * Analog Devices ADT7410
    Prefix: 'adt7410'
    Addresses scanned: I2C 0x48 - 0x4B
    Addresses scanned: None
    Datasheet: Publicly available at the Analog Devices website
               http://www.analog.com/static/imported-files/data_sheets/ADT7410.pdf
  * Analog Devices ADT7420
    Prefix: 'adt7420'
    Addresses scanned: None
    Datasheet: Publicly available at the Analog Devices website
               http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf

Author: Hartmut Knaack <knaack.h@gmx.de>

@@ -27,6 +32,10 @@ value per second or even just get one sample on demand for power saving.
Besides, it can completely power down its ADC, if power management is
required.

The ADT7420 is register compatible, the only differences being the package,
a slightly narrower operating temperature range (-40°C to +150°C), and a
better accuracy (0.25°C instead of 0.50°C.)

Configuration Notes
-------------------

@@ -49,7 +49,7 @@ Supported chips:
    Addresses scanned: I2C 0x18 - 0x1f

Author:
	Guenter Roeck <guenter.roeck@ericsson.com>
	Guenter Roeck <linux@roeck-us.net>


Description

@@ -8,7 +8,7 @@ Supported devices:
  Documentation:
    http://www.lineagepower.com/oem/pdf/CPLI2C.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -19,7 +19,7 @@ Supported chips:
    Datasheet:
	http://www.national.com/pf/LM/LM5066.html

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -5,13 +5,13 @@ Supported chips:
  * Linear Technology LTC2978
    Prefix: 'ltc2978'
    Addresses scanned: -
    Datasheet: http://cds.linear.com/docs/Datasheet/2978fa.pdf
    Datasheet: http://www.linear.com/product/ltc2978
  * Linear Technology LTC3880
    Prefix: 'ltc3880'
    Addresses scanned: -
    Datasheet: http://cds.linear.com/docs/Datasheet/3880f.pdf
    Datasheet: http://www.linear.com/product/ltc3880

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -8,7 +8,7 @@ Supported chips:
    Datasheet:
	http://cds.linear.com/docs/Datasheet/42612fb.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -7,7 +7,7 @@ Supported chips:
    Addresses scanned: -
    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -24,7 +24,7 @@ Supported chips:
	http://datasheets.maxim-ic.com/en/ds/MAX16070-MAX16071.pdf


Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -27,7 +27,7 @@ Supported chips:
    Addresses scanned: -
    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX34461.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -7,7 +7,7 @@ Supported chips:
    Addresses scanned: -
    Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -34,7 +34,7 @@ Supported chips:
    Addresses scanned: -
    Datasheet: n.a.

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -29,7 +29,7 @@ Supported chips:
	http://www.summitmicro.com/prod_select/summary/SMM766/SMM766_2086.pdf
	http://www.summitmicro.com/prod_select/summary/SMM766B/SMM766B_2122.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Module Parameters

@@ -11,7 +11,7 @@ Supported chips:
	http://focus.ti.com/lit/ds/symlink/ucd9090.pdf
	http://focus.ti.com/lit/ds/symlink/ucd90910.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -15,7 +15,7 @@ Supported chips:
	http://focus.ti.com/lit/ds/symlink/ucd9246.pdf
	http://focus.ti.com/lit/ds/symlink/ucd9248.pdf

Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -54,7 +54,7 @@ http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146401
http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146256


Author: Guenter Roeck <guenter.roeck@ericsson.com>
Author: Guenter Roeck <linux@roeck-us.net>


Description

@@ -3,10 +3,26 @@ ALPS Touchpad Protocol

Introduction
------------
Currently the ALPS touchpad driver supports five protocol versions in use by
ALPS touchpads, called versions 1, 2, 3, 4 and 5.

Currently the ALPS touchpad driver supports four protocol versions in use by
ALPS touchpads, called versions 1, 2, 3, and 4.  Information about the various
protocol versions is contained in the following sections.
Since roughly mid-2010 several new ALPS touchpads have been released and
integrated into a variety of laptops and netbooks.  These new touchpads
have enough behavior differences that the alps_model_data definition
table, describing the properties of the different versions, is no longer
adequate.  The design choices were to re-define the alps_model_data
table, with the risk of regression testing existing devices, or isolate
the new devices outside of the alps_model_data table.  The latter design
choice was made.  The new touchpad signatures are named: "Rushmore",
"Pinnacle", and "Dolphin", which you will see in the alps.c code.
For the purposes of this document, this group of ALPS touchpads will
generically be called "new ALPS touchpads".

We experimented with probing the ACPI interface _HID (Hardware ID)/_CID
(Compatibility ID) definition as a way to uniquely identify the
different ALPS variants but there did not appear to be a 1:1 mapping.
In fact, it appeared to be an m:n mapping between the _HID and actual
hardware type.

Detection
---------

@@ -20,9 +36,13 @@ If the E6 report is successful, the touchpad model is identified using the "E7
report" sequence: E8-E7-E7-E7-E9.  The response is the model signature and is
matched against known models in the alps_model_data_array.

With protocol versions 3 and 4, the E7 report model signature is always
73-02-64.  To differentiate between these versions, the response from the
"Enter Command Mode" sequence must be inspected as described below.
For older touchpads supporting protocol versions 3 and 4, the E7 report
model signature is always 73-02-64.  To differentiate between these
versions, the response from the "Enter Command Mode" sequence must be
inspected as described below.

The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but
seem to be better differentiated by the EC Command Mode response.

Command Mode
------------

@@ -47,6 +67,14 @@ address of the register being read, and the third contains the value of the
register.  Registers are written by writing the value one nibble at a time
using the same encoding used for addresses.

For the new ALPS touchpads, the EC command is used to enter command
mode.  The response in the new ALPS touchpads is significantly different,
and more important in determining the behavior.  This code has been
separated from the original alps_model_data table and put in the
alps_identify function.  For example, there seem to be two hardware init
sequences for the "Dolphin" touchpads as determined by the second byte
of the EC response.

Packet Format
-------------

@@ -187,3 +215,28 @@ There are several things worth noting here.
   well.

So far no v4 devices with tracksticks have been encountered.

ALPS Absolute Mode - Protocol Version 5
---------------------------------------
This is basically Protocol Version 3 but with different logic for packet
decode.  It uses the same alps_process_touchpad_packet_v3 call with a
specialized decode_fields function pointer to correctly interpret the
packets.  This appears to only be used by the Dolphin devices.

For single-touch, the 6-byte packet format is:

	byte 0:    1    1    0    0    1    0    0    0
	byte 1:    0   x6   x5   x4   x3   x2   x1   x0
	byte 2:    0   y6   y5   y4   y3   y2   y1   y0
	byte 3:    0    M    R    L    1    m    r    l
	byte 4:  y10   y9   y8   y7  x10   x9   x8   x7
	byte 5:    0   z6   z5   z4   z3   z2   z1   z0

For mt, the format is:

	byte 0:    1    1    1   n3    1   n2   n1  x24
	byte 1:    1   y7   y6   y5   y4   y3   y2   y1
	byte 2:    ?   x2   x1  y12  y11  y10   y9   y8
	byte 3:    0  x23  x22  x21  x20  x19  x18  x17
	byte 4:    0   x9   x8   x7   x6   x5   x4   x3
	byte 5:    0  x16  x15  x14  x13  x12  x11  x10
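
To make the single-touch bit layout concrete, here is a hedged sketch of
a decoder; the struct and function names are hypothetical and do not
correspond to the driver's actual decode_fields implementation:

	/* Illustrative decode of the v5 single-touch packet above. */
	struct st_fields { int x, y, z; unsigned left, right, middle; };

	static void decode_v5_st(const unsigned char p[6], struct st_fields *f)
	{
		f->x = (p[1] & 0x7f) | ((p[4] & 0x0f) << 7); /* x10..x7 */
		f->y = (p[2] & 0x7f) | ((p[4] & 0xf0) << 3); /* y10..y7 */
		f->z = p[5] & 0x7f;
		f->left   = (p[3] >> 4) & 1;	/* L */
		f->right  = (p[3] >> 5) & 1;	/* R */
		f->middle = (p[3] >> 6) & 1;	/* M */
	}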

@@ -105,6 +105,83 @@ Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
   Proto [2 bytes]
   Raw protocol(IP, IPv6, etc) frame.

3.3 Multiqueue tuntap interface:

From version 3.8, Linux supports multiqueue tuntap which can use multiple
file descriptors (queues) to parallelize packet sending or receiving.  The
device allocation is the same as before, and if user wants to create multiple
queues, TUNSETIFF with the same device name must be called many times with
IFF_MULTI_QUEUE flag.

char *dev should be the name of the device, queues is the number of queues to
be created, fds is used to store and return the file descriptors (queues)
created to the caller.  Each file descriptor serves as the interface of a
queue which can be accessed by userspace.

  #include <fcntl.h>      /* open() */
  #include <string.h>     /* memset(), strcpy() */
  #include <sys/ioctl.h>  /* ioctl() */
  #include <linux/if.h>
  #include <linux/if_tun.h>

  int tun_alloc_mq(char *dev, int queues, int *fds)
  {
      struct ifreq ifr;
      int fd, err, i;

      if (!dev)
          return -1;

      memset(&ifr, 0, sizeof(ifr));
      /* Flags: IFF_TUN   - TUN device (no Ethernet headers)
       *        IFF_TAP   - TAP device
       *
       *        IFF_NO_PI - Do not provide packet information
       *        IFF_MULTI_QUEUE - Create a queue of multiqueue device
       */
      ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
      strcpy(ifr.ifr_name, dev);

      for (i = 0; i < queues; i++) {
          if ((fd = open("/dev/net/tun", O_RDWR)) < 0) {
              err = fd; /* ensure err is set if the first open() fails */
              goto err;
          }
          err = ioctl(fd, TUNSETIFF, (void *)&ifr);
          if (err) {
              close(fd);
              goto err;
          }
          fds[i] = fd;
      }

      return 0;
  err:
      for (--i; i >= 0; i--)
          close(fds[i]);
      return err;
  }

A new ioctl(TUNSETQUEUE) was introduced to enable or disable a queue.  When
called with the IFF_DETACH_QUEUE flag, the queue is disabled.  When called
with the IFF_ATTACH_QUEUE flag, the queue is enabled.  A queue is enabled
by default after it is created through TUNSETIFF.

fd is the file descriptor (queue) that we want to enable or disable; when
enable is true we enable it, otherwise we disable it.

  #include <string.h>     /* memset() */
  #include <sys/ioctl.h>  /* ioctl() */
  #include <linux/if.h>
  #include <linux/if_tun.h>

  int tun_set_queue(int fd, int enable)
  {
      struct ifreq ifr;

      memset(&ifr, 0, sizeof(ifr));

      if (enable)
          ifr.ifr_flags = IFF_ATTACH_QUEUE;
      else
          ifr.ifr_flags = IFF_DETACH_QUEUE;

      return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
  }
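
Taken together, the two helpers above might be combined as in this
sketch (the device name and queue counts are arbitrary examples):

  /* Illustrative usage: create 4 queues, then park two of them. */
  int fds[4];

  if (tun_alloc_mq("tap0", 4, fds) == 0) {
      tun_set_queue(fds[2], 0);   /* detach queue 2 for now */
      tun_set_queue(fds[3], 0);   /* detach queue 3 for now */
      /* ... later, re-attach with tun_set_queue(fds[2], 1); */
  }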

Universal TUN/TAP device driver Frequently Asked Question.

1. What platforms are supported by TUN/TAP driver ?

@@ -1,6 +1,5 @@
*=============*
* OPP Library *
*=============*
Operating Performance Points (OPP) Library
==========================================

(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated

@@ -16,15 +15,31 @@ Contents

1. Introduction
===============
1.1 What is an Operating Performance Point (OPP)?

Complex SoCs of today consist of multiple sub-modules working in conjunction.
In an operational system executing varied use cases, not all modules in the SoC
need to function at their highest performing frequency all the time.  To
facilitate this, sub-modules in a SoC are grouped into domains, allowing some
domains to run at lower voltage and frequency while other domains are loaded
more.  The set of discrete tuples consisting of frequency and voltage pairs that
domains to run at lower voltage and frequency while other domains run at
voltage/frequency pairs that are higher.

The set of discrete tuples consisting of frequency and voltage pairs that
the device will support per domain are called Operating Performance Points or
OPPs.

As an example:
Let us consider an MPU device which supports the following:
{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V},
{1GHz at minimum voltage of 1.3V}

We can represent these as three OPPs as the following {Hz, uV} tuples:
{300000000, 1000000}
{800000000, 1200000}
{1000000000, 1300000}
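
For illustration, a driver could register these three example OPPs with
the library using opp_add() (the 3.9-era interface); the mpu_dev device
pointer in this sketch is a hypothetical handle:

	/* Sketch only: register the example {Hz, uV} tuples from above. */
	#include <linux/opp.h>

	static int mpu_register_opps(struct device *mpu_dev)
	{
		int ret;

		ret = opp_add(mpu_dev, 300000000, 1000000);  /* 300 MHz, 1.0 V */
		if (!ret)
			ret = opp_add(mpu_dev, 800000000, 1200000);  /* 800 MHz */
		if (!ret)
			ret = opp_add(mpu_dev, 1000000000, 1300000); /* 1 GHz */
		return ret;
	}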

1.2 Operating Performance Points Library

OPP library provides a set of helper functions to organize and query the OPP
information.  The library is located in drivers/base/power/opp.c and the header
is located in include/linux/opp.h.  OPP library can be enabled by enabling

@@ -170,5 +170,5 @@ Reminder: sizeof() result is of type size_t.

Thank you for your cooperation and attention.


By Randy Dunlap <rdunlap@xenotime.net> and
By Randy Dunlap <rdunlap@infradead.org> and
Andrew Murray <amurray@mpc-data.co.uk>

@@ -1873,7 +1873,7 @@ feature:

 status\input  |     0      |     1      |    else    |
 --------------+------------+------------+------------+
 not allocated |(do nothing)| alloc+swap |   EINVAL   |
 not allocated |(do nothing)| alloc+swap |(do nothing)|
 --------------+------------+------------+------------+
 allocated     |    free    |    swap    |   clear    |
 --------------+------------+------------+------------+

MAINTAINERS | 45

@@ -114,12 +114,6 @@ Maintainers List (try to look for most precise areas first)

		-----------------------------------

3C505 NETWORK DRIVER
M:	Philip Blundell <philb@gnu.org>
L:	netdev@vger.kernel.org
S:	Maintained
F:	drivers/net/ethernet/i825xx/3c505*

3C59X NETWORK DRIVER
M:	Steffen Klassert <klassert@mathematik.tu-chemnitz.de>
L:	netdev@vger.kernel.org

@@ -2361,12 +2355,6 @@ W: http://www.arm.linux.org.uk/
S:	Maintained
F:	drivers/video/cyber2000fb.*

CYCLADES 2X SYNC CARD DRIVER
M:	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
W:	http://oops.ghostprotocols.net:81/blog
S:	Maintained
F:	drivers/net/wan/cycx*

CYCLADES ASYNC MUX DRIVER
W:	http://www.cyclades.com/
S:	Orphan

@@ -3067,12 +3055,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kristoffer/linux-hpc.git
F:	drivers/video/s1d13xxxfb.c
F:	include/video/s1d13xxxfb.h

ETHEREXPRESS-16 NETWORK DRIVER
M:	Philip Blundell <philb@gnu.org>
L:	netdev@vger.kernel.org
S:	Maintained
F:	drivers/net/ethernet/i825xx/eexpress.*

ETHERNET BRIDGE
M:	Stephen Hemminger <stephen@networkplumber.org>
L:	bridge@lists.linux-foundation.org

@@ -4023,6 +4005,22 @@ M: Stanislaw Gruszka <stf_xl@wp.pl>
S:	Maintained
F:	drivers/usb/atm/ueagle-atm.c

INA209 HARDWARE MONITOR DRIVER
M:	Guenter Roeck <linux@roeck-us.net>
L:	lm-sensors@lm-sensors.org
S:	Maintained
F:	Documentation/hwmon/ina209
F:	Documentation/devicetree/bindings/i2c/ina209.txt
F:	drivers/hwmon/ina209.c

INA2XX HARDWARE MONITOR DRIVER
M:	Guenter Roeck <linux@roeck-us.net>
L:	lm-sensors@lm-sensors.org
S:	Maintained
F:	Documentation/hwmon/ina2xx
F:	drivers/hwmon/ina2xx.c
F:	include/linux/platform_data/ina2xx.h

INDUSTRY PACK SUBSYSTEM (IPACK)
M:	Samuel Iglesias Gonsalvez <siglesias@igalia.com>
M:	Jens Taprogge <jens.taprogge@taprogge.org>

@@ -5116,6 +5114,15 @@ S: Maintained
F:	Documentation/hwmon/max6650
F:	drivers/hwmon/max6650.c

MAX6697 HARDWARE MONITOR DRIVER
M:	Guenter Roeck <linux@roeck-us.net>
L:	lm-sensors@lm-sensors.org
S:	Maintained
F:	Documentation/hwmon/max6697
F:	Documentation/devicetree/bindings/i2c/max6697.txt
F:	drivers/hwmon/max6697.c
F:	include/linux/platform_data/max6697.h

MAXIRADIO FM RADIO RECEIVER DRIVER
M:	Hans Verkuil <hverkuil@xs4all.nl>
L:	linux-media@vger.kernel.org

@@ -6430,6 +6437,8 @@ F: Documentation/networking/LICENSE.qla3xxx
F:	drivers/net/ethernet/qlogic/qla3xxx.*

QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
M:	Rajesh Borundia <rajesh.borundia@qlogic.com>
M:	Shahed Shaikh <shahed.shaikh@qlogic.com>
M:	Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
M:	Sony Chacko <sony.chacko@qlogic.com>
M:	linux-driver@qlogic.com

Makefile | 2

@@ -1,7 +1,7 @@
VERSION = 3
PATCHLEVEL = 9
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc3
NAME = Unicycling Gorilla

# *DOCUMENTATION*

@@ -319,13 +319,6 @@ config ARCH_WANT_OLD_COMPAT_IPC
	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
	bool

config HAVE_VIRT_TO_BUS
	bool
	help
	  An architecture should select this if it implements the
	  deprecated interface virt_to_bus().  All new architectures
	  should probably not select this.

config HAVE_ARCH_SECCOMP_FILTER
	bool
	help

@@ -9,7 +9,7 @@ config ALPHA
	select HAVE_PERF_EVENTS
	select HAVE_DMA_ATTRS
	select HAVE_GENERIC_HARDIRQS
	select HAVE_VIRT_TO_BUS
	select VIRT_TO_BUS
	select GENERIC_IRQ_PROBE
	select AUTO_IRQ_AFFINITY if SMP
	select GENERIC_IRQ_SHOW

@@ -4,6 +4,7 @@
 * initial bootloader stuff..
 */

#include <asm/pal.h>

	.set noreorder
	.globl __start

@@ -49,7 +49,7 @@ config ARM
	select HAVE_REGS_AND_STACK_ACCESS_API
	select HAVE_SYSCALL_TRACEPOINTS
	select HAVE_UID16
	select HAVE_VIRT_TO_BUS
	select VIRT_TO_BUS
	select KTIME_SCALAR
	select PERF_USE_VMALLOC
	select RTC_LIB

@@ -556,7 +556,6 @@ config ARCH_IXP4XX
config ARCH_DOVE
	bool "Marvell Dove"
	select ARCH_REQUIRE_GPIOLIB
	select COMMON_CLK_DOVE
	select CPU_V7
	select GENERIC_CLOCKEVENTS
	select MIGHT_HAVE_PCI

@@ -1600,6 +1599,14 @@ config HAVE_ARM_TWD
	help
	  This option enables support for the ARM timer and watchdog unit

config MCPM
	bool "Multi-Cluster Power Management"
	depends on CPU_V7 && SMP
	help
	  This option provides the common power management infrastructure
	  for (multi-)cluster based systems, such as big.LITTLE based
	  systems.

choice
	prompt "Memory split"
	default VMSPLIT_3G

@@ -1657,13 +1664,16 @@ config LOCAL_TIMERS
	  accounting to be spread across the timer interval, preventing a
	  "thundering herd" at every timer tick.

# The GPIO number here must be sorted by descending number. In case of
# a multiplatform kernel, we just want the highest value required by the
# selected platforms.
config ARCH_NR_GPIO
	int
	default 1024 if ARCH_SHMOBILE || ARCH_TEGRA
	default 355 if ARCH_U8500
	default 264 if MACH_H4700
	default 512 if SOC_OMAP5
	default 355 if ARCH_U8500
	default 288 if ARCH_VT8500 || ARCH_SUNXI
	default 264 if MACH_H4700
	default 0
	help
	  Maximum number of GPIOs in the system.

@@ -1888,8 +1898,9 @@ config XEN_DOM0

config XEN
	bool "Xen guest support on ARM (EXPERIMENTAL)"
	depends on ARM && OF
	depends on ARM && AEABI && OF
	depends on CPU_V7 && !CPU_V6
	depends on !GENERIC_ATOMIC64
	help
	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.


@@ -492,7 +492,7 @@ config DEBUG_IMX_UART_PORT
		DEBUG_IMX31_UART || \
		DEBUG_IMX35_UART || \
		DEBUG_IMX51_UART || \
		DEBUG_IMX50_IMX53_UART || \
		DEBUG_IMX53_UART || \
		DEBUG_IMX6Q_UART
	default 1
	help

@@ -115,4 +115,4 @@ i:
	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
	$(obj)/Image System.map "$(INSTALL_PATH)"

subdir- := bootp compressed
subdir- := bootp compressed dts

@@ -120,7 +120,7 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
endif

ccflags-y := -fpic -fno-builtin -I$(obj)
ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
asflags-y := -Wa,-march=all -DZIMAGE

# Supply kernel BSS size to the decompressor via a linker symbol.
@@ -64,5 +64,13 @@
 			status = "okay";
 			/* No CD or WP GPIOs */
 		};
+
+		usb@d0050000 {
+			status = "okay";
+		};
+
+		usb@d0051000 {
+			status = "okay";
+		};
 	};
 };

@@ -31,7 +31,6 @@
 	mpic: interrupt-controller@d0020000 {
 		compatible = "marvell,mpic";
 		#interrupt-cells = <1>;
 		#address-cells = <1>;
 		#size-cells = <1>;
 		interrupt-controller;
 	};

@@ -54,7 +53,7 @@
 		reg = <0xd0012000 0x100>;
 		reg-shift = <2>;
 		interrupts = <41>;
-		reg-io-width = <4>;
+		reg-io-width = <1>;
 		status = "disabled";
 	};
 	serial@d0012100 {

@@ -62,7 +61,7 @@
 		reg = <0xd0012100 0x100>;
 		reg-shift = <2>;
 		interrupts = <42>;
-		reg-io-width = <4>;
+		reg-io-width = <1>;
 		status = "disabled";
 	};

@@ -46,7 +46,7 @@
 		reg = <0xd0012200 0x100>;
 		reg-shift = <2>;
 		interrupts = <43>;
-		reg-io-width = <4>;
+		reg-io-width = <1>;
 		status = "disabled";
 	};
 	serial@d0012300 {

@@ -54,7 +54,7 @@
 		reg = <0xd0012300 0x100>;
 		reg-shift = <2>;
 		interrupts = <44>;
-		reg-io-width = <4>;
+		reg-io-width = <1>;
 		status = "disabled";
 	};

@@ -105,7 +105,7 @@
 			compatible = "fixed-clock";
 			reg = <1>;
 			#clock-cells = <0>;
-			clock-frequency = <150000000>;
+			clock-frequency = <250000000>;
 		};
 	};
 };

@@ -319,9 +319,8 @@
 		};
 	};
 
-	ab8500@5 {
+	ab8500 {
 		compatible = "stericsson,ab8500";
-		reg = <5>; /* mailbox 5 is i2c */
 		interrupt-parent = <&intc>;
 		interrupts = <0 40 0x4>;
 		interrupt-controller;

@@ -197,6 +197,11 @@
 			status = "disabled";
 		};
 
+		rtc@d8500 {
+			compatible = "marvell,orion-rtc";
+			reg = <0xd8500 0x20>;
+		};
+
 		crypto: crypto@30000 {
 			compatible = "marvell,orion-crypto";
 			reg = <0x30000 0x10000>,

@@ -221,7 +221,7 @@
 			};
 		};
 
-		ab8500@5 {
+		ab8500 {
 			ab8500-regulators {
 				ab8500_ldo_aux1_reg: ab8500_ldo_aux1 {
 					regulator-name = "V-DISPLAY";

@@ -158,7 +158,7 @@
 			};
 		};
 
-		ab8500@5 {
+		ab8500 {
 			ab8500-regulators {
 				ab8500_ldo_aux1_reg: ab8500_ldo_aux1 {
 					regulator-name = "V-DISPLAY";

@@ -42,10 +42,9 @@
 		fsl,pins = <689 0x10000		/* DISP1_DRDY */
 			    482 0x10000		/* DISP1_HSYNC */
 			    489 0x10000		/* DISP1_VSYNC */
 			    684 0x10000		/* DISP1_DAT_0 */
 			    515 0x10000		/* DISP1_DAT_22 */
 			    523 0x10000		/* DISP1_DAT_23 */
-			    543 0x10000		/* DISP1_DAT_21 */
+			    545 0x10000		/* DISP1_DAT_21 */
 			    553 0x10000		/* DISP1_DAT_20 */
 			    558 0x10000		/* DISP1_DAT_19 */
 			    564 0x10000		/* DISP1_DAT_18 */
@@ -42,12 +42,10 @@
 
 	ocp@f1000000 {
 		serial@12000 {
-			clock-frequency = <166666667>;
 			status = "okay";
 		};
 
 		serial@12100 {
-			clock-frequency = <166666667>;
 			status = "okay";
 		};
 	};

@@ -50,7 +50,6 @@
 		};
 	};
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "okay";
 	};
 };

@@ -37,7 +37,6 @@
 		};
 	};
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -38,7 +38,6 @@
 		};
 	};
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -73,7 +73,6 @@
 		};
 	};
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -51,7 +51,6 @@
 		};
 	};
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "okay";
 	};
 

@@ -78,7 +78,6 @@
 		};
 	};
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -115,7 +115,6 @@
 	};
 
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -34,7 +34,6 @@
 	};
 
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -13,7 +13,6 @@
 
 	ocp@f1000000 {
 		serial@12000 {
-			clock-frequency = <166666667>;
 			status = "okay";
 		};
 	};

@@ -13,7 +13,6 @@
 
 	ocp@f1000000 {
 		serial@12000 {
-			clock-frequency = <200000000>;
 			status = "okay";
 		};
 	};

@@ -90,7 +90,6 @@
 	};
 
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -23,7 +23,6 @@
 	};
 
 	serial@12000 {
-		clock-frequency = <166666667>;
 		status = "okay";
 	};
 

@@ -117,7 +117,6 @@
 	};
 
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 

@@ -18,12 +18,10 @@
 
 	ocp@f1000000 {
 		serial@12000 {
-			clock-frequency = <200000000>;
 			status = "ok";
 		};
 
 		serial@12100 {
-			clock-frequency = <200000000>;
 			status = "ok";
 		};
 

@@ -108,7 +108,6 @@
 	};
 
 	serial@12000 {
-		clock-frequency = <200000000>;
 		status = "ok";
 	};
 
@@ -38,6 +38,7 @@
 			interrupt-controller;
 			#interrupt-cells = <2>;
 			interrupts = <35>, <36>, <37>, <38>;
+			clocks = <&gate_clk 7>;
 		};
 
 		gpio1: gpio@10140 {

@@ -49,6 +50,7 @@
 			interrupt-controller;
 			#interrupt-cells = <2>;
 			interrupts = <39>, <40>, <41>;
+			clocks = <&gate_clk 7>;
 		};
 
 		serial@12000 {

@@ -57,7 +59,6 @@
 			reg-shift = <2>;
 			interrupts = <33>;
 			clocks = <&gate_clk 7>;
-			/* set clock-frequency in board dts */
 			status = "disabled";
 		};
 

@@ -67,7 +68,6 @@
 			reg-shift = <2>;
 			interrupts = <34>;
 			clocks = <&gate_clk 7>;
-			/* set clock-frequency in board dts */
 			status = "disabled";
 		};
 

@@ -75,6 +75,7 @@
 			compatible = "marvell,kirkwood-rtc", "marvell,orion-rtc";
 			reg = <0x10300 0x20>;
 			interrupts = <53>;
+			clocks = <&gate_clk 7>;
 		};
 
 		spi@10600 {
@@ -11,7 +11,7 @@
 
 / {
 	model = "LaCie Ethernet Disk mini V2";
-	compatible = "lacie,ethernet-disk-mini-v2", "marvell-orion5x-88f5182", "marvell,orion5x";
+	compatible = "lacie,ethernet-disk-mini-v2", "marvell,orion5x-88f5182", "marvell,orion5x";
 
 	memory {
 		reg = <0x00000000 0x4000000>; /* 64 MB */

@@ -298,7 +298,7 @@
 		};
 	};
 
-	ab8500@5 {
+	ab8500 {
 		ab8500-regulators {
 			ab8500_ldo_aux1_reg: ab8500_ldo_aux1 {
 				regulator-name = "V-DISPLAY";

@@ -75,6 +75,9 @@
 		compatible = "arm,pl330", "arm,primecell";
 		reg = <0xffe01000 0x1000>;
 		interrupts = <0 180 4>;
+		#dma-cells = <1>;
+		#dma-channels = <8>;
+		#dma-requests = <32>;
 	};
 };

@@ -118,6 +118,7 @@
 	compatible = "arm,cortex-a9-twd-timer";
 	reg = <0x50040600 0x20>;
 	interrupts = <1 13 0x304>;
+	clocks = <&tegra_car 132>;
 };
 
 intc: interrupt-controller {

@@ -119,6 +119,7 @@
 	compatible = "arm,cortex-a9-twd-timer";
 	reg = <0x50040600 0x20>;
 	interrupts = <1 13 0xf04>;
+	clocks = <&tegra_car 214>;
 };
 
 intc: interrupt-controller {

@@ -11,3 +11,4 @@ obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o
 obj-$(CONFIG_SHARP_SCOOP)	+= scoop.o
 obj-$(CONFIG_PCI_HOST_ITE8152)	+= it8152.o
 obj-$(CONFIG_ARM_TIMER_SP804)	+= timer-sp.o
+obj-$(CONFIG_MCPM)		+= mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
@@ -0,0 +1,263 @@
/*
 * arch/arm/common/mcpm_entry.c -- entry point for multi-cluster PM
 *
 * Created by:  Nicolas Pitre, March 2012
 * Copyright:   (C) 2012-2013  Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irqflags.h>

#include <asm/mcpm.h>
#include <asm/cacheflush.h>
#include <asm/idmap.h>
#include <asm/cputype.h>

extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];

void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
{
	unsigned long val = ptr ? virt_to_phys(ptr) : 0;
	mcpm_entry_vectors[cluster][cpu] = val;
	sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
}

static const struct mcpm_platform_ops *platform_ops;

int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
{
	if (platform_ops)
		return -EBUSY;
	platform_ops = ops;
	return 0;
}

int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
{
	if (!platform_ops)
		return -EUNATCH; /* try not to shadow power_up errors */
	might_sleep();
	return platform_ops->power_up(cpu, cluster);
}

typedef void (*phys_reset_t)(unsigned long);

void mcpm_cpu_power_down(void)
{
	phys_reset_t phys_reset;

	BUG_ON(!platform_ops);
	BUG_ON(!irqs_disabled());

	/*
	 * Do this before calling into the power_down method,
	 * as it might not always be safe to do afterwards.
	 */
	setup_mm_for_reboot();

	platform_ops->power_down();

	/*
	 * It is possible for a power_up request to happen concurrently
	 * with a power_down request for the same CPU. In this case the
	 * power_down method might not be able to actually enter a
	 * powered down state with the WFI instruction if the power_up
	 * method has removed the required reset condition.  The
	 * power_down method is then allowed to return. We must perform
	 * a re-entry in the kernel as if the power_up method just had
	 * deasserted reset on the CPU.
	 *
	 * To simplify race issues, the platform specific implementation
	 * must accommodate for the possibility of unordered calls to
	 * power_down and power_up with a usage count. Therefore, if a
	 * call to power_up is issued for a CPU that is not down, then
	 * the next call to power_down must not attempt a full shutdown
	 * but only do the minimum (normally disabling L1 cache and CPU
	 * coherency) and return just as if a concurrent power_up request
	 * had happened as described above.
	 */

	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
	phys_reset(virt_to_phys(mcpm_entry_point));

	/* should never get here */
	BUG();
}

void mcpm_cpu_suspend(u64 expected_residency)
{
	phys_reset_t phys_reset;

	BUG_ON(!platform_ops);
	BUG_ON(!irqs_disabled());

	/* Very similar to mcpm_cpu_power_down() */
	setup_mm_for_reboot();
	platform_ops->suspend(expected_residency);
	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
	phys_reset(virt_to_phys(mcpm_entry_point));
	BUG();
}

int mcpm_cpu_powered_up(void)
{
	if (!platform_ops)
		return -EUNATCH;
	if (platform_ops->powered_up)
		platform_ops->powered_up();
	return 0;
}

struct sync_struct mcpm_sync;

/*
 * __mcpm_cpu_going_down: Indicates that the cpu is being torn down.
 *    This must be called at the point of committing to teardown of a CPU.
 *    The CPU cache (SCTRL.C bit) is expected to still be active.
 */
void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster)
{
	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
}

/*
 * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the
 *    cluster can be torn down without disrupting this CPU.
 *    To avoid deadlocks, this must be called before a CPU is powered down.
 *    The CPU cache (SCTRL.C bit) is expected to be off.
 *    However L2 cache might or might not be active.
 */
void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster)
{
	dmb();
	mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
	sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu);
	dsb_sev();
}

/*
 * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section.
 * @state: the final state of the cluster:
 *     CLUSTER_UP: no destructive teardown was done and the cluster has been
 *         restored to the previous state (CPU cache still active); or
 *     CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
 *         (CPU cache disabled, L2 cache either enabled or disabled).
 */
void __mcpm_outbound_leave_critical(unsigned int cluster, int state)
{
	dmb();
	mcpm_sync.clusters[cluster].cluster = state;
	sync_cache_w(&mcpm_sync.clusters[cluster].cluster);
	dsb_sev();
}

/*
 * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section.
 * This function should be called by the last man, after local CPU teardown
 * is complete.  CPU cache expected to be active.
 *
 * Returns:
 *     false: the critical section was not entered because an inbound CPU was
 *         observed, or the cluster is already being set up;
 *     true: the critical section was entered: it is now safe to tear down the
 *         cluster.
 */
bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
{
	unsigned int i;
	struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];

	/* Warn inbound CPUs that the cluster is being torn down: */
	c->cluster = CLUSTER_GOING_DOWN;
	sync_cache_w(&c->cluster);

	/* Back out if the inbound cluster is already in the critical region: */
	sync_cache_r(&c->inbound);
	if (c->inbound == INBOUND_COMING_UP)
		goto abort;

	/*
	 * Wait for all CPUs to get out of the GOING_DOWN state, so that local
	 * teardown is complete on each CPU before tearing down the cluster.
	 *
	 * If any CPU has been woken up again from the DOWN state, then we
	 * shouldn't be taking the cluster down at all: abort in that case.
	 */
	sync_cache_r(&c->cpus);
	for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) {
		int cpustate;

		if (i == cpu)
			continue;

		while (1) {
			cpustate = c->cpus[i].cpu;
			if (cpustate != CPU_GOING_DOWN)
				break;

			wfe();
			sync_cache_r(&c->cpus[i].cpu);
		}

		switch (cpustate) {
		case CPU_DOWN:
			continue;

		default:
			goto abort;
		}
	}

	return true;

abort:
	__mcpm_outbound_leave_critical(cluster, CLUSTER_UP);
	return false;
}

int __mcpm_cluster_state(unsigned int cluster)
{
	sync_cache_r(&mcpm_sync.clusters[cluster].cluster);
	return mcpm_sync.clusters[cluster].cluster;
}

extern unsigned long mcpm_power_up_setup_phys;

int __init mcpm_sync_init(
	void (*power_up_setup)(unsigned int affinity_level))
{
	unsigned int i, j, mpidr, this_cluster;

	BUILD_BUG_ON(MCPM_SYNC_CLUSTER_SIZE * MAX_NR_CLUSTERS != sizeof mcpm_sync);
	BUG_ON((unsigned long)&mcpm_sync & (__CACHE_WRITEBACK_GRANULE - 1));

	/*
	 * Set initial CPU and cluster states.
	 * Only one cluster is assumed to be active at this point.
	 */
	for (i = 0; i < MAX_NR_CLUSTERS; i++) {
		mcpm_sync.clusters[i].cluster = CLUSTER_DOWN;
		mcpm_sync.clusters[i].inbound = INBOUND_NOT_COMING_UP;
		for (j = 0; j < MAX_CPUS_PER_CLUSTER; j++)
			mcpm_sync.clusters[i].cpus[j].cpu = CPU_DOWN;
	}
	mpidr = read_cpuid_mpidr();
	this_cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	for_each_online_cpu(i)
		mcpm_sync.clusters[this_cluster].cpus[i].cpu = CPU_UP;
	mcpm_sync.clusters[this_cluster].cluster = CLUSTER_UP;
	sync_cache_w(&mcpm_sync);

	if (power_up_setup) {
		mcpm_power_up_setup_phys = virt_to_phys(power_up_setup);
		sync_cache_w(&mcpm_power_up_setup_phys);
	}

	return 0;
}
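The unordered power_up/power_down contract spelled out in the comment above is most easily met with a per-CPU usage count. A minimal sketch of what a hypothetical platform backend might look like; the foo_* names and the two register-poking helpers are invented for illustration and are not part of this series:

	static int foo_use_count[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];

	static int foo_power_up(unsigned int cpu, unsigned int cluster)
	{
		/* Only deassert reset on a 0 -> 1 transition. */
		if (!foo_use_count[cluster][cpu]++)
			foo_deassert_cpu_reset(cpu, cluster);	/* hypothetical */
		return 0;
	}

	static void foo_power_down(void)
	{
		unsigned int mpidr = read_cpuid_mpidr();
		unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
		unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);

		__mcpm_cpu_going_down(cpu, cluster);
		if (--foo_use_count[cluster][cpu] == 0) {
			/* Nobody re-requested this CPU: really go down. */
			foo_disable_cache_and_coherency();	/* hypothetical */
			__mcpm_cpu_down(cpu, cluster);
			wfi();	/* reset is asserted; execution stops here */
		} else {
			/*
			 * A concurrent power_up already removed the reset
			 * condition: do the minimum and return, exactly as
			 * the comment above requires.
			 */
			__mcpm_cpu_down(cpu, cluster);
		}
	}

	static const struct mcpm_platform_ops foo_power_ops = {
		.power_up	= foo_power_up,
		.power_down	= foo_power_down,
	};

A real backend must additionally serialise the count updates between CPUs whose caches may be off, which is precisely what the vlock and cache-sync helpers elsewhere in this series provide.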
@@ -0,0 +1,219 @@
/*
 * arch/arm/common/mcpm_head.S -- kernel entry point for multi-cluster PM
 *
 * Created by:  Nicolas Pitre, March 2012
 * Copyright:   (C) 2012-2013  Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *
 * Refer to Documentation/arm/cluster-pm-race-avoidance.txt
 * for details of the synchronisation algorithms used here.
 */

#include <linux/linkage.h>
#include <asm/mcpm.h>

#include "vlock.h"

.if MCPM_SYNC_CLUSTER_CPUS
.error "cpus must be the first member of struct mcpm_sync_struct"
.endif

	.macro	pr_dbg	string
#if defined(CONFIG_DEBUG_LL) && defined(DEBUG)
	b	1901f
1902:	.asciz	"CPU"
1903:	.asciz	" cluster"
1904:	.asciz	": \string"
	.align
1901:	adr	r0, 1902b
	bl	printascii
	mov	r0, r9
	bl	printhex8
	adr	r0, 1903b
	bl	printascii
	mov	r0, r10
	bl	printhex8
	adr	r0, 1904b
	bl	printascii
#endif
	.endm

	.arm
	.align

ENTRY(mcpm_entry_point)

 THUMB(	adr	r12, BSYM(1f)	)
 THUMB(	bx	r12		)
 THUMB(	.thumb			)
1:
	mrc	p15, 0, r0, c0, c0, 5		@ MPIDR
	ubfx	r9, r0, #0, #8			@ r9 = cpu
	ubfx	r10, r0, #8, #8			@ r10 = cluster
	mov	r3, #MAX_CPUS_PER_CLUSTER
	mla	r4, r3, r10, r9			@ r4 = canonical CPU index
	cmp	r4, #(MAX_CPUS_PER_CLUSTER * MAX_NR_CLUSTERS)
	blo	2f

	/* We didn't expect this CPU.  Try to cheaply make it quiet. */
1:	wfi
	wfe
	b	1b

2:	pr_dbg	"kernel mcpm_entry_point\n"

	/*
	 * MMU is off so we need to get to various variables in a
	 * position independent way.
	 */
	adr	r5, 3f
	ldmia	r5, {r6, r7, r8, r11}
	add	r6, r5, r6			@ r6 = mcpm_entry_vectors
	ldr	r7, [r5, r7]			@ r7 = mcpm_power_up_setup_phys
	add	r8, r5, r8			@ r8 = mcpm_sync
	add	r11, r5, r11			@ r11 = first_man_locks

	mov	r0, #MCPM_SYNC_CLUSTER_SIZE
	mla	r8, r0, r10, r8			@ r8 = sync cluster base

	@ Signal that this CPU is coming UP:
	mov	r0, #CPU_COMING_UP
	mov	r5, #MCPM_SYNC_CPU_SIZE
	mla	r5, r9, r5, r8			@ r5 = sync cpu address
	strb	r0, [r5]

	@ At this point, the cluster cannot unexpectedly enter the GOING_DOWN
	@ state, because there is at least one active CPU (this CPU).

	mov	r0, #VLOCK_SIZE
	mla	r11, r0, r10, r11		@ r11 = cluster first man lock
	mov	r0, r11
	mov	r1, r9				@ cpu
	bl	vlock_trylock			@ implies DMB

	cmp	r0, #0				@ failed to get the lock?
	bne	mcpm_setup_wait			@ wait for cluster setup if so

	ldrb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
	cmp	r0, #CLUSTER_UP			@ cluster already up?
	bne	mcpm_setup			@ if not, set up the cluster

	@ Otherwise, release the first man lock and skip setup:
	mov	r0, r11
	bl	vlock_unlock
	b	mcpm_setup_complete

mcpm_setup:
	@ Control dependency implies strb not observable before previous ldrb.

	@ Signal that the cluster is being brought up:
	mov	r0, #INBOUND_COMING_UP
	strb	r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND]
	dmb

	@ Any CPU trying to take the cluster into CLUSTER_GOING_DOWN from this
	@ point onwards will observe INBOUND_COMING_UP and abort.

	@ Wait for any previously-pending cluster teardown operations to abort
	@ or complete:
mcpm_teardown_wait:
	ldrb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
	cmp	r0, #CLUSTER_GOING_DOWN
	bne	first_man_setup
	wfe
	b	mcpm_teardown_wait

first_man_setup:
	dmb

	@ If the outbound gave up before teardown started, skip cluster setup:

	cmp	r0, #CLUSTER_UP
	beq	mcpm_setup_leave

	@ power_up_setup is now responsible for setting up the cluster:

	cmp	r7, #0
	mov	r0, #1		@ second (cluster) affinity level
	blxne	r7		@ Call power_up_setup if defined
	dmb

	mov	r0, #CLUSTER_UP
	strb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
	dmb

mcpm_setup_leave:
	@ Leave the cluster setup critical section:

	mov	r0, #INBOUND_NOT_COMING_UP
	strb	r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND]
	dsb
	sev

	mov	r0, r11
	bl	vlock_unlock	@ implies DMB
	b	mcpm_setup_complete

	@ In the contended case, non-first men wait here for cluster setup
	@ to complete:
mcpm_setup_wait:
	ldrb	r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER]
	cmp	r0, #CLUSTER_UP
	wfene
	bne	mcpm_setup_wait
	dmb

mcpm_setup_complete:
	@ If a platform-specific CPU setup hook is needed, it is
	@ called from here.

	cmp	r7, #0
	mov	r0, #0		@ first (CPU) affinity level
	blxne	r7		@ Call power_up_setup if defined
	dmb

	@ Mark the CPU as up:

	mov	r0, #CPU_UP
	strb	r0, [r5]

	@ Observability order of CPU_UP and opening of the gate does not matter.

mcpm_entry_gated:
	ldr	r5, [r6, r4, lsl #2]	@ r5 = CPU entry vector
	cmp	r5, #0
	wfeeq
	beq	mcpm_entry_gated
	dmb

	pr_dbg	"released\n"
	bx	r5

	.align	2

3:	.word	mcpm_entry_vectors - .
	.word	mcpm_power_up_setup_phys - 3b
	.word	mcpm_sync - 3b
	.word	first_man_locks - 3b

ENDPROC(mcpm_entry_point)

	.bss

	.align	CACHE_WRITEBACK_ORDER
	.type	first_man_locks, #object
first_man_locks:
	.space	VLOCK_SIZE * MAX_NR_CLUSTERS
	.align	CACHE_WRITEBACK_ORDER

	.type	mcpm_entry_vectors, #object
ENTRY(mcpm_entry_vectors)
	.space	4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER

	.type	mcpm_power_up_setup_phys, #object
ENTRY(mcpm_power_up_setup_phys)
	.space	4		@ set by mcpm_sync_init()
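Stripped of barriers and MMU-off addressing, the entry flow above reduces to roughly the following C sketch, for orientation only; the assembly is the real implementation since it runs with the MMU off, and the *_model helpers are invented names (a C model of the vlock itself appears after the vlock.S hunk below):

	static void mcpm_entry_model(unsigned int cpu, unsigned int cluster)
	{
		struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster];
		unsigned long vector;

		c->cpus[cpu].cpu = CPU_COMING_UP;

		if (vlock_trylock_model(&first_man_lock[cluster], cpu) == 0) {
			/* first man: bring the cluster up if needed */
			if (c->cluster != CLUSTER_UP) {
				c->inbound = INBOUND_COMING_UP;
				while (c->cluster == CLUSTER_GOING_DOWN)
					; /* wfe(): outbound must finish or abort */
				if (c->cluster != CLUSTER_UP && power_up_setup)
					power_up_setup(1);	/* cluster level */
				c->cluster = CLUSTER_UP;
				c->inbound = INBOUND_NOT_COMING_UP;
			}
			vlock_unlock_model(&first_man_lock[cluster]);
		} else {
			while (c->cluster != CLUSTER_UP)
				; /* wfe(): wait for the first man */
		}

		if (power_up_setup)
			power_up_setup(0);		/* CPU level */
		c->cpus[cpu].cpu = CPU_UP;

		while (!(vector = mcpm_entry_vectors[cluster][cpu]))
			; /* wfe(): gated until a vector is published */
		((void (*)(void))vector)();
	}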
@@ -0,0 +1,92 @@
/*
 * linux/arch/arm/mach-vexpress/mcpm_platsmp.c
 *
 * Created by:  Nicolas Pitre, November 2012
 * Copyright:   (C) 2012-2013  Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Code to handle secondary CPU bringup and hotplug for the cluster power API.
 */

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/spinlock.h>

#include <linux/irqchip/arm-gic.h>

#include <asm/mcpm.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>

static void __init simple_smp_init_cpus(void)
{
}

static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
	unsigned int mpidr, pcpu, pcluster, ret;
	extern void secondary_startup(void);

	mpidr = cpu_logical_map(cpu);
	pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	pr_debug("%s: logical CPU %d is physical CPU %d cluster %d\n",
		 __func__, cpu, pcpu, pcluster);

	mcpm_set_entry_vector(pcpu, pcluster, NULL);
	ret = mcpm_cpu_power_up(pcpu, pcluster);
	if (ret)
		return ret;
	mcpm_set_entry_vector(pcpu, pcluster, secondary_startup);
	arch_send_wakeup_ipi_mask(cpumask_of(cpu));
	dsb_sev();
	return 0;
}

static void __cpuinit mcpm_secondary_init(unsigned int cpu)
{
	mcpm_cpu_powered_up();
	gic_secondary_init(0);
}

#ifdef CONFIG_HOTPLUG_CPU

static int mcpm_cpu_disable(unsigned int cpu)
{
	/*
	 * We assume all CPUs may be shut down.
	 * This would be the hook to use for eventual Secure
	 * OS migration requests as described in the PSCI spec.
	 */
	return 0;
}

static void mcpm_cpu_die(unsigned int cpu)
{
	unsigned int mpidr, pcpu, pcluster;
	mpidr = read_cpuid_mpidr();
	pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
	mcpm_set_entry_vector(pcpu, pcluster, NULL);
	mcpm_cpu_power_down();
}

#endif

static struct smp_operations __initdata mcpm_smp_ops = {
	.smp_init_cpus		= simple_smp_init_cpus,
	.smp_boot_secondary	= mcpm_boot_secondary,
	.smp_secondary_init	= mcpm_secondary_init,
#ifdef CONFIG_HOTPLUG_CPU
	.cpu_disable		= mcpm_cpu_disable,
	.cpu_die		= mcpm_cpu_die,
#endif
};

void __init mcpm_smp_set_ops(void)
{
	smp_set_ops(&mcpm_smp_ops);
}
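For these SMP operations to take effect, platform code has to register a backend and install the ops during early init. A hedged sketch of hypothetical machine glue, reusing the invented foo_* names from the earlier backend sketch:

	static void __init foo_smp_init(void)
	{
		/* order matters: backend and sync state before the smp_ops */
		if (!mcpm_platform_register(&foo_power_ops) &&
		    !mcpm_sync_init(foo_power_up_setup))	/* hypothetical hook */
			mcpm_smp_set_ops();
	}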
@@ -0,0 +1,108 @@
/*
 * vlock.S - simple voting lock implementation for ARM
 *
 * Created by:  Dave Martin, 2012-08-16
 * Copyright:   (C) 2012-2013  Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *
 * This algorithm is described in more detail in
 * Documentation/arm/vlocks.txt.
 */

#include <linux/linkage.h>
#include "vlock.h"

/* Select different code if voting flags can fit in a single word. */
#if VLOCK_VOTING_SIZE > 4
#define FEW(x...)
#define MANY(x...) x
#else
#define FEW(x...) x
#define MANY(x...)
#endif

@ voting lock for first-man coordination

.macro voting_begin rbase:req, rcpu:req, rscratch:req
	mov	\rscratch, #1
	strb	\rscratch, [\rbase, \rcpu]
	dmb
.endm

.macro voting_end rbase:req, rcpu:req, rscratch:req
	dmb
	mov	\rscratch, #0
	strb	\rscratch, [\rbase, \rcpu]
	dsb
	sev
.endm

/*
 * The vlock structure must reside in Strongly-Ordered or Device memory.
 * This implementation deliberately eliminates most of the barriers which
 * would be required for other memory types, and assumes that independent
 * writes to neighbouring locations within a cacheline do not interfere
 * with one another.
 */

@ r0: lock structure base
@ r1: CPU ID (0-based index within cluster)
ENTRY(vlock_trylock)
	add	r1, r1, #VLOCK_VOTING_OFFSET

	voting_begin	r0, r1, r2

	ldrb	r2, [r0, #VLOCK_OWNER_OFFSET]	@ check whether lock is held
	cmp	r2, #VLOCK_OWNER_NONE
	bne	trylock_fail			@ fail if so

	@ Control dependency implies strb not observable before previous ldrb.

	strb	r1, [r0, #VLOCK_OWNER_OFFSET]	@ submit my vote

	voting_end	r0, r1, r2		@ implies DMB

	@ Wait for the current round of voting to finish:

 MANY(	mov	r3, #VLOCK_VOTING_OFFSET			)
0:
 MANY(	ldr	r2, [r0, r3]					)
 FEW(	ldr	r2, [r0, #VLOCK_VOTING_OFFSET]			)
	cmp	r2, #0
	wfene
	bne	0b
 MANY(	add	r3, r3, #4					)
 MANY(	cmp	r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE	)
 MANY(	bne	0b						)

	@ Check who won:

	dmb
	ldrb	r2, [r0, #VLOCK_OWNER_OFFSET]
	eor	r0, r1, r2	@ zero if I won, else nonzero
	bx	lr

trylock_fail:
	voting_end	r0, r1, r2
	mov	r0, #1		@ nonzero indicates that I lost
	bx	lr
ENDPROC(vlock_trylock)

@ r0: lock structure base
ENTRY(vlock_unlock)
	dmb
	mov	r1, #VLOCK_OWNER_NONE
	strb	r1, [r0, #VLOCK_OWNER_OFFSET]
	dsb
	sev
	bx	lr
ENDPROC(vlock_unlock)
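The voting protocol is easier to follow in C. A rough model, ignoring the barrier and memory-type subtleties the comment above relies on, with invented struct and function names:

	struct vlock_model {
		unsigned char owner;	/* VLOCK_OWNER_NONE (0) when free */
		unsigned char voting[MAX_CPUS_PER_CLUSTER];
	};

	static int vlock_trylock_model(struct vlock_model *v, unsigned int cpu)
	{
		/* ids are offset by VLOCK_VOTING_OFFSET so they never equal 0 */
		unsigned char me = VLOCK_VOTING_OFFSET + cpu;
		unsigned int i;

		v->voting[cpu] = 1;			/* voting_begin */
		if (v->owner != VLOCK_OWNER_NONE) {	/* lock already held */
			v->voting[cpu] = 0;		/* voting_end */
			return 1;			/* nonzero: lost */
		}
		v->owner = me;				/* submit my vote */
		v->voting[cpu] = 0;			/* voting_end */

		for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++)
			while (v->voting[i])
				;	/* wait for the voting round to end */

		return v->owner != me;			/* zero if I won */
	}

	static void vlock_unlock_model(struct vlock_model *v)
	{
		v->owner = VLOCK_OWNER_NONE;
	}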
@@ -0,0 +1,29 @@
/*
 * vlock.h - simple voting lock implementation
 *
 * Created by:  Dave Martin, 2012-08-16
 * Copyright:   (C) 2012-2013  Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef __VLOCK_H
#define __VLOCK_H

#include <asm/mcpm.h>

/* Offsets and sizes are rounded to a word (4 bytes) */
#define VLOCK_OWNER_OFFSET	0
#define VLOCK_VOTING_OFFSET	4
#define VLOCK_VOTING_SIZE	((MAX_CPUS_PER_CLUSTER + 3) / 4 * 4)
#define VLOCK_SIZE		(VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE)
#define VLOCK_OWNER_NONE	0

#endif /* ! __VLOCK_H */
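A quick check of the arithmetic, using the MAX_CPUS_PER_CLUSTER = 4 value from mcpm.h:

	/* VLOCK_VOTING_SIZE = (4 + 3) / 4 * 4 = 4: one voting byte per CPU,
	 * rounded up to a whole word.
	 * VLOCK_SIZE = 4 + 4 = 8: owner word plus voting bytes, so the
	 * first_man_locks area in mcpm_head.S occupies 8 * MAX_NR_CLUSTERS
	 * = 16 bytes before its cache-line alignment.
	 */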
@@ -116,6 +116,7 @@ CONFIG_SND_SOC=y
 CONFIG_SND_MXS_SOC=y
 CONFIG_SND_SOC_MXS_SGTL5000=y
 CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_HOST=y
 CONFIG_USB_STORAGE=y

@@ -126,6 +126,8 @@ CONFIG_INPUT_MISC=y
 CONFIG_INPUT_TWL4030_PWRBUTTON=y
 CONFIG_VT_HW_CONSOLE_BINDING=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=32
 CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_MANY_PORTS=y
@@ -363,4 +363,79 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
 		flush_cache_all();
 }
 
+/*
+ * Memory synchronization helpers for mixed cached vs non cached accesses.
+ *
+ * Some synchronization algorithms have to set states in memory with the
+ * cache enabled or disabled depending on the code path.  It is crucial
+ * to always ensure proper cache maintenance to update main memory right
+ * away in that case.
+ *
+ * Any cached write must be followed by a cache clean operation.
+ * Any cached read must be preceded by a cache invalidate operation.
+ * Yet, in the read case, a cache flush i.e. atomic clean+invalidate
+ * operation is needed to avoid discarding possible concurrent writes to the
+ * accessed memory.
+ *
+ * Also, in order to prevent a cached writer from interfering with an
+ * adjacent non-cached writer, each state variable must be located to
+ * a separate cache line.
+ */
+
+/*
+ * This needs to be >= the max cache writeback size of all
+ * supported platforms included in the current kernel configuration.
+ * This is used to align state variables to their own cache lines.
+ */
+#define __CACHE_WRITEBACK_ORDER 6  /* guessed from existing platforms */
+#define __CACHE_WRITEBACK_GRANULE (1 << __CACHE_WRITEBACK_ORDER)
+
+/*
+ * There is no __cpuc_clean_dcache_area but we use it anyway for
+ * code intent clarity, and alias it to __cpuc_flush_dcache_area.
+ */
+#define __cpuc_clean_dcache_area __cpuc_flush_dcache_area
+
+/*
+ * Ensure preceding writes to *p by this CPU are visible to
+ * subsequent reads by other CPUs:
+ */
+static inline void __sync_cache_range_w(volatile void *p, size_t size)
+{
+	char *_p = (char *)p;
+
+	__cpuc_clean_dcache_area(_p, size);
+	outer_clean_range(__pa(_p), __pa(_p + size));
+}
+
+/*
+ * Ensure preceding writes to *p by other CPUs are visible to
+ * subsequent reads by this CPU.  We must be careful not to
+ * discard data simultaneously written by another CPU, hence the
+ * usage of flush rather than invalidate operations.
+ */
+static inline void __sync_cache_range_r(volatile void *p, size_t size)
+{
+	char *_p = (char *)p;
+
+#ifdef CONFIG_OUTER_CACHE
+	if (outer_cache.flush_range) {
+		/*
+		 * Ensure dirty data migrated from other CPUs into our cache
+		 * are cleaned out safely before the outer cache is cleaned:
+		 */
+		__cpuc_clean_dcache_area(_p, size);
+
+		/* Clean and invalidate stale data for *p from outer ... */
+		outer_flush_range(__pa(_p), __pa(_p + size));
+	}
+#endif
+
+	/* ... and inner cache: */
+	__cpuc_flush_dcache_area(_p, size);
+}
+
+#define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr))
+#define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr))
+
 #endif
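These helpers are what mcpm_entry.c uses for every state update. A minimal usage sketch, with a hypothetical state variable:

	static int state __aligned(__CACHE_WRITEBACK_GRANULE);

	void publish(int v)
	{
		state = v;		/* cached write ... */
		sync_cache_w(&state);	/* ... cleaned out to main memory */
	}

	int observe(void)
	{
		sync_cache_r(&state);	/* flush any stale cached copy first */
		return state;
	}

The __aligned() attribute enforces the one-variable-per-cache-line rule stated in the comment block above.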
@@ -0,0 +1,209 @@
/*
 * arch/arm/include/asm/mcpm.h
 *
 * Created by:  Nicolas Pitre, April 2012
 * Copyright:   (C) 2012-2013  Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef MCPM_H
#define MCPM_H

/*
 * Maximum number of possible clusters / CPUs per cluster.
 *
 * This should be sufficient for quite a while, while keeping the
 * (assembly) code simpler.  When this starts to grow then we'll have
 * to consider dynamic allocation.
 */
#define MAX_CPUS_PER_CLUSTER	4
#define MAX_NR_CLUSTERS		2

#ifndef __ASSEMBLY__

#include <linux/types.h>
#include <asm/cacheflush.h>

/*
 * Platform specific code should use this symbol to set up secondary
 * entry location for processors to use when released from reset.
 */
extern void mcpm_entry_point(void);

/*
 * This is used to indicate where the given CPU from given cluster should
 * branch once it is ready to re-enter the kernel using ptr, or NULL if it
 * should be gated.  A gated CPU is held in a WFE loop until its vector
 * becomes non NULL.
 */
void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);

/*
 * CPU/cluster power operations API for higher subsystems to use.
 */

/**
 * mcpm_cpu_power_up - make given CPU in given cluster runnable
 *
 * @cpu: CPU number within given cluster
 * @cluster: cluster number for the CPU
 *
 * The identified CPU is brought out of reset.  If the cluster was powered
 * down then it is brought up as well, taking care not to let the other CPUs
 * in the cluster run, and ensuring appropriate cluster setup.
 *
 * Caller must ensure the appropriate entry vector is initialized with
 * mcpm_set_entry_vector() prior to calling this.
 *
 * This must be called in a sleepable context.  However, the implementation
 * is strongly encouraged to return early and let the operation happen
 * asynchronously, especially when significant delays are expected.
 *
 * If the operation cannot be performed then an error code is returned.
 */
int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster);

/**
 * mcpm_cpu_power_down - power the calling CPU down
 *
 * The calling CPU is powered down.
 *
 * If this CPU is found to be the "last man standing" in the cluster
 * then the cluster is prepared for power-down too.
 *
 * This must be called with interrupts disabled.
 *
 * This does not return.  Re-entry in the kernel is expected via
 * mcpm_entry_point.
 */
void mcpm_cpu_power_down(void);

/**
 * mcpm_cpu_suspend - bring the calling CPU in a suspended state
 *
 * @expected_residency: duration in microseconds the CPU is expected
 *			to remain suspended, or 0 if unknown/infinity.
 *
 * The calling CPU is suspended.  The expected residency argument is used
 * as a hint by the platform specific backend to implement the appropriate
 * sleep state level according to the knowledge it has on wake-up latency
 * for the given hardware.
 *
 * If this CPU is found to be the "last man standing" in the cluster
 * then the cluster may be prepared for power-down too, if the expected
 * residency makes it worthwhile.
 *
 * This must be called with interrupts disabled.
 *
 * This does not return.  Re-entry in the kernel is expected via
 * mcpm_entry_point.
 */
void mcpm_cpu_suspend(u64 expected_residency);

/**
 * mcpm_cpu_powered_up - housekeeping work after a CPU has been powered up
 *
 * This lets the platform specific backend code perform needed housekeeping
 * work.  This must be called by the newly activated CPU as soon as it is
 * fully operational in kernel space, before it enables interrupts.
 *
 * If the operation cannot be performed then an error code is returned.
 */
int mcpm_cpu_powered_up(void);

/*
 * Platform specific methods used in the implementation of the above API.
 */
struct mcpm_platform_ops {
	int (*power_up)(unsigned int cpu, unsigned int cluster);
	void (*power_down)(void);
	void (*suspend)(u64);
	void (*powered_up)(void);
};

/**
 * mcpm_platform_register - register platform specific power methods
 *
 * @ops: mcpm_platform_ops structure to register
 *
 * An error is returned if the registration has been done previously.
 */
int __init mcpm_platform_register(const struct mcpm_platform_ops *ops);

/* Synchronisation structures for coordinating safe cluster setup/teardown: */

/*
 * When modifying this structure, make sure you update the MCPM_SYNC_ defines
 * to match.
 */
struct mcpm_sync_struct {
	/* individual CPU states */
	struct {
		s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE);
	} cpus[MAX_CPUS_PER_CLUSTER];

	/* cluster state */
	s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE);

	/* inbound-side state */
	s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE);
};

struct sync_struct {
	struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS];
};

extern unsigned long sync_phys;	/* physical address of *mcpm_sync */

void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster);
void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster);
void __mcpm_outbound_leave_critical(unsigned int cluster, int state);
bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster);
int __mcpm_cluster_state(unsigned int cluster);

int __init mcpm_sync_init(
	void (*power_up_setup)(unsigned int affinity_level));

void __init mcpm_smp_set_ops(void);

#else

/*
 * asm-offsets.h causes trouble when included in .c files, and cacheflush.h
 * cannot be included in asm files.  Let's work around the conflict like this.
 */
#include <asm/asm-offsets.h>
#define __CACHE_WRITEBACK_GRANULE CACHE_WRITEBACK_GRANULE

#endif /* ! __ASSEMBLY__ */

/* Definitions for mcpm_sync_struct */
#define CPU_DOWN		0x11
#define CPU_COMING_UP		0x12
#define CPU_UP			0x13
#define CPU_GOING_DOWN		0x14

#define CLUSTER_DOWN		0x21
#define CLUSTER_UP		0x22
#define CLUSTER_GOING_DOWN	0x23

#define INBOUND_NOT_COMING_UP	0x31
#define INBOUND_COMING_UP	0x32

/*
 * Offsets for the mcpm_sync_struct members, for use in asm.
 * We don't want to make them global to the kernel via asm-offsets.c.
 */
#define MCPM_SYNC_CLUSTER_CPUS	0
#define MCPM_SYNC_CPU_SIZE	__CACHE_WRITEBACK_GRANULE
#define MCPM_SYNC_CLUSTER_CLUSTER \
	(MCPM_SYNC_CLUSTER_CPUS + MCPM_SYNC_CPU_SIZE * MAX_CPUS_PER_CLUSTER)
#define MCPM_SYNC_CLUSTER_INBOUND \
	(MCPM_SYNC_CLUSTER_CLUSTER + __CACHE_WRITEBACK_GRANULE)
#define MCPM_SYNC_CLUSTER_SIZE \
	(MCPM_SYNC_CLUSTER_INBOUND + __CACHE_WRITEBACK_GRANULE)

#endif
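The asm-side offsets can be sanity-checked by hand. Assuming the guessed __CACHE_WRITEBACK_GRANULE of 64 bytes (order 6) from cacheflush.h:

	/* MCPM_SYNC_CPU_SIZE        = 64
	 * MCPM_SYNC_CLUSTER_CLUSTER = 0 + 64 * 4 = 256
	 * MCPM_SYNC_CLUSTER_INBOUND = 256 + 64   = 320
	 * MCPM_SYNC_CLUSTER_SIZE    = 320 + 64   = 384
	 * i.e. 384 bytes per cluster, with every state byte on its own
	 * writeback granule; the BUILD_BUG_ON() in mcpm_sync_init()
	 * verifies this layout against the C struct at compile time.
	 */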
@@ -5,15 +5,15 @@
 
 typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
-	u64 id;
+	atomic64_t	id;
 #endif
-	unsigned int vmalloc_seq;
+	unsigned int	vmalloc_seq;
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
 #define ASID_BITS	8
 #define ASID_MASK	((~0ULL) << ASID_BITS)
-#define ASID(mm)	((mm)->context.id & ~ASID_MASK)
+#define ASID(mm)	((mm)->context.id.counter & ~ASID_MASK)
 #else
 #define ASID(mm)	(0)
 #endif

@@ -26,7 +26,7 @@ typedef struct {
  * modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com>
  */
 typedef struct {
-	unsigned long	end_brk;
+	unsigned long		end_brk;
 } mm_context_t;
 
 #endif

@@ -25,7 +25,7 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 #ifdef CONFIG_CPU_HAS_ASID
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
-#define init_new_context(tsk,mm)	({ mm->context.id = 0; })
+#define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
 #else	/* !CONFIG_CPU_HAS_ASID */
@@ -34,10 +34,13 @@
 #define TLB_V6_D_ASID	(1 << 17)
 #define TLB_V6_I_ASID	(1 << 18)
 
+#define TLB_V6_BP	(1 << 19)
+
 /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
-#define TLB_V7_UIS_PAGE	(1 << 19)
-#define TLB_V7_UIS_FULL	(1 << 20)
-#define TLB_V7_UIS_ASID	(1 << 21)
+#define TLB_V7_UIS_PAGE	(1 << 20)
+#define TLB_V7_UIS_FULL	(1 << 21)
+#define TLB_V7_UIS_ASID	(1 << 22)
+#define TLB_V7_UIS_BP	(1 << 23)
 
 #define TLB_BARRIER	(1 << 28)
 #define TLB_L2CLEAN_FR	(1 << 29)	/* Feroceon */

@@ -150,7 +153,8 @@
 #define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
 			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
-			 TLB_V6_I_ASID | TLB_V6_D_ASID)
+			 TLB_V6_I_ASID | TLB_V6_D_ASID | \
+			 TLB_V6_BP)
 
 #ifdef CONFIG_CPU_TLB_V6
 # define v6wbi_possible_flags	v6wbi_tlb_flags

@@ -166,9 +170,11 @@
 #endif
 
 #define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
-				 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
+				 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | \
+				 TLB_V7_UIS_ASID | TLB_V7_UIS_BP)
 #define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
-				 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
+				 TLB_V6_U_FULL | TLB_V6_U_PAGE | \
+				 TLB_V6_U_ASID | TLB_V6_BP)
 
 #ifdef CONFIG_CPU_TLB_V7

@@ -430,6 +436,20 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 	}
 }
 
+static inline void local_flush_bp_all(void)
+{
+	const int zero = 0;
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_V7_UIS_BP))
+		asm("mcr	p15, 0, %0, c7, c1, 6" : : "r" (zero));
+	else if (tlb_flag(TLB_V6_BP))
+		asm("mcr	p15, 0, %0, c7, c5, 6" : : "r" (zero));
+
+	if (tlb_flag(TLB_BARRIER))
+		isb();
+}
+
 /*
  * flush_pmd_entry
  *

@@ -480,6 +500,7 @@ static inline void clean_pmd_entry(void *pmd)
 #define flush_tlb_kernel_page	local_flush_tlb_kernel_page
 #define flush_tlb_range		local_flush_tlb_range
 #define flush_tlb_kernel_range	local_flush_tlb_kernel_range
+#define flush_bp_all		local_flush_bp_all
 #else
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);

@@ -487,6 +508,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
 extern void flush_tlb_kernel_page(unsigned long kaddr);
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_bp_all(void);
 #endif
 
 /*
|
|||
#define _ASM_ARM_XEN_EVENTS_H
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/atomic.h>
|
||||
|
||||
enum ipi_vector {
|
||||
XEN_PLACEHOLDER_VECTOR,
|
||||
|
@ -15,26 +16,8 @@ static inline int xen_irqs_disabled(struct pt_regs *regs)
|
|||
return raw_irqs_disabled_flags(regs->ARM_cpsr);
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot use xchg because it does not support 8-byte
|
||||
* values. However it is safe to use {ldr,dtd}exd directly because all
|
||||
* platforms which Xen can run on support those instructions.
|
||||
*/
|
||||
static inline xen_ulong_t xchg_xen_ulong(xen_ulong_t *ptr, xen_ulong_t val)
|
||||
{
|
||||
xen_ulong_t oldval;
|
||||
unsigned int tmp;
|
||||
|
||||
wmb();
|
||||
asm volatile("@ xchg_xen_ulong\n"
|
||||
"1: ldrexd %0, %H0, [%3]\n"
|
||||
" strexd %1, %2, %H2, [%3]\n"
|
||||
" teq %1, #0\n"
|
||||
" bne 1b"
|
||||
: "=&r" (oldval), "=&r" (tmp)
|
||||
: "r" (val), "r" (ptr)
|
||||
: "memory", "cc");
|
||||
return oldval;
|
||||
}
|
||||
#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \
|
||||
atomic64_t, \
|
||||
counter), (val))
|
||||
|
||||
#endif /* _ASM_ARM_XEN_EVENTS_H */
|
||||
|
|
|
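The replacement macro leans on the fact that an atomic64_t is a struct wrapping a single 64-bit counter member, so container_of() can turn a pointer to the raw xen_ulong_t word back into the atomic type and reuse the generic 64-bit exchange (itself built on ldrexd/strexd on ARM). A usage sketch, with a hypothetical event word:

	/* one 64-bit event word, only ever touched through the macro */
	static xen_ulong_t pending_words;

	static xen_ulong_t claim_all_events(void)
	{
		/* atomic read-and-clear via the generic atomic64_xchg() */
		return xchg_xen_ulong(&pending_words, 0);
	}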
@@ -404,7 +404,7 @@
 #define __NR_setns			(__NR_SYSCALL_BASE+375)
 #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
-					/* 378 for kcmp */
+#define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
 
 /*

@@ -110,7 +110,7 @@ int main(void)
   BLANK();
 #endif
 #ifdef CONFIG_CPU_HAS_ASID
-  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
+  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
   BLANK();
 #endif
   DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));

@@ -149,6 +149,10 @@ int main(void)
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
+  BLANK();
+  DEFINE(CACHE_WRITEBACK_ORDER, __CACHE_WRITEBACK_ORDER);
+  DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE);
+  BLANK();
 #ifdef CONFIG_KVM_ARM_HOST
   DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.midr));

@@ -387,7 +387,7 @@
 /* 375 */	CALL(sys_setns)
 		CALL(sys_process_vm_readv)
 		CALL(sys_process_vm_writev)
-		CALL(sys_ni_syscall)	/* reserved for sys_kcmp */
+		CALL(sys_kcmp)
 		CALL(sys_finit_module)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
@@ -184,13 +184,22 @@ __create_page_tables:
 	orr	r3, r3, #3			@ PGD block type
 	mov	r6, #4				@ PTRS_PER_PGD
 	mov	r7, #1 << (55 - 32)		@ L_PGD_SWAPPER
-1:	str	r3, [r0], #4			@ set bottom PGD entry bits
+1:
+#ifdef CONFIG_CPU_ENDIAN_BE8
 	str	r7, [r0], #4			@ set top PGD entry bits
+	str	r3, [r0], #4			@ set bottom PGD entry bits
+#else
+	str	r3, [r0], #4			@ set bottom PGD entry bits
+	str	r7, [r0], #4			@ set top PGD entry bits
+#endif
 	add	r3, r3, #0x1000			@ next PMD table
 	subs	r6, r6, #1
 	bne	1b
 
 	add	r4, r4, #0x1000			@ point to the PMD tables
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	add	r4, r4, #4			@ we only write the bottom word
+#endif
 #endif
 
 	ldr	r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags

@@ -258,6 +267,11 @@ __create_page_tables:
 	addne	r6, r6, #1 << SECTION_SHIFT
 	strne	r6, [r3]
 
+#if defined(CONFIG_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
+	sub	r4, r4, #4			@ Fixup page table pointer
+						@ for 64-bit descriptors
+#endif
+
 #ifdef CONFIG_DEBUG_LL
 #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
 	/*

@@ -276,12 +290,16 @@ __create_page_tables:
 	orr	r3, r7, r3, lsl #SECTION_SHIFT
 #ifdef CONFIG_ARM_LPAE
 	mov	r7, #1 << (54 - 32)		@ XN
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	str	r7, [r0], #4
+	str	r3, [r0], #4
+#else
+	str	r3, [r0], #4
+	str	r7, [r0], #4
+#endif
 #else
 	orr	r3, r3, #PMD_SECT_XN
-#endif
 	str	r3, [r0], #4
-#ifdef CONFIG_ARM_LPAE
-	str	r7, [r0], #4
 #endif
 
 #else	/* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */
@@ -1023,7 +1023,7 @@ out_mdbgen:
 static int __cpuinit dbg_reset_notify(struct notifier_block *self,
 				      unsigned long action, void *cpu)
 {
-	if (action == CPU_ONLINE)
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
 		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
 
 	return NOTIFY_OK;
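The masked comparison also accepts CPU_ONLINE_FROZEN, which is what the suspend/resume path delivers, so the debug registers are reset on resume as well as on plain hotplug. The same pattern applies to any hotplug notifier that must run in both paths; a generic sketch with an invented name:

	static int foo_cpu_notify(struct notifier_block *nb,
				  unsigned long action, void *hcpu)
	{
		switch (action & ~CPU_TASKS_FROZEN) {
		case CPU_ONLINE:
			/* per-CPU reinitialisation, on boot and on resume */
			break;
		}
		return NOTIFY_OK;
	}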
@@ -400,7 +400,7 @@ __hw_perf_event_init(struct perf_event *event)
 	}
 
 	if (event->group_leader != event) {
-		if (validate_group(event) != 0);
+		if (validate_group(event) != 0)
 			return -EINVAL;
 	}
 

@@ -484,7 +484,7 @@ const struct dev_pm_ops armpmu_dev_pm_ops = {
 	SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
 };
 
-static void __init armpmu_init(struct arm_pmu *armpmu)
+static void armpmu_init(struct arm_pmu *armpmu)
 {
 	atomic_set(&armpmu->active_events, 0);
 	mutex_init(&armpmu->reserve_mutex);

@@ -774,7 +774,7 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 /*
  * PMXEVTYPER: Event selection reg
  */
-#define	ARMV7_EVTYPE_MASK	0xc00000ff	/* Mask for writable bits */
+#define	ARMV7_EVTYPE_MASK	0xc80000ff	/* Mask for writable bits */
 #define	ARMV7_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */
 
 /*
@@ -285,6 +285,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	 * switch away from it before attempting any exclusive accesses.
 	 */
 	cpu_switch_mm(mm->pgd, mm);
+	local_flush_bp_all();
 	enter_lazy_tlb(mm, current);
 	local_flush_tlb_all();

@@ -479,7 +480,7 @@ static void __cpuinit broadcast_timer_setup(struct clock_event_device *evt)
 	evt->features	= CLOCK_EVT_FEAT_ONESHOT |
 			  CLOCK_EVT_FEAT_PERIODIC |
 			  CLOCK_EVT_FEAT_DUMMY;
-	evt->rating	= 400;
+	evt->rating	= 100;
 	evt->mult	= 1;
 	evt->set_mode	= broadcast_timer_set_mode;

@@ -64,6 +64,11 @@ static inline void ipi_flush_tlb_kernel_range(void *arg)
 	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
 }
 
+static inline void ipi_flush_bp_all(void *ignored)
+{
+	local_flush_bp_all();
+}
+
 void flush_tlb_all(void)
 {
 	if (tlb_ops_need_broadcast())

@@ -127,3 +132,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		local_flush_tlb_kernel_range(start, end);
 }
 
+void flush_bp_all(void)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_bp_all, NULL, 1);
+	else
+		local_flush_bp_all();
+}

@@ -22,6 +22,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
 
+#include <asm/smp_plat.h>
 #include <asm/smp_twd.h>
 #include <asm/localtimer.h>

@@ -373,6 +374,9 @@ void __init twd_local_timer_of_register(void)
 	struct device_node *np;
 	int err;
 
+	if (!is_smp() || !setup_max_cpus)
+		return;
+
 	np = of_find_matching_node(NULL, twd_of_match);
 	if (!np)
 		return;

@@ -68,6 +68,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	ret = __cpu_suspend(arg, fn);
 	if (ret == 0) {
 		cpu_switch_mm(mm->pgd, mm);
+		local_flush_bp_all();
 		local_flush_tlb_all();
 	}
@ -14,27 +14,15 @@
|
|||
|
||||
.text
|
||||
.align 5
|
||||
.word 0
|
||||
|
||||
1: subs r2, r2, #4 @ 1 do we have enough
|
||||
blt 5f @ 1 bytes to align with?
|
||||
cmp r3, #2 @ 1
|
||||
strltb r1, [r0], #1 @ 1
|
||||
strleb r1, [r0], #1 @ 1
|
||||
strb r1, [r0], #1 @ 1
|
||||
add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
|
||||
/*
|
||||
* The pointer is now aligned and the length is adjusted. Try doing the
|
||||
* memset again.
|
||||
*/
|
||||
|
||||
ENTRY(memset)
|
||||
ands r3, r0, #3 @ 1 unaligned?
|
||||
bne 1b @ 1
|
||||
mov ip, r0 @ preserve r0 as return value
|
||||
bne 6f @ 1
|
||||
/*
|
||||
* we know that the pointer in r0 is aligned to a word boundary.
|
||||
* we know that the pointer in ip is aligned to a word boundary.
|
||||
*/
|
||||
orr r1, r1, r1, lsl #8
|
||||
1: orr r1, r1, r1, lsl #8
|
||||
orr r1, r1, r1, lsl #16
|
||||
mov r3, r1
|
||||
cmp r2, #16
|
||||
|
@ -43,29 +31,28 @@ ENTRY(memset)
|
|||
#if ! CALGN(1)+0
|
||||
|
||||
/*
|
||||
* We need an extra register for this loop - save the return address and
|
||||
* use the LR
|
||||
* We need 2 extra registers for this loop - use r8 and the LR
|
||||
*/
|
||||
str lr, [sp, #-4]!
|
||||
mov ip, r1
|
||||
stmfd sp!, {r8, lr}
|
||||
mov r8, r1
|
||||
mov lr, r1
|
||||
|
||||
2: subs r2, r2, #64
|
||||
stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
|
||||
stmgeia r0!, {r1, r3, ip, lr}
|
||||
stmgeia r0!, {r1, r3, ip, lr}
|
||||
stmgeia r0!, {r1, r3, ip, lr}
|
||||
stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
|
||||
stmgeia ip!, {r1, r3, r8, lr}
|
||||
stmgeia ip!, {r1, r3, r8, lr}
|
||||
stmgeia ip!, {r1, r3, r8, lr}
|
||||
bgt 2b
|
||||
ldmeqfd sp!, {pc} @ Now <64 bytes to go.
|
||||
ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go.
|
||||
/*
|
||||
* No need to correct the count; we're only testing bits from now on
|
||||
*/
|
||||
tst r2, #32
|
||||
stmneia r0!, {r1, r3, ip, lr}
|
||||
stmneia r0!, {r1, r3, ip, lr}
|
||||
stmneia ip!, {r1, r3, r8, lr}
|
||||
stmneia ip!, {r1, r3, r8, lr}
|
||||
tst r2, #16
|
||||
stmneia r0!, {r1, r3, ip, lr}
|
||||
ldr lr, [sp], #4
|
||||
stmneia ip!, {r1, r3, r8, lr}
|
||||
ldmfd sp!, {r8, lr}
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -74,54 +61,63 @@
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r7, lr}
	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	ip, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	r0, #31
	tstgt	ip, #31
	ble	3f

	and	ip, r0, #31
	rsb	ip, ip, #32
	sub	r2, r2, ip
	movs	ip, ip, lsl #(32 - 4)
	stmcsia	r0!, {r4, r5, r6, r7}
	stmmiia	r0!, {r4, r5}
	tst	ip, #(1 << 30)
	mov	ip, r1
	strne	r1, [r0], #4
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmcsia	ip!, {r4, r5, r6, r7}
	stmmiia	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4

3:	subs	r2, r2, #64
	stmgeia	r0!, {r1, r3-r7, ip, lr}
	stmgeia	r0!, {r1, r3-r7, ip, lr}
	stmgeia	ip!, {r1, r3-r8, lr}
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r7, pc}
	ldmeqfd	sp!, {r4-r8, pc}

	tst	r2, #32
	stmneia	r0!, {r1, r3-r7, ip, lr}
	stmneia	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmneia	r0!, {r4-r7}
	ldmfd	sp!, {r4-r7, lr}
	stmneia	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmneia	r0!, {r1, r3}
	stmneia	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [r0], #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to zero. We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [r0], #1
	strneb	r1, [r0], #1
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [r0], #1
	strneb	r1, [ip], #1
	mov	pc, lr

6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1
	strleb	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)
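Taken together, the memset.S hunks rewrite the store loops to scribble through ip (and r8) instead of r0, so that r0 survives untouched as the return value. The background: memset() is defined to return its first argument, and the AAPCS places both the first argument and the return value in r0, so a compiler may legitimately keep using whatever it finds in r0 after the call. A minimal stand-alone C illustration of that contract (ordinary user-space code, not taken from this diff):

#include <string.h>
#include <stdio.h>

int main(void)
{
	char buf[16];

	/* memset() must hand back its first argument unchanged; the
	 * compiler is entitled to reuse the returned pointer instead
	 * of reloading `buf` -- which is exactly what broke when the
	 * old assembly clobbered r0. */
	char *p = memset(buf, 'x', sizeof(buf) - 1);
	buf[sizeof(buf) - 1] = '\0';

	printf("%d\n", p == buf);	/* prints 1 */
	return 0;
}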
@@ -176,6 +176,7 @@ static struct w1_gpio_platform_data w1_gpio_pdata = {
	/* If you choose to use a pin other than PB16 it needs to be 3.3V */
	.pin = AT91_PIN_PB16,
	.is_open_drain = 1,
	.ext_pullup_enable_pin = -EINVAL,
};

static struct platform_device w1_device = {
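The .ext_pullup_enable_pin = -EINVAL initializers added here (and in the portuxg20 and vulcan hunks further down) follow from the w1-gpio driver gaining an optional GPIO that switches an external strong pull-up. A sketch of the check a consumer of the field would perform -- assumed behaviour for illustration, not the literal driver code:

#include <linux/gpio.h>
#include <linux/w1-gpio.h>

static int w1_setup_ext_pullup(const struct w1_gpio_platform_data *pdata)
{
	/* -EINVAL never passes gpio_is_valid(), so boards that set it,
	 * like the ones in this diff, simply opt out of the feature. */
	if (!gpio_is_valid(pdata->ext_pullup_enable_pin))
		return 0;

	return gpio_direction_output(pdata->ext_pullup_enable_pin, 1);
}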
@@ -188,6 +188,7 @@ static struct spi_board_info portuxg20_spi_devices[] = {
static struct w1_gpio_platform_data w1_gpio_pdata = {
	.pin = AT91_PIN_PA29,
	.is_open_drain = 1,
	.ext_pullup_enable_pin = -EINVAL,
};

static struct platform_device w1_device = {
@@ -172,7 +172,7 @@ static struct clk *clk[clk_max];
static struct clk_onecell_data clk_data;

static enum mx6q_clks const clks_init_on[] __initconst = {
	mmdc_ch0_axi, rom,
	mmdc_ch0_axi, rom, pll1_sys,
};

static struct clk_div_table clk_enet_ref_table[] = {
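Adding pll1_sys to clks_init_on keeps that PLL from being switched off as a seemingly unused clock during boot. The array is typically walked once by the clock initialization code, along these lines (a sketch of the usual pattern, assuming the clk[] and clks_init_on definitions from the hunk above; not quoted from this diff):

#include <linux/clk.h>
#include <linux/init.h>
#include <linux/kernel.h>

/* In imx6q clock init, after all entries of clk[] are registered: */
static void __init imx6q_enable_init_clks(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
		clk_prepare_enable(clk[clks_init_on[i]]);
}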
@@ -26,16 +26,16 @@ ENDPROC(v7_secondary_startup)

#ifdef CONFIG_PM
/*
 * The following code is located into the .data section. This is to
 * allow phys_l2x0_saved_regs to be accessed with a relative load
 * as we are running on physical address here.
 * The following code must assume it is running from physical address
 * where absolute virtual addresses to the data section have to be
 * turned into relative ones.
 */
	.data
	.align

#ifdef CONFIG_CACHE_L2X0
	.macro	pl310_resume
	ldr	r2, phys_l2x0_saved_regs
	adr	r0, l2x0_saved_regs_offset
	ldr	r2, [r0]
	add	r2, r2, r0
	ldr	r0, [r2, #L2X0_R_PHY_BASE]	@ get physical base of l2x0
	ldr	r1, [r2, #L2X0_R_AUX_CTRL]	@ get aux_ctrl value
	str	r1, [r0, #L2X0_AUX_CTRL]	@ restore aux_ctrl
@@ -43,9 +43,9 @@ ENDPROC(v7_secondary_startup)
	str	r1, [r0, #L2X0_CTRL]	@ re-enable L2
	.endm

	.globl	phys_l2x0_saved_regs
phys_l2x0_saved_regs:
	.long	0
l2x0_saved_regs_offset:
	.word	l2x0_saved_regs - .

#else
	.macro	pl310_resume
	.endm
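The two headsmp.S hunks replace the absolute phys_l2x0_saved_regs slot with a self-relative one: ".word l2x0_saved_regs - ." stores the distance from the word itself to the target, so "adr r0, ...; ldr r2, [r0]; add r2, r2, r0" recovers the address correctly whether the code executes from its physical or its virtual location. A toy user-space C analogue of the idiom (names invented for the demo):

#include <stdint.h>
#include <stdio.h>

static int payload[8];		/* stands in for l2x0_saved_regs */
static intptr_t slot;		/* stands in for ".word l2x0_saved_regs - ." */

int main(void)
{
	/* What the assembler computes at build time for ".word sym - ." */
	slot = (intptr_t)payload - (intptr_t)&slot;

	/* What "adr r0, slot; ldr r2, [r0]; add r2, r2, r0" does at run
	 * time: take the slot's own address and add the stored delta. */
	int *p = (int *)((intptr_t)&slot + slot);

	printf("%d\n", p == payload);	/* prints 1 */
	return 0;
}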
@@ -22,8 +22,6 @@
#include "common.h"
#include "hardware.h"

extern unsigned long phys_l2x0_saved_regs;

static int imx6q_suspend_finish(unsigned long val)
{
	cpu_do_idle();
@@ -57,18 +55,5 @@ static const struct platform_suspend_ops imx6q_pm_ops = {

void __init imx6q_pm_init(void)
{
	/*
	 * The l2x0 core code provides an infrastucture to save and restore
	 * l2x0 registers across suspend/resume cycle. But because imx6q
	 * retains L2 content during suspend and needs to resume L2 before
	 * MMU is enabled, it can only utilize register saving support and
	 * have to take care of restoring on its own. So we save physical
	 * address of the data structure used by l2x0 core to save registers,
	 * and later restore the necessary ones in imx6q resume entry.
	 */
#ifdef CONFIG_CACHE_L2X0
	phys_l2x0_saved_regs = __pa(&l2x0_saved_regs);
#endif

	suspend_set_ops(&imx6q_pm_ops);
}
@@ -163,6 +163,7 @@ static struct platform_device vulcan_max6369 = {

static struct w1_gpio_platform_data vulcan_w1_gpio_pdata = {
	.pin = 14,
	.ext_pullup_enable_pin = -EINVAL,
};

static struct platform_device vulcan_w1_gpio = {
Some files were not shown because too many files have changed in this diff.