Add support for lzma compressed uImage.
Support for gzip was already available but could not be enabled because
we were missing CONFIG_HAVE_KERNEL_GZIP in arch/arc/Kconfig.
Signed-off-by: Daniel Mentz <danielmentz@google.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
The IDU intc is technically part of MCIP (Multi-core IP) hence
historically was only available in a SMP hardware build (and thus only
in a SMP kernel build). Now that hardware restriction has been lifted,
so a UP kernel needs to support it.
This requires breaking mcip.c into parts which are strictly SMP
(inter-core interrupts) and IDU which in reality is just another
intc and thus has no bearing on SMP.
This change allows IDU in UP builds and with a suitable device tree, we
can have the cascaded intc system
ARCv2 core intc <---> ARCv2 IDU intc <---> periperals
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Seem like values assigned as absolute number and not and
shift value, i.e. should be 0 for one node (2^0) and 1 for
couple of nodes (2^1)
Signed-off-by: Noam Camus <noamca@mellanox.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
ARCv2 ISA provides 64-bit exclusive load/stores so use them to implement
the 64-bit atomics and elide the spinlock based generic 64-bit atomics
boot tested with atomic64 self-test (and GOD bless the person who wrote
them, I realized my inline assmebly is sloppy as hell)
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-snps-arc@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
This reverts commit e78fdfef84.
The issue was fixed in hardware in HS2.1C release and there are no known
external users of affected RTL so revert the whole delayed retry series !
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
ARC700 support for 2 interrupt priorities historically allowed even slow
perpherals such as emac and uart to setup high priority interrupts
which was wrong from the beginning as they could possibly delay the more
critical timer interrupt.
The hardware support for 2 level interrupts in ARCompact is less than
ideal anyways (judging from the "hacks" in low level entry code and thus
is not used in productions systems I know of.
So reduce the scope of this to timer only, thereby reducing a bunch of
complexity.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
The binary GCD algorithm is based on the following facts:
1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2)
2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)
Even on x86 machines with reasonable division hardware, the binary
algorithm runs about 25% faster (80% the execution time) than the
division-based Euclidian algorithm.
On platforms like Alpha and ARMv6 where division is a function call to
emulation code, it's even more significant.
There are two variants of the code here, depending on whether a fast
__ffs (find least significant set bit) instruction is available. This
allows the unpredictable branches in the bit-at-a-time shifting loop to
be eliminated.
If fast __ffs is not available, the "even/odd" GCD variant is used.
I use the following code to benchmark:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#define swap(a, b) \
do { \
a ^= b; \
b ^= a; \
a ^= b; \
} while (0)
unsigned long gcd0(unsigned long a, unsigned long b)
{
unsigned long r;
if (a < b) {
swap(a, b);
}
if (b == 0)
return a;
while ((r = a % b) != 0) {
a = b;
b = r;
}
return b;
}
unsigned long gcd1(unsigned long a, unsigned long b)
{
unsigned long r = a | b;
if (!a || !b)
return r;
b >>= __builtin_ctzl(b);
for (;;) {
a >>= __builtin_ctzl(a);
if (a == b)
return a << __builtin_ctzl(r);
if (a < b)
swap(a, b);
a -= b;
}
}
unsigned long gcd2(unsigned long a, unsigned long b)
{
unsigned long r = a | b;
if (!a || !b)
return r;
r &= -r;
while (!(b & r))
b >>= 1;
for (;;) {
while (!(a & r))
a >>= 1;
if (a == b)
return a;
if (a < b)
swap(a, b);
a -= b;
a >>= 1;
if (a & r)
a += b;
a >>= 1;
}
}
unsigned long gcd3(unsigned long a, unsigned long b)
{
unsigned long r = a | b;
if (!a || !b)
return r;
b >>= __builtin_ctzl(b);
if (b == 1)
return r & -r;
for (;;) {
a >>= __builtin_ctzl(a);
if (a == 1)
return r & -r;
if (a == b)
return a << __builtin_ctzl(r);
if (a < b)
swap(a, b);
a -= b;
}
}
unsigned long gcd4(unsigned long a, unsigned long b)
{
unsigned long r = a | b;
if (!a || !b)
return r;
r &= -r;
while (!(b & r))
b >>= 1;
if (b == r)
return r;
for (;;) {
while (!(a & r))
a >>= 1;
if (a == r)
return r;
if (a == b)
return a;
if (a < b)
swap(a, b);
a -= b;
a >>= 1;
if (a & r)
a += b;
a >>= 1;
}
}
static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = {
gcd0, gcd1, gcd2, gcd3, gcd4,
};
#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0]))
#if defined(__x86_64__)
#define rdtscll(val) do { \
unsigned long __a,__d; \
__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
(val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \
} while(0)
static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
unsigned long a, unsigned long b, unsigned long *res)
{
unsigned long long start, end;
unsigned long long ret;
unsigned long gcd_res;
rdtscll(start);
gcd_res = gcd(a, b);
rdtscll(end);
if (end >= start)
ret = end - start;
else
ret = ~0ULL - start + 1 + end;
*res = gcd_res;
return ret;
}
#else
static inline struct timespec read_time(void)
{
struct timespec time;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time);
return time;
}
static inline unsigned long long diff_time(struct timespec start, struct timespec end)
{
struct timespec temp;
if ((end.tv_nsec - start.tv_nsec) < 0) {
temp.tv_sec = end.tv_sec - start.tv_sec - 1;
temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec;
} else {
temp.tv_sec = end.tv_sec - start.tv_sec;
temp.tv_nsec = end.tv_nsec - start.tv_nsec;
}
return temp.tv_sec * 1000000000ULL + temp.tv_nsec;
}
static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long),
unsigned long a, unsigned long b, unsigned long *res)
{
struct timespec start, end;
unsigned long gcd_res;
start = read_time();
gcd_res = gcd(a, b);
end = read_time();
*res = gcd_res;
return diff_time(start, end);
}
#endif
static inline unsigned long get_rand()
{
if (sizeof(long) == 8)
return (unsigned long)rand() << 32 | rand();
else
return rand();
}
int main(int argc, char **argv)
{
unsigned int seed = time(0);
int loops = 100;
int repeats = 1000;
unsigned long (*res)[TEST_ENTRIES];
unsigned long long elapsed[TEST_ENTRIES];
int i, j, k;
for (;;) {
int opt = getopt(argc, argv, "n:r:s:");
/* End condition always first */
if (opt == -1)
break;
switch (opt) {
case 'n':
loops = atoi(optarg);
break;
case 'r':
repeats = atoi(optarg);
break;
case 's':
seed = strtoul(optarg, NULL, 10);
break;
default:
/* You won't actually get here. */
break;
}
}
res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops);
memset(elapsed, 0, sizeof(elapsed));
srand(seed);
for (j = 0; j < loops; j++) {
unsigned long a = get_rand();
/* Do we have args? */
unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
unsigned long long min_elapsed[TEST_ENTRIES];
for (k = 0; k < repeats; k++) {
for (i = 0; i < TEST_ENTRIES; i++) {
unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]);
if (k == 0 || min_elapsed[i] > tmp)
min_elapsed[i] = tmp;
}
}
for (i = 0; i < TEST_ENTRIES; i++)
elapsed[i] += min_elapsed[i];
}
for (i = 0; i < TEST_ENTRIES; i++)
printf("gcd%d: elapsed %llu\n", i, elapsed[i]);
k = 0;
srand(seed);
for (j = 0; j < loops; j++) {
unsigned long a = get_rand();
unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand();
for (i = 1; i < TEST_ENTRIES; i++) {
if (res[j][i] != res[j][0])
break;
}
if (i < TEST_ENTRIES) {
if (k == 0) {
k = 1;
fprintf(stderr, "Error:\n");
}
fprintf(stderr, "gcd(%lu, %lu): ", a, b);
for (i = 0; i < TEST_ENTRIES; i++)
fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n");
}
}
if (k == 0)
fprintf(stderr, "PASS\n");
free(res);
return 0;
}
Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got:
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 10174
gcd1: elapsed 2120
gcd2: elapsed 2902
gcd3: elapsed 2039
gcd4: elapsed 2812
PASS
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 9309
gcd1: elapsed 2280
gcd2: elapsed 2822
gcd3: elapsed 2217
gcd4: elapsed 2710
PASS
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 9589
gcd1: elapsed 2098
gcd2: elapsed 2815
gcd3: elapsed 2030
gcd4: elapsed 2718
PASS
zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10
gcd0: elapsed 9914
gcd1: elapsed 2309
gcd2: elapsed 2779
gcd3: elapsed 2228
gcd4: elapsed 2709
PASS
[akpm@linux-foundation.org: avoid #defining a CONFIG_ variable]
Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com>
Signed-off-by: George Spelvin <linux@horizon.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
On ARC, lower 2G of address space is translated and used for
- user vaddr space (region 0 to 5)
- unused kernel-user gutter (region 6)
- kernel vaddr space (region 7)
where each region simply represents 256MB of address space.
The kernel vaddr space of 256MB is used to implement vmalloc, modules
So far this was enough, but not on EZChip system with 4K CPUs (given
that per cpu mechanism uses vmalloc for allocating chunks)
So allow VMALLOC_SIZE to be configurable by expanding down into the unused
kernel-user gutter region which at default 256M was excessive anyways.
Also use _BITUL() to fix a build error since PGDIR_SIZE cannot use "1UL"
as called from assembly code in mm/tlbex.S
Signed-off-by: Noam Camus <noamc@ezchip.com>
[vgupta: rewrote changelog, debugged bootup crash due to int vs. hex]
Acked-by: Vineet Gupta <vgupta@synopsys.com>
The primary interrupt handler arch_do_IRQ() was passing hwirq as linux
virq to core code. This was fragile and worked so far as we only had legacy/linear
domains.
This came out of a rant by Marc Zyngier.
http://lists.infradead.org/pipermail/linux-snps-arc/2015-December/000298.html
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
- call clocksource_probe()
- This in turns needs of_clk_init() to be called earlier
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Noam Camus <noamc@ezchip.com>
[vgupta: broken off from a bigger patch]
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Initial HIGHMEM support on ARC was introduced for PAE40 where the low
memory (0x8000_0000 based) and high memory (0x1_0000_0000) were
physically contiguous. So CONFIG_FLATMEM sufficed (despite a peipheral
hole in the middle, which wasted a bit of struct page memory, but things
worked).
However w/o PAE, highmem was not possible and we could only reach
~1.75GB of DDR. Now there is a use case to access ~4GB of DDR w/o PAE40
The idea is to have low memory at canonical 0x8000_0000 and highmem
at 0 so enire 4GB address space is available for physical addressing
This needs additional platform/interconnect mapping to convert
the non contiguous physical addresses into linear bus adresses.
From Linux point of view, non contiguous divide means FLATMEM no
longer works and DISCONTIGMEM is needed to track the pfns in the 2
regions.
This scheme would also work for PAE40, only better in that we don't
waste struct page memory for the peripheral hole.
The DT description will be something like
memory {
...
reg = <0x80000000 0x200000000 /* 512MB: lowmem */
0x00000000 0x10000000>; /* 256MB: highmem */
}
Signed-off-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Commit 5f8fc43217 ("PCI: Include pci/pcie/Kconfig directly from
pci/Kconfig") in linux-next changed drivers/pci/Kconfig to include
drivers/pci/pcie/Kconfig itself, so that architectures do not need
to source both files themselves. ARC just recently gained PCI support
through commit 6b3fb77998dd ("ARC: Add PCI support"), but this change
was based on the old behaviour of the Kconfig files. This makes
Kconfig now spit out the following warnings:
drivers/pci/pcie/Kconfig:61:warning: choice value used outside its choice group
drivers/pci/pcie/Kconfig:67:warning: choice value used outside its choice group
drivers/pci/pcie/Kconfig:74:warning: choice value used outside its choice group
This change updates the Kconfig file for ARC, dropping the now
unnecessary 'source' statement, which makes the warning disappear.
Signed-off-by: Andreas Ziegler <andreas.ziegler@fau.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
- Big Endian io accessors fix [Lada]
- Spellos fixes [Adam]
- Fix for DW GMAC breakage [Alexey]
- Making DMA API 64-bit ready
- Shutting up -Wmaybe-uninitialized noise for ARC
- Other minor fixes here and there, comments update
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1
iQIcBAABAgAGBQJW738lAAoJEGnX8d3iisJeProP/icm32aIHY0QXmJCBXCmQfLa
HHzfBeJ2KsG8pIRgrvraK3FJkmFr+WxZ7x6b5hPNYeHIT3c179/GZ3DlssM1md0u
sa50o5jmwd/J4o5jCKpUB/hx7wiAjpC2CYb6qIg39A2Nq5JhOFJV30XMbCscXkLI
ae/o8oATi1502cf1OQ2EqNWKfME4ogG1KsEUNrSzcd+1P8LZxsnEVBmXuPHVdHLw
kTHVgmCELsEchaV/QY9pY+uHkm9Y4vV18v0vqbklwED+cHkjmXQ2UysP3/J8KXKN
PVSqmtUJIS2vxDGK5mWvz6jkWmU8gRXoT14ZqdmMARmhVhp3+JTm2fQ53NUwZ+b2
JpPNGWVQRi86AaiUE8Fm+eWjC242CAm+lsBfx+mvqWpEvFGMlnRKw8oZiyeJhhIw
3M1yrulQG7QbTSuQrgQwfGqtrhl2nnq+X0uoMJXYHupNDQ42QK8wmJ9bT7cmutD0
K3Tmi84qoiSnN/HhWK/D9d60bLGvUY4RKiLjAcJz7lbMjtRhT/rpFFcFYCIhJyZs
y//jOZK67o1ecDXBTaUcvT+edOrQVsmatn3w0p9VwATe8OiKHsLA/0UD34gwiECy
o9g/i4tc2GfOLFoLv66czXTU9IuoKDh3HrTJgET7r1Re/+FKgJ+2+GX6AbiJzbhY
9jsAAI/ZpsS6qMhvSz3d
=n0fk
-----END PGP SIGNATURE-----
Merge tag 'arc-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
Pull ARC architecture updates from Vineet Gupta:
- Big Endian io accessors fix [Lada]
- Spellos fixes [Adam]
- Fix for DW GMAC breakage [Alexey]
- Making DMA API 64-bit ready
- Shutting up -Wmaybe-uninitialized noise for ARC
- Other minor fixes here and there, comments update
* tag 'arc-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (21 commits)
ARCv2: ioremap: Support dynamic peripheral address space
ARC: dma: reintroduce platform specific dma<->phys
ARC: dma: ioremap: use phys_addr_t consistenctly in code paths
ARC: dma: pass_phys() not sg_virt() to cache ops
ARC: dma: non-coherent pages need V-P mapping if in HIGHMEM
ARC: dma: Use struct page based page allocator helpers
ARC: build: Turn off -Wmaybe-uninitialized for ARC gcc 4.8
ARC: [plat-axs10x] add Ethernet PHY description in .dts
arc: use of_platform_default_populate() to populate default bus
ARC: thp: unbork !CONFIG_TRANSPARENT_HUGEPAGE build
arc: [plat-nsimosci*] use ezchip network driver
ARCv2: LLSC: software backoff is NOT needed starting HS2.1c
ARC: mm: Use virt_to_pfn() for addr >> PAGE_SHIFT pattern
ARC: [plat-nsim] document ranges
ARC: build: Better way to detect ISA compatible toolchain
ARCv2: Allow enabling PAE40 w/o HIGHMEM
ARC: [BE] readl()/writel() to work in Big Endian CPU configuration
ARC: [*defconfig] No need to specify CONFIG_CROSS_COMPILE
ARC: [BE] Select correct CROSS_COMPILE prefix
ARC: bitops: Remove non relevant comments
...
This allows for regression testing in PAE specific code as we lack
a 32+ bit physical memory platform other than nSIM.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Add PCI support to ARC and update drivers/pci Makefile enabling the ARC
arch to use the generic PCI setup functions.
[bhelgaas: fold in Joao's pci-dma-compat.h & pci-bridge.h build fix (I
should have caught this myself, sorry]
Signed-off-by: Joao Pinto <jpinto@synopsys.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Even though DEVTMPFS is required when our pre-built initramfs
is used it is not the case in general. It is perfectly possible
to use initramfs with device nodes already populated or there
could be other usages, see discussion below for more detials:
http://thread.gmane.org/gmane.comp.embedded.openwrt.devel/37819/focus=37821
This change removes mentioned dependency from arch/arc/Kconfig
updating instead those defconfigs that are usually used with this
kind of pre-build initramfs.
And while at it all touched defconfigs were regenerated via
savedefconfig and some options were removed:
* USB is selected by other options implicitly
* VGA_CONSOLE is disableb for ARC since
031e29b587
* EXT3_FS automatically selects EXT4_FS
* MTDxxx and JFFS2_FS make no sense for AXS because
AXS NAND controller is not upstreamed
* NET_OSCI_LAN is not in upstream as well
* ARCPGU_xxx options make no sense because ARC PGU is not yet
in upstream and when it gets there all config options would
be taken from devicetree
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
It is unlikely that designs running Linux will not have multiplier.
Further the current support is not complete as tool don't generate a
multilib w/o multiplier.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
- Corner case of returning to delay slot from interrupt
- Changing default interrupt prioiry level
- Kconfig'ize support for super pages
- Other minor fixes
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1
iQIcBAABAgAGBQJWvxdUAAoJEGnX8d3iisJeRkYP/2HZAt4J6c5MPk/NSy8rabVX
2bB1m5jYXlBmJAIsmWm+WcDL72MdrB1Owtc5tEN+hIoQQa2QQpxolp32IslHg0o8
C9CCzmF+iR8wz3caVk3javpsbze23XbHho/kdx/l2Ed3Fi+syI/9jF1GiboydRtR
X22an1lslA6Y44pYxFmSFcMCv7XclFkJNe1ltxsgN9/QapnNrE/HWqUIy+SMr2Oo
Tpo3m/Dc+IfMMejYyupc3keyAhyeux69lJXPuOzYiurgGUIyXz15Un2mQ9gZWf0u
W56L/55VpQVuah46qrp5CBTLmdJA5cBqr0F8RqmZAqrEYLgn5SD4IhDjamo1qsP/
FfFh0cG955SoEyCsUOPILWUFR5TeS4rJK+ZJjErUb+dwEC1BWZR0/Dn1s9KJN8b7
GgGV8yXruDACFlFnCqnlxVs1TKOPOUqD2NZRAdsKunp+ywNrvGdD43xWONcriyvr
2KW0nb+mH3RRk8HQzKjfqsVhLMoR7n1MD/+tg8ME8usLn1ik0hBerT56CX0Wh/yQ
VnOUX6xqlaRydeJJgCUyByz3+jJVvj8sk/VZbr19F0p9id6wpiPQeNus2AcoHFKW
OyvWcfxzqKegXrYtMsy8IoFzx73zJaXV3ht0I09rhAj3JkdF7vFEIUpKIhsWqxAK
yWKKqLcVKga/2Yc8jduI
=FNDd
-----END PGP SIGNATURE-----
Merge tag 'arc-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
Pull ARC fixes from Vineet Gupta:
"I've been sitting on some of these fixes for a while.
- Corner case of returning to delay slot from interrupt
- Changing default interrupt prioiry level
- Kconfig'ize support for super pages
- Other minor fixes"
* tag 'arc-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
ARC: mm: Introduce explicit super page size support
ARCv2: intc: Allow interruption by lowest priority interrupt
ARCv2: Check for LL-SC livelock only if LLSC is enabled
ARC: shrink cpuinfo by not saving full timer BCR
ARCv2: clocksource: Rename GRTC -> GFRC ...
ARCv2: STAR 9000950267: Handle return from intr to Delay Slot #2
MMUv4 supports 2 concurrent page sizes: Normal and Super [4K to 16M]
So far Linux supported a single super page size for a given Normal page,
depending on the software page walking address split.
e.g. we had 11:8:13 address split for 8K page, which meant super page
was 2 ^(8+13) = 2M (given that THP size has to be PMD_SHIFT)
Now we turn this around, by allowing multiple Super Pages in Kconfig
(currently 2M and 16M only) and forcing page walker address split to
PGDIR_SHIFT and PAGE_SHIFT
For configs without Super page, things are same as before and
PGDIR_SHIFT can be hacked to get non default address split
The motivation for this change is a customer who needs 16M super page
and a 8K Normal page combo.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
As illustrated by commit a3afe70b83 ("[S390] latencytop s390
support."), HAVE_LATENCYTOP_SUPPORT is defined by an architecture to
advertise an implementation of save_stack_trace_tsk.
However, as of 9212ddb5ea ("stacktrace: provide save_stack_trace_tsk()
weak alias") a dummy implementation is provided if STACKTRACE=y. Given
that LATENCYTOP already depends on STACKTRACE_SUPPORT and selects
STACKTRACE, we can remove HAVE_LATENCYTOP_SUPPORT altogether.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Helge Deller <deller@gmx.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
HIGHMEM support bumped the default memory size for nsim platform to 1G.
Thus total memory ended at the very edge of start of peripherals address
space. With linux link base shifted, memory started bleeding into
peripheral space which caused early boot bad_page spew !
Fixes: 29e332261d ("ARC: mm: HIGHMEM: populate high memory from DT")
Reported-by: Anton Kolesov <akolesov@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
This is the first working implementation of 40-bit physical address
extension on ARCv2.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Implement kmap* API for ARC.
This enables
- permanent kernel maps (pkmaps): :kmap() API
- fixmap : kmap_atomic()
We use a very simple/uniform approach for both (unlike some of the other
arches). So fixmap doesn't use the customary compile time address stuff.
The important semantic is sleep'ability (pkmap) vs. not (fixmap) which
the API guarantees.
Note that this patch only enables highmem for subsequent PAE40 support
as there is no real highmem for ARC in pure 32-bit paradigm as explained
below.
ARC has 2:2 address split of the 32-bit address space with lower half
being translated (virtual) while upper half unstranslated
(0x8000_0000 to 0xFFFF_FFFF). kernel itself is linked at base of
unstranslated space (i.e. 0x8000_0000 onwards), which is mapped to say
DDR 0x0 by external Bus Glue logic (outside the core). So kernel can
potentially access 1.75G worth of memory directly w/o need for highmem.
(the top 256M is taken by uncached peripheral space from 0xF000_0000 to
0xFFFF_FFFF)
In PAE40, hardware can address memory beyond 4G (0x1_0000_0000) while
the logical/virtual addresses remain 32-bits. Thus highmem is required
for kernel proper to be able to access these pages for it's own purposes
(user space is agnostic to this anyways).
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
For Run-on-reset, non masters need to spin wait. For Halt-on-reset they
can jump to entry point directly.
Also while at it, made reset vector handler as "the" entry point for
kernel including host debugger based boot (which uses the ELF header
entry point)
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP
support.
Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a
new bit "SZ" in TLB page desciptor to distinguish between them.
Super Page size is configurable in hardware (4K to 16M), but fixed once
RTL builds.
The exact THP size a Linx configuration will support is a function of:
- MMU page size (typical 8K, RTL fixed)
- software page walker address split between PGD:PTE:PFN (typical
11:8:13, but can be changed with 1 line)
So for above default, THP size supported is 8K * 256 = 2M
Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime
reduces to 1 level (as PTE is folded into PGD and canonically referred
to as PMD).
Thus thp PMD accessors are implemented in terms of PTE (just like sparc)
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
ARC doesn't need the runtime detection of futex cmpxchg op
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
This is to workaround the llock/scond livelock
HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
coherency transactions in the SCU. The exclusive line state keeps rotating
among contenting cores leading to a never ending cycle. So break the cycle
by deferring the retry of failed exclusive access (SCOND). The actual delay
needed is function of number of contending cores as well as the unrelated
coherency traffic from other cores. To keep the code simple, start off with
small delay of 1 which would suffice most cases and in case of contention
double the delay. Eventually the delay is sufficient such that the coherency
pipeline is drained, thus a subsequent exclusive access would succeed.
Link: http://lkml.kernel.org/r/1438612568-28265-1-git-send-email-vgupta@synopsys.com
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Being highly configurable core ARC HS among other features might be
configured with or without DIV_REM_OPTION (hardware divider).
That option when enabled adds following instructions: div, divu, rem, remu.
By default ARC HS38 has this option enabled. So we add here possibility
to disable usage of hardware divider by compiler.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Caveats about cache flush on ARCv2 based cores
- dcache is PIPT so paddr is sufficient for cache maintenance ops (no
need to setup PTAG reg
- icache is still VIPT but only aliasing configs need PTAG setup
So basically this is departure from MMU-v3 which always need vaddr in
line ops registers (DC_IVDL, DC_FLDL, IC_IVIL) but paddr in DC_PTAG,
IC_PTAG respectively.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
The AXS10x platforms consist of a mainboard with peripherals,
on which several daughter cards can be placed. The daughter cards
typically contain a CPU and memory.
Signed-off-by: Mischa Jonker <mjonker@synopsys.com>
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
* Remove remanants of legacy ARC FPGA platforms (AA4, ML509...)
* Only nsim simulation platform is left, rename platform accordingly
* AA4 DT stuff is compatible with nsim for ARC700 so rename it too
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Back when ARC700 4.10 was released, the related kernel features were
tied to this config item so they could be disabled in one shot (i.e.
LLOCK/SCOND, SWAPE, RTSC..)
That having happened a while back, all new ARC customers weill get 4.11+
so those features can be assumed to be present and need not be tied to a
top-level (we still retain the ability to individually disable them).
Further, since ARCv2 also shares some of those feautes, removing it
simplifies things a bit in Kconfig
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Even though ARC cores itself don't have any power management except SLEEP
state it's possible to use power management features of selected peripherals.
For example USB OTG requires PM_RUNTIME which is only available if
kernel/power/Kconfig is sourced by architecture.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Architectures only need a Kconfig entry for NO_DMA if it is possible
that its value will be 'y'. For arc its value will always be 'n', making
it pointless. Remove it.
Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
All the platforms do the same thing in init_machine callback so move it
out of callback into caller of callback
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
If the renamed symbol is defined lib/iomap.c implements ioport_map and
ioport_unmap and currently (nearly) all platforms define the port
accessor functions outb/inb and friend unconditionally. So
HAS_IOPORT_MAP is the better name for this.
Consequently NO_IOPORT is renamed to NO_IOPORT_MAP.
The motivation for this change is to reintroduce a symbol HAS_IOPORT
that signals if outb/int et al are available. I will address that at
least one merge window later though to keep surprises to a minimum and
catch new introductions of (HAS|NO)_IOPORT.
The changes in this commit were done using:
$ git grep -l -E '(NO|HAS)_IOPORT' | xargs perl -p -i -e 's/\b((?:CONFIG_)?(?:NO|HAS)_IOPORT)\b/$1_MAP/'
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
A number of Kconfig entries default to (uppercase) "N". It was clearly
intended to use "default n". But since (lowercase) "n" is the default
anyway, these lines might as well be removed.
Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Remove an outdated reference to "most personal computers" having only one
CPU, and change the use of "singleprocessor" and "single processor" in
CONFIG_SMP's documentation to "uniprocessor" across all arches where that
documentation is present.
Signed-off-by: Robert Graffham <psquid@psquid.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
* Moved cmdline copy from asm to "C" - allows for more robust checking
of pointer validity etc.
* Remove the Kconfig option to do so, base it on a runtime value passed
by u-boot
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
We've switched over every architecture that supports SMP to it, so
remove the new useless config variable.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Lockdep required a small fix to stacktrace API which was incorrectly
unwindign out of __switch_to for the current call frame.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
After the last architecture switched to generic hard irqs the config
options HAVE_GENERIC_HARDIRQS & GENERIC_HARDIRQS and the related code
for !CONFIG_GENERIC_HARDIRQS can be removed.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Merge Kconfig menu diet patches from Dave Hansen:
"I think the "Kernel Hacking" menu has gotten a bit out of hand. It is
over 120 lines long on my system with everything enabled and options
are scattered around it haphazardly.
http://sr71.net/~dave/linux/kconfig-horror.png
Let's try to introduce some sanity. This set takes that 120 lines
down to 55 and makes it vastly easier to find some things. It's a
start.
This set stands on its own, but there is plenty of room for follow-up
patches. The arch-specific debug options still end up getting stuck
in the top-level "kernel hacking" menu. OPTIMIZE_INLINING, for
instance, could obviously go in to the "compiler options" menu, but
the fact that it is defined in arch/ in a separate Kconfig file keeps
it on its own for the moment.
The Signed-off-by's in here look funky. I changed employers while
working on this set, so I have signoffs from both email addresses"
* emailed patches from Dave Hansen <dave@sr71.net>:
hang and lockup detection menu
kconfig: consolidate printk options
group locking debugging options
consolidate compilation option configs
consolidate runtime testing configs
order memory debugging Kconfig options
consolidate per-arch stack overflow debugging options
Original posting:
http://lkml.kernel.org/r/20121214184202.F54094D9@kernel.stglabs.ibm.com
Several architectures have similar stack debugging config options.
They all pretty much do the same thing, some with slightly
differing help text.
This patch changes the architectures to instead enable a Kconfig
boolean, and then use that boolean in the generic Kconfig.debug
to present the actual menu option. This removes a bunch of
duplication and adds consistency across arches.
Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com> [for tile]
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1. For VM_EXEC based delayed dcache/icache flush, reduces the number of
flushes.
2. Makes this security feature ON by default rather than OFF before.
3. Applications can use mprotect() to selectively override this.
4. ELF binaries have a GNU_STACK segment which can easily override the
kernel default permissions.
For nested-functions/trampolines, gcc already auto-enables executable
stack in elf. Others needing this can use -Wl,-z,execstack option.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
This is the meat of the series which prevents any dcache alias creation
by always keeping the U and K mapping of a page congruent.
If a mapping already exists, and other tries to access the page, prev
one is flushed to physical page (wback+inv)
Essentially flush_dcache_page()/copy_user_highpage() create K-mapping
of a page, but try to defer flushing, unless U-mapping exist.
When page is actually mapped to userspace, update_mmu_cache() flushes
the K-mapping (in certain cases this can be optimised out)
Additonally flush_cache_mm(), flush_cache_range(), flush_cache_page()
handle the puring of stale userspace mappings on exit/munmap...
flush_anon_page() handles the existing U-mapping for anon page before
kernel reads it via the GUP path.
Note that while not complete, this is enough to boot a simple
dynamically linked Busybox based rootfs
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Infrastructure required to make the Linux kernel compile and boot on the
Abilis Systems TB10x series of SOCs based on ARC700 CPUs:
- Kmake related files (Kconfig, Makefile, tb10x_defconfig)
- TB10x platform initialisation
Signed-off-by: Christian Ruppert <christian.ruppert@abilis.com>
Signed-off-by: Pierrick Hascoet <pierrick.hascoet@abilis.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Given that DeviceTree /bootargs can provide similar functionality,
no point in providing duplicate infrastructure.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
This tracks mainline commit ae903caae2 "Bury the conditionals from
kernel_thread/kernel_execve series" which we missed out as ARC port was
not yet mainline.
[vgupta: commit log modified]
Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
commit "fs: make binfmt support for #! scripts modular and removable"
made support for #!scripts optional - thus need to include the Kconfig
file to get all relevant BINFMT_*
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Currently ARC_HAS_LLSC can be influenced by platform for SMP only using
ARC_HAS_COH_LLSC. For !SMP it defaults to "y".
It turns out that some customers can't support it all, even in UP.
So we change the semantics, and use a negative dependency ARC_CANT_LLSC.
Any platform (independent of SMP or !SMP) can select it to disable LLSC.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
All the current platforms can work with 0x8000_0000 based dma_addr_t
since the Bus Bridges typically ignore the top bit (the only excpetion
was Angel4 PCI-AHB bridge which we no longer care for).
That way we don't need plat-specific cpu-addr to bus-addr conversion.
Hooks still provided - just in case a platform has an obscure device
which say needs 0 based bus address.
That way <asm/dma_mapping.h> no longer needs to unconditinally include
<plat/dma_addr.h>
Also verfied that on Angel4 board, other peripherals (IDE-disk / EMAC)
work fine with 0x8000_0000 based dma addresses.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
This is more natural and is now doable since the choice constructs are
gone.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
This mini patchseries addresses the lack of multi-platform-image support
in ARC port.
Older build system only supported one platform(soc) to build at a time
and further only one board of that platform could be built. There was no
technical reason for that - we just didn't have the need.
So the first step towards multi-platform (and multi-board) builds it to
allow build system to do that.
So as applicable, <choice .. endchoice> => <menu .. endmenu>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Implement ioremap_prot() to allow mapping IO memory with variable
protection
via TLB.
Implementing this allows the /dev/mem driver to use its generic access()
VMA callback, which in turn allows ptrace to examine data in memory
mapped regions mapped via /dev/mem, such as Arc DCCM.
The end result is that it is possible to examine values of variables
placed into DCCM in user space programs via GDB.
CC: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
CC: Noam Camus <noamc@ezchip.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
This allows ARC Target to do I/O to host in absence of any peripherals
whatsoever, assisted by Metaware Hostlink facility.
Further we have a FUSE based filesystem which makes us mount/access host
filesystem on target and do fops.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
* Includes mapping of CCMs in address space
* Annotations to move arbitrary code/data into CCM
* Moving some of the critical code/data into CCM
* Runtime detection/reporting
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
ARC700 doesn't natively support unaligned access, but can be emulated
-Unaligned Access Exception
-Disassembly at the Fault address to find the exact insn (long/short)
Also per Arnd's comment, we runtime control it using 2 sysctl knobs:
* SYSCTL_ARCH_UNALIGN_ALLOW: Runtime enable/disble
* SYSCTL_ARCH_UNALIGN_NO_WARN: Warn on each emulation attempt
Originally contributed by Tim Yao <tim.yao@amlogic.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Tim Yao <tim.yao@amlogic.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
-Originally written by Rajeshwar Ranga
-Derived off of generic unwinder in 2.6.19 and adapted to ARC
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Rajeshwar Ranga <rajeshwar.ranga@gmail.com>
ARC common code to enable a SMP system + ISS provided SMP extensions.
ARC700 natively lacks SMP support, hence some of the core features are
are only enabled if SoCs have the necessary h/w pixie-dust. This
includes:
-Inter Processor Interrupts (IPI)
-Cache coherency
-load-locked/store-conditional
...
The low level exception handling would be completely broken in SMP
because we don't have hardware assisted stack switching. Thus a fair bit
of this code is repurposing the MMU_SCRATCH reg for event handler
prologues to keep them re-entrant.
Many thanks to Rajeshwar Ranga for his initial "major" contributions to
SMP Port (back in 2008), and to Noam Camus and Gilad Ben-Yossef for help
with resurrecting that in 3.2 kernel (2012).
Note that this platform code is again singleton design pattern - so
multiple SMP platforms won't build at the moment - this deficiency is
addressed in subsequent patches within this series.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rajeshwar Ranga <rajeshwar.ranga@gmail.com>
Cc: Noam Camus <noamc@ezchip.com>
Cc: Gilad Ben-Yossef <gilad@benyossef.com>
There is a bit of hack/kludge right now where we disable preemption if a
L2 (High prio) IRQ is taken while L1 (Low prio) is active.
Need to revisit this
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
This is minimal infrastructure needed for devicetree work.
It uses an a sample "skeleton" devicetree - embedded in kernel image -
to print the board, manufacturer by parsing the top-level "compatible"
string.
As of now we don't need any additional "board" specific "machine_desc".
TODO: support interpreting the command line as boot-loader passed dtb
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: devicetree-discuss@lists.ozlabs.org
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: James Hogan <james.hogan@imgtec.com>
Reviewed-by: Rob Herring <rob.herring@calxeda.com>
Reviewed-by: James Hogan <james.hogan@imgtec.com>
Includes following fixes courtesy review by Al-Viro
* Tracer poke to Callee-regs were lost
Before going off into do_signal( ) we save the user-mode callee regs
(as they are not saved by default as part of pt_regs). This is to make
sure that that a Tracer (if tracing related signal) is able to do likes
of PEEKUSR(callee-reg).
However in return path we were simply discarding the user-mode callee
regs, which would break a POKEUSR(callee-reg) from a tracer.
* Issue related to multiple syscall restarts are addressed in next patch
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Acked-by: Jonas Bonn <jonas@southpole.se>
This includes support for generic clone/for/vfork/execve
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Arnd in his review pointed out that arch Kconfig organisation has several
deficiencies:
* Build time entries for things which can be runtime extracted from DT
(e.g. SDRAM size, core clk frequency..)
* Not multi-platform-image-build friendly (choice .. endchoice constructs)
* cpu variants support (750/770) is exclusive.
The first 2 have been fixed in subsequent patches.
Due to the nature of the 750 and 770, it is not possible to build for
both together, w/o special runtime glue code which would hurt
performance.
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Sam Ravnborg <sam@ravnborg.org>
Acked-by: Sam Ravnborg <sam@ravnborg.org>