aboutsummaryrefslogtreecommitdiff
path: root/drivers/clk/kendryte/clk.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/clk/kendryte/clk.c')
-rw-r--r--drivers/clk/kendryte/clk.c606
1 files changed, 600 insertions, 6 deletions
diff --git a/drivers/clk/kendryte/clk.c b/drivers/clk/kendryte/clk.c
index 34e8e74..de9db84 100644
--- a/drivers/clk/kendryte/clk.c
+++ b/drivers/clk/kendryte/clk.c
@@ -2,17 +2,30 @@
/*
* Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
*/
-#include <kendryte/clk.h>
+#define LOG_CATEGORY UCLASS_CLK
#include <common.h>
-#include <dt-bindings/clock/k210-sysctl.h>
-#include <dt-bindings/mfd/k210-sysctl.h>
+#include <clk.h>
+#include <clk-uclass.h>
+#include <div64.h>
#include <dm.h>
#include <log.h>
#include <mapmem.h>
-
-#include <kendryte/bypass.h>
+#include <serial.h>
+#include <dt-bindings/clock/k210-sysctl.h>
+#include <dt-bindings/mfd/k210-sysctl.h>
#include <kendryte/pll.h>
+#include <linux/bitfield.h>
+
+/**
+ * struct k210_clk_priv - K210 clock driver private data
+ * @base: The base address of the sysctl device
+ * @in0: The "in0" external oscillator
+ */
+struct k210_clk_priv {
+ void __iomem *base;
+ struct clk in0;
+};
/*
* All parameters for different sub-clocks are collected into parameter arrays.
@@ -249,6 +262,30 @@ static const struct k210_mux_params k210_muxes[] = {
#undef MUX_LIST
/**
+ * struct k210_pll_params - K210 PLL parameters
+ * @off: The offset of the PLL from the base sysctl address
+ * @shift: The offset of the LSB of the lock status
+ * @width: The number of bits in the lock status
+ */
+struct k210_pll_params {
+ u8 off;
+ u8 shift;
+ u8 width;
+};
+
+static const struct k210_pll_params k210_plls[] = {
+#define PLL(_off, _shift, _width) { \
+ .off = (_off), \
+ .shift = (_shift), \
+ .width = (_width), \
+}
+ [0] = PLL(K210_SYSCTL_PLL0, 0, 2),
+ [1] = PLL(K210_SYSCTL_PLL1, 8, 1),
+ [2] = PLL(K210_SYSCTL_PLL2, 16, 1),
+#undef PLL
+};
+
+/**
* enum k210_clk_flags - The type of a K210 clock
* @K210_CLKF_MUX: This clock has a mux and not a static parent
* @K210_CLKF_PLL: This clock is a PLL
@@ -286,7 +323,6 @@ struct k210_clk_params {
};
};
-
static const struct k210_clk_params k210_clks[] = {
#if CONFIG_IS_ENABLED(CMD_CLK)
#define NAME(_name) .name = (_name),
@@ -382,6 +418,564 @@ static const struct k210_clk_params k210_clks[] = {
#undef CLK_LIST
};
+#define K210_PLL_CLKR GENMASK(3, 0)
+#define K210_PLL_CLKF GENMASK(9, 4)
+#define K210_PLL_CLKOD GENMASK(13, 10) /* Output Divider */
+#define K210_PLL_BWADJ GENMASK(19, 14) /* BandWidth Adjust */
+#define K210_PLL_RESET BIT(20)
+#define K210_PLL_PWRD BIT(21) /* PoWeReD */
+#define K210_PLL_INTFB BIT(22) /* Internal FeedBack */
+#define K210_PLL_BYPASS BIT(23)
+#define K210_PLL_TEST BIT(24)
+#define K210_PLL_EN BIT(25)
+#define K210_PLL_TEST_EN BIT(26)
+
+#define K210_PLL_LOCK 0
+#define K210_PLL_CLEAR_SLIP 2
+#define K210_PLL_TEST_OUT 3
+
+#ifdef CONFIG_CLK_K210_SET_RATE
+static int k210_pll_enable(struct k210_clk_priv *priv, int id);
+static int k210_pll_disable(struct k210_clk_priv *priv, int id);
+static ulong k210_pll_get_rate(struct k210_clk_priv *priv, int id, ulong rate_in);
+
+/*
+ * The PLL included with the Kendryte K210 appears to be a True Circuits, Inc.
+ * General-Purpose PLL. The logical layout of the PLL with internal feedback is
+ * approximately the following:
+ *
+ * +---------------+
+ * |reference clock|
+ * +---------------+
+ * |
+ * v
+ * +--+
+ * |/r|
+ * +--+
+ * |
+ * v
+ * +-------------+
+ * |divided clock|
+ * +-------------+
+ * |
+ * v
+ * +--------------+
+ * |phase detector|<---+
+ * +--------------+ |
+ * | |
+ * v +--------------+
+ * +---+ |feedback clock|
+ * |VCO| +--------------+
+ * +---+ ^
+ * | +--+ |
+ * +--->|/f|---+
+ * | +--+
+ * v
+ * +---+
+ * |/od|
+ * +---+
+ * |
+ * v
+ * +------+
+ * |output|
+ * +------+
+ *
+ * The k210 PLLs have three factors: r, f, and od. Because of the feedback mode,
+ * the effect of the division by f is to multiply the input frequency. The
+ * equation for the output rate is
+ * rate = (rate_in * f) / (r * od).
+ * Moving knowns to one side of the equation, we get
+ * rate / rate_in = f / (r * od)
+ * Rearranging slightly,
+ * abs_error = abs((rate / rate_in) - (f / (r * od))).
+ * To get relative, error, we divide by the expected ratio
+ * error = abs((rate / rate_in) - (f / (r * od))) / (rate / rate_in).
+ * Simplifying,
+ * error = abs(1 - f / (r * od)) / (rate / rate_in)
+ * error = abs(1 - (f * rate_in) / (r * od * rate))
+ * Using the constants ratio = rate / rate_in and inv_ratio = rate_in / rate,
+ * error = abs((f * inv_ratio) / (r * od) - 1)
+ * This is the error used in evaluating parameters.
+ *
+ * r and od are four bits each, while f is six bits. Because r and od are
+ * multiplied together, instead of the full 256 values possible if both bits
+ * were used fully, there are only 97 distinct products. Combined with f, there
+ * are 6208 theoretical settings for the PLL. However, most of these settings
+ * can be ruled out immediately because they do not have the correct ratio.
+ *
+ * In addition to the constraint of approximating the desired ratio, parameters
+ * must also keep internal pll frequencies within acceptable ranges. The divided
+ * clock's minimum and maximum frequencies have a ratio of around 128. This
+ * leaves fairly substantial room to work with, especially since the only
+ * affected parameter is r. The VCO's minimum and maximum frequency have a ratio
+ * of 5, which is considerably more restrictive.
+ *
+ * The r and od factors are stored in a table. This is to make it easy to find
+ * the next-largest product. Some products have multiple factorizations, but
+ * only when one factor has at least a 2.5x ratio to the factors of the other
+ * factorization. This is because any smaller ratio would not make a difference
+ * when ensuring the VCO's frequency is within spec.
+ *
+ * Throughout the calculation function, fixed point arithmetic is used. Because
+ * the range of rate and rate_in may be up to 1.75 GHz, or around 2^30, 64-bit
+ * 32.32 fixed-point numbers are used to represent ratios. In general, to
+ * implement division, the numerator is first multiplied by 2^32. This gives a
+ * result where the whole number part is in the upper 32 bits, and the fraction
+ * is in the lower 32 bits.
+ *
+ * In general, rounding is done to the closest integer. This helps find the best
+ * approximation for the ratio. Rounding in one direction (e.g down) could cause
+ * the function to miss a better ratio with one of the parameters increased by
+ * one.
+ */
+
+/*
+ * The factors table was generated with the following python code:
+ *
+ * def p(x, y):
+ * return (1.0*x/y > 2.5) or (1.0*y/x > 2.5)
+ *
+ * factors = {}
+ * for i in range(1, 17):
+ * for j in range(1, 17):
+ * fs = factors.get(i*j) or []
+ * if fs == [] or all([
+ * (p(i, x) and p(i, y)) or (p(j, x) and p(j, y))
+ * for (x, y) in fs]):
+ * fs.append((i, j))
+ * factors[i*j] = fs
+ *
+ * for k, l in sorted(factors.items()):
+ * for v in l:
+ * print("PACK(%s, %s)," % v)
+ */
+#define PACK(r, od) (((((r) - 1) & 0xF) << 4) | (((od) - 1) & 0xF))
+#define UNPACK_R(val) ((((val) >> 4) & 0xF) + 1)
+#define UNPACK_OD(val) (((val) & 0xF) + 1)
+static const u8 factors[] = {
+ PACK(1, 1),
+ PACK(1, 2),
+ PACK(1, 3),
+ PACK(1, 4),
+ PACK(1, 5),
+ PACK(1, 6),
+ PACK(1, 7),
+ PACK(1, 8),
+ PACK(1, 9),
+ PACK(3, 3),
+ PACK(1, 10),
+ PACK(1, 11),
+ PACK(1, 12),
+ PACK(3, 4),
+ PACK(1, 13),
+ PACK(1, 14),
+ PACK(1, 15),
+ PACK(3, 5),
+ PACK(1, 16),
+ PACK(4, 4),
+ PACK(2, 9),
+ PACK(2, 10),
+ PACK(3, 7),
+ PACK(2, 11),
+ PACK(2, 12),
+ PACK(5, 5),
+ PACK(2, 13),
+ PACK(3, 9),
+ PACK(2, 14),
+ PACK(2, 15),
+ PACK(2, 16),
+ PACK(3, 11),
+ PACK(5, 7),
+ PACK(3, 12),
+ PACK(3, 13),
+ PACK(4, 10),
+ PACK(3, 14),
+ PACK(4, 11),
+ PACK(3, 15),
+ PACK(3, 16),
+ PACK(7, 7),
+ PACK(5, 10),
+ PACK(4, 13),
+ PACK(6, 9),
+ PACK(5, 11),
+ PACK(4, 14),
+ PACK(4, 15),
+ PACK(7, 9),
+ PACK(4, 16),
+ PACK(5, 13),
+ PACK(6, 11),
+ PACK(5, 14),
+ PACK(6, 12),
+ PACK(5, 15),
+ PACK(7, 11),
+ PACK(6, 13),
+ PACK(5, 16),
+ PACK(9, 9),
+ PACK(6, 14),
+ PACK(8, 11),
+ PACK(6, 15),
+ PACK(7, 13),
+ PACK(6, 16),
+ PACK(7, 14),
+ PACK(9, 11),
+ PACK(10, 10),
+ PACK(8, 13),
+ PACK(7, 15),
+ PACK(9, 12),
+ PACK(10, 11),
+ PACK(7, 16),
+ PACK(9, 13),
+ PACK(8, 15),
+ PACK(11, 11),
+ PACK(9, 14),
+ PACK(8, 16),
+ PACK(10, 13),
+ PACK(11, 12),
+ PACK(9, 15),
+ PACK(10, 14),
+ PACK(11, 13),
+ PACK(9, 16),
+ PACK(10, 15),
+ PACK(11, 14),
+ PACK(12, 13),
+ PACK(10, 16),
+ PACK(11, 15),
+ PACK(12, 14),
+ PACK(13, 13),
+ PACK(11, 16),
+ PACK(12, 15),
+ PACK(13, 14),
+ PACK(12, 16),
+ PACK(13, 15),
+ PACK(14, 14),
+ PACK(13, 16),
+ PACK(14, 15),
+ PACK(14, 16),
+ PACK(15, 15),
+ PACK(15, 16),
+ PACK(16, 16),
+};
+
+TEST_STATIC int k210_pll_calc_config(u32 rate, u32 rate_in,
+ struct k210_pll_config *best)
+{
+ int i;
+ s64 error, best_error;
+ u64 ratio, inv_ratio; /* fixed point 32.32 ratio of the rates */
+ u64 max_r;
+ u64 r, f, od;
+
+ /*
+ * Can't go over 1.75 GHz or under 21.25 MHz due to limitations on the
+ * VCO frequency. These are not the same limits as below because od can
+ * reduce the output frequency by 16.
+ */
+ if (rate > 1750000000 || rate < 21250000)
+ return -EINVAL;
+
+ /* Similar restrictions on the input rate */
+ if (rate_in > 1750000000 || rate_in < 13300000)
+ return -EINVAL;
+
+ ratio = DIV_ROUND_CLOSEST_ULL((u64)rate << 32, rate_in);
+ inv_ratio = DIV_ROUND_CLOSEST_ULL((u64)rate_in << 32, rate);
+ /* Can't increase by more than 64 or reduce by more than 256 */
+ if (rate > rate_in && ratio > (64ULL << 32))
+ return -EINVAL;
+ else if (rate <= rate_in && inv_ratio > (256ULL << 32))
+ return -EINVAL;
+
+ /*
+ * The divided clock (rate_in / r) must stay between 1.75 GHz and 13.3
+ * MHz. There is no minimum, since the only way to get a higher input
+ * clock than 26 MHz is to use a clock generated by a PLL. Because PLLs
+ * cannot output frequencies greater than 1.75 GHz, the minimum would
+ * never be greater than one.
+ */
+ max_r = DIV_ROUND_DOWN_ULL(rate_in, 13300000);
+
+ /* Variables get immediately incremented, so start at -1th iteration */
+ i = -1;
+ f = 0;
+ r = 0;
+ od = 0;
+ best_error = S64_MAX;
+ error = best_error;
+ /* do-while here so we always try at least one ratio */
+ do {
+ /*
+ * Whether we swapped r and od while enforcing frequency limits
+ */
+ bool swapped = false;
+ u64 last_od = od;
+ u64 last_r = r;
+
+ /*
+ * Try the next largest value for f (or r and od) and
+ * recalculate the other parameters based on that
+ */
+ if (rate > rate_in) {
+ /*
+ * Skip factors of the same product if we already tried
+ * out that product
+ */
+ do {
+ i++;
+ r = UNPACK_R(factors[i]);
+ od = UNPACK_OD(factors[i]);
+ } while (i + 1 < ARRAY_SIZE(factors) &&
+ r * od == last_r * last_od);
+
+ /* Round close */
+ f = (r * od * ratio + BIT(31)) >> 32;
+ if (f > 64)
+ f = 64;
+ } else {
+ u64 tmp = ++f * inv_ratio;
+ bool round_up = !!(tmp & BIT(31));
+ u32 goal = (tmp >> 32) + round_up;
+ u32 err, last_err;
+
+ /* Get the next r/od pair in factors */
+ while (r * od < goal && i + 1 < ARRAY_SIZE(factors)) {
+ i++;
+ r = UNPACK_R(factors[i]);
+ od = UNPACK_OD(factors[i]);
+ }
+
+ /*
+ * This is a case of double rounding. If we rounded up
+ * above, we need to round down (in cases of ties) here.
+ * This prevents off-by-one errors resulting from
+ * choosing X+2 over X when X.Y rounds up to X+1 and
+ * there is no r * od = X+1. For the converse, when X.Y
+ * is rounded down to X, we should choose X+1 over X-1.
+ */
+ err = abs(r * od - goal);
+ last_err = abs(last_r * last_od - goal);
+ if (last_err < err || (round_up && last_err == err)) {
+ i--;
+ r = last_r;
+ od = last_od;
+ }
+ }
+
+ /*
+ * Enforce limits on internal clock frequencies. If we
+ * aren't in spec, try swapping r and od. If everything is
+ * in-spec, calculate the relative error.
+ */
+ while (true) {
+ /*
+ * Whether the intermediate frequencies are out-of-spec
+ */
+ bool out_of_spec = false;
+
+ if (r > max_r) {
+ out_of_spec = true;
+ } else {
+ /*
+ * There is no way to only divide once; we need
+ * to examine the frequency with and without the
+ * effect of od.
+ */
+ u64 vco = DIV_ROUND_CLOSEST_ULL(rate_in * f, r);
+
+ if (vco > 1750000000 || vco < 340000000)
+ out_of_spec = true;
+ }
+
+ if (out_of_spec) {
+ if (!swapped) {
+ u64 tmp = r;
+
+ r = od;
+ od = tmp;
+ swapped = true;
+ continue;
+ } else {
+ /*
+ * Try looking ahead to see if there are
+ * additional factors for the same
+ * product.
+ */
+ if (i + 1 < ARRAY_SIZE(factors)) {
+ u64 new_r, new_od;
+
+ i++;
+ new_r = UNPACK_R(factors[i]);
+ new_od = UNPACK_OD(factors[i]);
+ if (r * od == new_r * new_od) {
+ r = new_r;
+ od = new_od;
+ swapped = false;
+ continue;
+ }
+ i--;
+ }
+ break;
+ }
+ }
+
+ error = DIV_ROUND_CLOSEST_ULL(f * inv_ratio, r * od);
+ /* The lower 16 bits are spurious */
+ error = abs((error - BIT(32))) >> 16;
+
+ if (error < best_error) {
+ best->r = r;
+ best->f = f;
+ best->od = od;
+ best_error = error;
+ }
+ break;
+ }
+ } while (f < 64 && i + 1 < ARRAY_SIZE(factors) && error != 0);
+
+ if (best_error == S64_MAX)
+ return -EINVAL;
+
+ log_debug("best error %lld\n", best_error);
+ return 0;
+}
+
+static ulong k210_pll_set_rate(struct k210_clk_priv *priv, int id, ulong rate,
+ ulong rate_in)
+{
+ int err;
+ const struct k210_pll_params *pll = &k210_plls[id];
+ struct k210_pll_config config = {};
+ u32 reg;
+
+ if (rate_in < 0)
+ return rate_in;
+
+ log_debug("Calculating parameters with rate=%lu and rate_in=%lu\n",
+ rate, rate_in);
+ err = k210_pll_calc_config(rate, rate_in, &config);
+ if (err)
+ return err;
+ log_debug("Got r=%u f=%u od=%u\n", config.r, config.f, config.od);
+
+ /*
+ * Don't use clk_disable as it might not actually disable the pll due to
+ * refcounting
+ */
+ k210_pll_disable(priv, id);
+
+ reg = readl(priv->base + pll->off);
+ reg &= ~K210_PLL_CLKR
+ & ~K210_PLL_CLKF
+ & ~K210_PLL_CLKOD
+ & ~K210_PLL_BWADJ;
+ reg |= FIELD_PREP(K210_PLL_CLKR, config.r - 1)
+ | FIELD_PREP(K210_PLL_CLKF, config.f - 1)
+ | FIELD_PREP(K210_PLL_CLKOD, config.od - 1)
+ | FIELD_PREP(K210_PLL_BWADJ, config.f - 1);
+ writel(reg, priv->base + pll->off);
+
+ err = k210_pll_enable(priv, id);
+
+ serial_setbrg();
+ return k210_pll_get_rate(priv, id, rate);
+}
+#else
+static ulong k210_pll_set_rate(struct k210_clk_priv *priv, int id, ulong rate,
+ ulong rate_in)
+{
+ return -ENOSYS;
+}
+#endif /* CONFIG_CLK_K210_SET_RATE */
+
+static ulong k210_pll_get_rate(struct k210_clk_priv *priv, int id,
+ ulong rate_in)
+{
+ u64 r, f, od;
+ u32 reg = readl(priv->base + k210_plls[id].off);
+
+ if (rate_in < 0 || (reg & K210_PLL_BYPASS))
+ return rate_in;
+
+ if (!(reg & K210_PLL_PWRD))
+ return 0;
+
+ r = FIELD_GET(K210_PLL_CLKR, reg) + 1;
+ f = FIELD_GET(K210_PLL_CLKF, reg) + 1;
+ od = FIELD_GET(K210_PLL_CLKOD, reg) + 1;
+
+ return DIV_ROUND_DOWN_ULL(((u64)rate_in) * f, r * od);
+}
+
+/*
+ * Wait for the PLL to be locked. If the PLL is not locked, try clearing the
+ * slip before retrying
+ */
+static void k210_pll_waitfor_lock(struct k210_clk_priv *priv, int id)
+{
+ const struct k210_pll_params *pll = &k210_plls[id];
+ u32 mask = (BIT(pll->width) - 1) << pll->shift;
+
+ while (true) {
+ u32 reg = readl(priv->base + K210_SYSCTL_PLL_LOCK);
+
+ if ((reg & mask) == mask)
+ break;
+
+ reg |= BIT(pll->shift + K210_PLL_CLEAR_SLIP);
+ writel(reg, priv->base + K210_SYSCTL_PLL_LOCK);
+ }
+}
+
+/* Adapted from sysctl_pll_enable */
+static int k210_pll_enable(struct k210_clk_priv *priv, int id)
+{
+ const struct k210_pll_params *pll = &k210_plls[id];
+ u32 reg = readl(priv->base + pll->off);
+
+ if ((reg & K210_PLL_PWRD) && (reg & K210_PLL_EN) &&
+ !(reg & K210_PLL_RESET))
+ return 0;
+
+ reg |= K210_PLL_PWRD;
+ writel(reg, priv->base + pll->off);
+
+ /* Ensure reset is low before asserting it */
+ reg &= ~K210_PLL_RESET;
+ writel(reg, priv->base + pll->off);
+ reg |= K210_PLL_RESET;
+ writel(reg, priv->base + pll->off);
+ nop();
+ nop();
+ reg &= ~K210_PLL_RESET;
+ writel(reg, priv->base + pll->off);
+
+ k210_pll_waitfor_lock(priv, id);
+
+ reg &= ~K210_PLL_BYPASS;
+ reg |= K210_PLL_EN;
+ writel(reg, priv->base + pll->off);
+
+ return 0;
+}
+
+static int k210_pll_disable(struct k210_clk_priv *priv, int id)
+{
+ const struct k210_pll_params *pll = &k210_plls[id];
+ u32 reg = readl(priv->base + pll->off);
+
+ /*
+ * Bypassing before powering off is important so child clocks don't stop
+ * working. This is especially important for pll0, the indirect parent
+ * of the cpu clock.
+ */
+ reg |= K210_PLL_BYPASS;
+ writel(reg, priv->base + pll->off);
+
+ reg &= ~K210_PLL_PWRD;
+ reg &= ~K210_PLL_EN;
+ writel(reg, priv->base + pll->off);
+ return 0;
+}
+
static u32 k210_clk_readl(struct k210_clk_priv *priv, u8 off, u8 shift,
u8 width)
{