Commit 52eba8dd authored by Ben Skeggs

drm/nva3/clk: better pll calculation when no fractional fb div available

The core/mem/shader clocks don't support the fractional feedback divider,
causing our calculated clocks to be off by quite a lot in some cases.  To
solve this we will switch to a search-based algorithm when fN is NULL.

For my NVA8 at PL3, this actually generates identical coefficients to
the binary driver.  Hopefully that's a good sign, and this change does
not break VPLL calculation for someone.
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent 96d1fcf8
...@@ -1353,7 +1353,7 @@ bool nv50_gpio_irq_enable(struct drm_device *, enum dcb_gpio_tag, bool on); ...@@ -1353,7 +1353,7 @@ bool nv50_gpio_irq_enable(struct drm_device *, enum dcb_gpio_tag, bool on);
/* nv50_calc. */ /* nv50_calc. */
int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk, int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk,
int *N1, int *M1, int *N2, int *M2, int *P); int *N1, int *M1, int *N2, int *M2, int *P);
int nv50_calc_pll2(struct drm_device *, struct pll_lims *, int nva3_calc_pll(struct drm_device *, struct pll_lims *,
int clk, int *N, int *fN, int *M, int *P); int clk, int *N, int *fN, int *M, int *P);
#ifndef ioread32_native #ifndef ioread32_native
......
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
*/ */
#include "drmP.h" #include "drmP.h"
#include "drm_fixed.h"
#include "nouveau_drv.h" #include "nouveau_drv.h"
#include "nouveau_hw.h" #include "nouveau_hw.h"
...@@ -47,45 +46,52 @@ nv50_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk, ...@@ -47,45 +46,52 @@ nv50_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
} }
int int
nv50_calc_pll2(struct drm_device *dev, struct pll_lims *pll, int clk, nva3_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
int *N, int *fN, int *M, int *P) int *pN, int *pfN, int *pM, int *P)
{ {
fixed20_12 fb_div, a, b; u32 best_err = ~0, err;
u32 refclk = pll->refclk / 10; int M, lM, hM, N, fN;
u32 max_vco_freq = pll->vco1.maxfreq / 10;
u32 max_vco_inputfreq = pll->vco1.max_inputfreq / 10;
clk /= 10;
*P = max_vco_freq / clk; *P = pll->vco1.maxfreq / clk;
if (*P > pll->max_p) if (*P > pll->max_p)
*P = pll->max_p; *P = pll->max_p;
if (*P < pll->min_p) if (*P < pll->min_p)
*P = pll->min_p; *P = pll->min_p;
/* *M = floor((refclk + max_vco_inputfreq) / max_vco_inputfreq); */ lM = (pll->refclk + pll->vco1.max_inputfreq) / pll->vco1.max_inputfreq;
a.full = dfixed_const(refclk + max_vco_inputfreq); lM = max(lM, (int)pll->vco1.min_m);
b.full = dfixed_const(max_vco_inputfreq); hM = (pll->refclk + pll->vco1.min_inputfreq) / pll->vco1.min_inputfreq;
a.full = dfixed_div(a, b); hM = min(hM, (int)pll->vco1.max_m);
a.full = dfixed_floor(a);
*M = dfixed_trunc(a);
/* fb_div = (vco * *M) / refclk; */ for (M = lM; M <= hM; M++) {
fb_div.full = dfixed_const(clk * *P); u32 tmp = clk * *P * M;
fb_div.full = dfixed_mul(fb_div, a); N = tmp / pll->refclk;
a.full = dfixed_const(refclk); fN = tmp % pll->refclk;
fb_div.full = dfixed_div(fb_div, a); if (!pfN && fN >= pll->refclk / 2)
N++;
/* *N = floor(fb_div); */ if (N < pll->vco1.min_n)
a.full = dfixed_floor(fb_div); continue;
*N = dfixed_trunc(fb_div); if (N > pll->vco1.max_n)
break;
/* *fN = (fmod(fb_div, 1.0) * 8192) - 4096; */ err = abs(clk - (pll->refclk * N / M / *P));
b.full = dfixed_const(8192); if (err < best_err) {
a.full = dfixed_mul(a, b); best_err = err;
fb_div.full = dfixed_mul(fb_div, b); *pN = N;
fb_div.full = fb_div.full - a.full; *pM = M;
*fN = dfixed_trunc(fb_div) - 4096; }
*fN &= 0xffff;
if (pfN) {
*pfN = (((fN << 13) / pll->refclk) - 4096) & 0xffff;
return clk; return clk;
}
}
if (unlikely(best_err == ~0)) {
NV_ERROR(dev, "unable to find matching pll values\n");
return -EINVAL;
}
return pll->refclk * *pN / *pM / *P;
} }
...@@ -286,7 +286,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk) ...@@ -286,7 +286,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
nv_wr32(dev, pll.reg + 8, reg2 | (P << 28) | (M2 << 16) | N2); nv_wr32(dev, pll.reg + 8, reg2 | (P << 28) | (M2 << 16) | N2);
} else } else
if (dev_priv->chipset < NV_C0) { if (dev_priv->chipset < NV_C0) {
ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P); ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
if (ret <= 0) if (ret <= 0)
return 0; return 0;
...@@ -298,7 +298,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk) ...@@ -298,7 +298,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
nv_wr32(dev, pll.reg + 4, reg1 | (P << 16) | (M1 << 8) | N1); nv_wr32(dev, pll.reg + 4, reg1 | (P << 16) | (M1 << 8) | N1);
nv_wr32(dev, pll.reg + 8, N2); nv_wr32(dev, pll.reg + 8, N2);
} else { } else {
ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P); ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
if (ret <= 0) if (ret <= 0)
return 0; return 0;
......
...@@ -104,7 +104,7 @@ nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl, ...@@ -104,7 +104,7 @@ nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
{ {
struct nva3_pm_state *pll; struct nva3_pm_state *pll;
struct pll_lims limits; struct pll_lims limits;
int N, fN, M, P, diff; int N, M, P, diff;
int ret, off; int ret, off;
ret = get_pll_limits(dev, id, &limits); ret = get_pll_limits(dev, id, &limits);
...@@ -136,7 +136,7 @@ nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl, ...@@ -136,7 +136,7 @@ nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
} }
if (!pll->new_div) { if (!pll->new_div) {
ret = nv50_calc_pll2(dev, &limits, khz, &N, &fN, &M, &P); ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
if (ret < 0) if (ret < 0)
return ERR_PTR(ret); return ERR_PTR(ret);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment